
[v5,03/12] x86/mm: consolidate full flush threshold decision

Message ID 20250116023127.1531583-4-riel@surriel.com (mailing list archive)
State New
Series AMD broadcast TLB invalidation

Commit Message

Rik van Riel Jan. 16, 2025, 2:30 a.m. UTC
Reduce code duplication by consolidating the decision point
for whether to do individual invalidations or a full flush
inside get_flush_tlb_info.

Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)
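To make the consolidated heuristic concrete, below is a small standalone
model of the check this patch moves into get_flush_tlb_info(). It is an
illustration only, not kernel code: the ceiling mirrors the x86 default
of 33 (tunable at runtime via
/sys/kernel/debug/x86/tlb_single_page_flush_ceiling), and a stride shift
of 12 stands in for PAGE_SHIFT with 4 KiB pages. Note that because
TLB_FLUSH_ALL is -1UL, a flush-everything request always exceeds the
ceiling, which is why the explicit end == TLB_FLUSH_ALL tests removed by
this patch are subsumed by the range comparison.

#include <stdio.h>

#define TLB_FLUSH_ALL	(-1UL)

/* x86 default at the time of this series. */
static unsigned long tlb_single_page_flush_ceiling = 33;

static const char *flush_kind(unsigned long start, unsigned long end,
			      unsigned int stride_shift)
{
	/* The consolidated decision: promote large ranges to a full flush. */
	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling)
		return "full flush";	/* start = 0, end = TLB_FLUSH_ALL */
	return "per-page invalidations";
}

int main(void)
{
	printf("33 pages:      %s\n", flush_kind(0, 33UL << 12, 12));
	printf("64 pages:      %s\n", flush_kind(0, 64UL << 12, 12));
	printf("TLB_FLUSH_ALL: %s\n", flush_kind(0, TLB_FLUSH_ALL, 12));
	return 0;
}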

Comments

Michael Kelley Jan. 17, 2025, 7:23 p.m. UTC | #1
From: riel@surriel.com <riel@surriel.com> Sent: Wednesday, January 15, 2025 6:30 PM
> 
> Reduce code duplication by consolidating the decision point
> for whether to do individual invalidations or a full flush
> inside get_flush_tlb_info.
> 
> Signed-off-by: Rik van Riel <riel@surriel.com>
> Suggested-by: Dave Hansen <dave.hansen@intel.com>
> ---
>  arch/x86/mm/tlb.c | 43 ++++++++++++++++++++-----------------------
>  1 file changed, 20 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 6cf881a942bb..2f38cf95dee3 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c

[snip]

> @@ -1089,22 +1089,19 @@ static void do_kernel_range_flush(void *info)
> 
>  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
> -	/* Balance as user space task's flush, a bit conservative */
> -	if (end == TLB_FLUSH_ALL ||
> -	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
> -		on_each_cpu(do_flush_tlb_all, NULL, 1);
> -	} else {
> -		struct flush_tlb_info *info;
> +	struct flush_tlb_info *info;
> 
> -		preempt_disable();
> -		info = get_flush_tlb_info(NULL, start, end, 0, false,
> -					  TLB_GENERATION_INVALID);
> +	guard(preempt)();
> +
> +	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
> +				  TLB_GENERATION_INVALID);
> 
> +	if (end == TLB_FLUSH_ALL)

This needs to test "info->end", not "end".

In my VM on Hyper-V *without* INVLPGB support, the bug causes
boot to hang as it tries to individually flush each page in the
[0, TLB_FLUSH_ALL] range. :-)

Michael

> +		on_each_cpu(do_flush_tlb_all, NULL, 1);
> +	else
>  		on_each_cpu(do_kernel_range_flush, info, 1);
> 
> -		put_flush_tlb_info();
> -		preempt_enable();
> -	}
> +	put_flush_tlb_info();
>  }
> 
>  /*
Rik van Riel Jan. 17, 2025, 7:32 p.m. UTC | #2
On Fri, 2025-01-17 at 19:23 +0000, Michael Kelley wrote:
> From: riel@surriel.com <riel@surriel.com> Sent: Wednesday, January 15, 2025 6:30 PM
> > 
> > -		info = get_flush_tlb_info(NULL, start, end, 0, false,
> > -					  TLB_GENERATION_INVALID);
> > +	guard(preempt)();
> > +
> > +	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
> > +				  TLB_GENERATION_INVALID);
> > 
> > +	if (end == TLB_FLUSH_ALL)
> 
> This needs to test "info->end", not "end".
> 
> In my VM on Hyper-V *without* INVLPGB support, the bug causes
> boot to hang as it tries to individually flush each page in the
> [0, TLB_FLUSH_ALL] range. :-)

Ohhhh, good catch.

I guess on bare metal the automated tests that run
on every kernel build didn't notice the slower boot,
because the impact is not as severe as in a
virtual machine.
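
For reference, here is a minimal sketch of flush_tlb_kernel_range()
with the one-line fix Michael describes applied, i.e. testing the
possibly-rewritten info->end rather than the caller's end. This follows
the v5 patch below and is not necessarily the exact code that was
merged:

void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	struct flush_tlb_info *info;

	guard(preempt)();

	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
				  TLB_GENERATION_INVALID);

	/*
	 * get_flush_tlb_info() may have promoted a large range to
	 * [0, TLB_FLUSH_ALL]. The caller's 'end' does not reflect that,
	 * so testing 'end' sends the promoted range down the per-page
	 * path, which then tries to flush every page up to -1UL.
	 */
	if (info->end == TLB_FLUSH_ALL)
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	else
		on_each_cpu(do_kernel_range_flush, info, 1);

	put_flush_tlb_info();
}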

Patch

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6cf881a942bb..2f38cf95dee3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1009,6 +1009,15 @@  static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	info->initiating_cpu	= smp_processor_id();
 	info->trim_cpumask	= 0;
 
+	/*
+	 * If the number of flushes is so large that a full flush
+	 * would be faster, do a full flush.
+	 */
+	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+		info->start = 0;
+		info->end = TLB_FLUSH_ALL;
+	}
+
 	return info;
 }
 
@@ -1026,17 +1035,8 @@  void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				bool freed_tables)
 {
 	struct flush_tlb_info *info;
+	int cpu = get_cpu();
 	u64 new_tlb_gen;
-	int cpu;
-
-	cpu = get_cpu();
-
-	/* Should we flush just the requested range? */
-	if ((end == TLB_FLUSH_ALL) ||
-	    ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
-		start = 0;
-		end = TLB_FLUSH_ALL;
-	}
 
 	/* This is also a barrier that synchronizes with switch_mm(). */
 	new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1089,22 +1089,19 @@  static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL ||
-	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
-		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	} else {
-		struct flush_tlb_info *info;
+	struct flush_tlb_info *info;
 
-		preempt_disable();
-		info = get_flush_tlb_info(NULL, start, end, 0, false,
-					  TLB_GENERATION_INVALID);
+	guard(preempt)();
+
+	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+				  TLB_GENERATION_INVALID);
 
+	if (end == TLB_FLUSH_ALL)
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+	else
 		on_each_cpu(do_kernel_range_flush, info, 1);
 
-		put_flush_tlb_info();
-		preempt_enable();
-	}
+	put_flush_tlb_info();
 }
 
 /*
@@ -1276,7 +1273,7 @@  void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 
 	int cpu = get_cpu();
 
-	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false,
 				  TLB_GENERATION_INVALID);
 	/*
 	 * flush_tlb_multi() is not optimized for the common case in which only