
[v3] parisc: Fix ordering of cache and TLB flushes

Message ID 471bda47-a773-9c17-55af-6475fd8c2ad1@bell.net (mailing list archive)
State Accepted, archived

Commit Message

John David Anglin Feb. 27, 2018, 1:16 p.m. UTC
The change to flush_kernel_vmap_range() wasn't sufficient to avoid the
SMP stalls.  The problem is that some drivers call these routines with
interrupts disabled, while flush_tlb_all() and flush_cache_all() need
interrupts enabled in order to deliver their IPIs.  This version adds
checks to ensure interrupts are not disabled before calling routines
that need IPI interrupts.  When interrupts are disabled, we now drop
into slower code.
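
In outline, the new check looks like this (a minimal sketch condensed
from the cache.c hunks below, with "size" standing in for the range
length each routine computes):

#ifdef CONFIG_SMP
	/* flush_tlb_all() and flush_cache_all() send IPIs on SMP, so
	 * only take this path when interrupts are enabled. */
	if (!arch_irqs_disabled() && size >= parisc_cache_flush_threshold) {
#else
	if (size >= parisc_cache_flush_threshold) {
#endif
		flush_tlb_all();
		flush_cache_all();
		return;
	}
	/* Interrupts disabled or range below threshold: fall back to
	 * the slower range-based flushes, which don't need IPIs. */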

The attached change fixes the ordering of cache and TLB flushes in
several cases.  When we flush the cache using the existing PTE/TLB
entries, we need to flush the TLB after doing the cache flush.  We
don't need to do this when we flush the entire instruction and data
caches, as these flushes don't use the existing TLB entries.  The same
is true for tmpalias region flushes.
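
Concretely, in flush_cache_range() (see the corresponding hunk below)
the TLB flush now comes after the cache flush rather than before it:

	flush_user_dcache_range_asm(start, end);  /* uses existing TLB entries */
	if (vma->vm_flags & VM_EXEC)
		flush_user_icache_range_asm(start, end);
	flush_tlb_range(vma, start, end);         /* TLB flushed afterwards */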

The flush_kernel_vmap_range() and invalidate_kernel_vmap_range()
routines have been updated.

Secondly, we add a new purge_kernel_dcache_range_asm() routine to
pacache.S and use it in invalidate_kernel_vmap_range().  Nominally,
purges are faster than flushes, as the cache lines don't have to be
written back to memory.
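
The distinction maps onto the two PA-RISC data cache instructions:
fdc (flush data cache) writes a dirty line back to memory before
invalidating it, while pdc (purge data cache) invalidates the line
without writeback.  Condensed from the cache.c hunks below, the two
exported routines now differ only in which cache loop they call:

	/* flush_kernel_vmap_range(): dirty data must reach memory (fdc) */
	flush_kernel_dcache_range_asm(start, end);
	flush_tlb_kernel_range(start, end);

	/* invalidate_kernel_vmap_range(): stale lines can be dropped (pdc) */
	purge_kernel_dcache_range_asm(start, end);
	flush_tlb_kernel_range(start, end);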

Hopefully, this is sufficient to resolve the remaining problems due to
cache speculation.  So far, testing indicates that this is the case.
I did work up a patch using tmpalias flushes, but there is a
performance hit: we need the physical address for each page, and we
also need to sequence access to the tmpalias flush code.  This
increases the probability of stalls.

Signed-off-by: John David Anglin <dave.anglin@bell.net>

Comments

Matt Turner March 17, 2018, 4:02 p.m. UTC | #1
I just wanted to reply and give some confirmation and thanks. My
quad-CPU C8000 was unstable until this patch. I haven't seen a problem
with it since.

Thanks very much!

Patch

diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h
index 3742508cc534..bd5ce31936f5 100644
--- a/arch/parisc/include/asm/cacheflush.h
+++ b/arch/parisc/include/asm/cacheflush.h
@@ -26,6 +26,7 @@  void flush_user_icache_range_asm(unsigned long, unsigned long);
 void flush_kernel_icache_range_asm(unsigned long, unsigned long);
 void flush_user_dcache_range_asm(unsigned long, unsigned long);
 void flush_kernel_dcache_range_asm(unsigned long, unsigned long);
+void purge_kernel_dcache_range_asm(unsigned long, unsigned long);
 void flush_kernel_dcache_page_asm(void *);
 void flush_kernel_icache_page(void *);
 
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 19c0c141bc3f..c9c122da222f 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -465,10 +465,14 @@  EXPORT_SYMBOL(copy_user_page);
 int __flush_tlb_range(unsigned long sid, unsigned long start,
 		      unsigned long end)
 {
-	unsigned long flags, size;
+	unsigned long flags;
 
-	size = (end - start);
-	if (size >= parisc_tlb_flush_threshold) {
+#ifdef CONFIG_SMP
+	if (!arch_irqs_disabled() &&
+	    end - start >= parisc_tlb_flush_threshold) {
+#else
+	if (end - start >= parisc_tlb_flush_threshold) {
+#endif
 		flush_tlb_all();
 		return 1;
 	}
@@ -539,13 +543,15 @@  void flush_cache_mm(struct mm_struct *mm)
 	struct vm_area_struct *vma;
 	pgd_t *pgd;
 
-	/* Flush the TLB to avoid speculation if coherency is required. */
-	if (parisc_requires_coherency())
-		flush_tlb_all();
-
 	/* Flushing the whole cache on each cpu takes forever on
 	   rp3440, etc.  So, avoid it if the mm isn't too big.  */
+#ifdef CONFIG_SMP
+	if (!arch_irqs_disabled() &&
+	    mm_total_size(mm) >= parisc_cache_flush_threshold) {
+#else
 	if (mm_total_size(mm) >= parisc_cache_flush_threshold) {
+#endif
+		flush_tlb_all();
 		flush_cache_all();
 		return;
 	}
@@ -553,9 +559,9 @@  void flush_cache_mm(struct mm_struct *mm)
 	if (mm->context == mfsp(3)) {
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
 			flush_user_dcache_range_asm(vma->vm_start, vma->vm_end);
-			if ((vma->vm_flags & VM_EXEC) == 0)
-				continue;
-			flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+			if (vma->vm_flags & VM_EXEC)
+				flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
+			flush_tlb_range(vma, vma->vm_start, vma->vm_end);
 		}
 		return;
 	}
@@ -581,14 +587,13 @@  void flush_cache_mm(struct mm_struct *mm)
 void flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end)
 {
-	BUG_ON(!vma->vm_mm->context);
-
-	/* Flush the TLB to avoid speculation if coherency is required. */
-	if (parisc_requires_coherency())
+#ifdef CONFIG_SMP
+	if (!arch_irqs_disabled() &&
+	    end - start >= parisc_cache_flush_threshold) {
+#else
+	if (end - start >= parisc_cache_flush_threshold) {
+#endif
 		flush_tlb_range(vma, start, end);
-
-	if ((end - start) >= parisc_cache_flush_threshold
-	    || vma->vm_mm->context != mfsp(3)) {
 		flush_cache_all();
 		return;
 	}
@@ -596,6 +601,7 @@  void flush_cache_range(struct vm_area_struct *vma,
 	flush_user_dcache_range_asm(start, end);
 	if (vma->vm_flags & VM_EXEC)
 		flush_user_icache_range_asm(start, end);
+	flush_tlb_range(vma, start, end);
 }
 
 void
@@ -604,8 +610,7 @@  flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 	BUG_ON(!vma->vm_mm->context);
 
 	if (pfn_valid(pfn)) {
-		if (parisc_requires_coherency())
-			flush_tlb_page(vma, vmaddr);
+		flush_tlb_page(vma, vmaddr);
 		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
 	}
 }
@@ -613,21 +618,41 @@  flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long
 void flush_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
 
-	if ((unsigned long)size > parisc_cache_flush_threshold)
+#ifdef CONFIG_SMP
+	if (!arch_irqs_disabled() &&
+	    (unsigned long)size >= parisc_cache_flush_threshold) {
+#else
+	if ((unsigned long)size >= parisc_cache_flush_threshold) {
+#endif
+		flush_tlb_kernel_range(start, end);
 		flush_data_cache();
-	else
-		flush_kernel_dcache_range_asm(start, start + size);
+		return;
+	}
+
+	flush_kernel_dcache_range_asm(start, end);
+	flush_tlb_kernel_range(start, end);
 }
 EXPORT_SYMBOL(flush_kernel_vmap_range);
 
 void invalidate_kernel_vmap_range(void *vaddr, int size)
 {
 	unsigned long start = (unsigned long)vaddr;
+	unsigned long end = start + size;
 
-	if ((unsigned long)size > parisc_cache_flush_threshold)
+#ifdef CONFIG_SMP
+	if (!arch_irqs_disabled() &&
+	    (unsigned long)size >= parisc_cache_flush_threshold) {
+#else
+	if ((unsigned long)size >= parisc_cache_flush_threshold) {
+#endif
+		flush_tlb_kernel_range(start, end);
 		flush_data_cache();
-	else
-		flush_kernel_dcache_range_asm(start, start + size);
+		return;
+	}
+
+	purge_kernel_dcache_range_asm(start, end);
+	flush_tlb_kernel_range(start, end);
 }
 EXPORT_SYMBOL(invalidate_kernel_vmap_range);
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 2d40c4ff3f69..67b0f7532e83 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -1110,6 +1110,28 @@  ENTRY_CFI(flush_kernel_dcache_range_asm)
 	.procend
 ENDPROC_CFI(flush_kernel_dcache_range_asm)
 
+ENTRY_CFI(purge_kernel_dcache_range_asm)
+	.proc
+	.callinfo NO_CALLS
+	.entry
+
+	ldil		L%dcache_stride, %r1
+	ldw		R%dcache_stride(%r1), %r23
+	ldo		-1(%r23), %r21
+	ANDCM		%r26, %r21, %r26
+
+1:      cmpb,COND(<<),n	%r26, %r25,1b
+	pdc,m		%r23(%r26)
+
+	sync
+	syncdma
+	bv		%r0(%r2)
+	nop
+	.exit
+
+	.procend
+ENDPROC_CFI(purge_kernel_dcache_range_asm)
+
 ENTRY_CFI(flush_user_icache_range_asm)
 	.proc
 	.callinfo NO_CALLS