@@ -41,6 +41,9 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal, so there is nothing encoded in
+ * the page pointer.
*/
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct encoded_page *page,
@@ -259,6 +259,28 @@ struct mmu_gather_batch {
extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct encoded_page *page,
int page_size);
+
+#ifdef CONFIG_SMP
+/*
+ * This both sets 'delayed_rmap', and returns true. It would be an inline
+ * function, except we define it before the 'struct mmu_gather'.
+ */
+#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
+extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
+#endif
+
+#endif
+
+/*
+ * We have a no-op version of the rmap removal that doesn't
+ * delay anything. That is used on S390, which flushes remote
+ * TLBs synchronously, and on UP, which doesn't have any
+ * remote TLBs to flush and is not preemptible due to this
+ * all happening under the page table lock.
+ */
+#ifndef tlb_delay_rmap
+#define tlb_delay_rmap(tlb) (false)
+static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
#endif
/*
@@ -291,6 +313,11 @@ struct mmu_gather {
*/
unsigned int freed_tables : 1;
+ /*
+ * Do we have pending delayed rmap removals?
+ */
+ unsigned int delayed_rmap : 1;
+
/*
* at which levels have we cleared entries?
*/
@@ -436,9 +463,9 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
tlb_flush_mmu(tlb);
}
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
{
- return __tlb_remove_page_size(tlb, encode_page(page, 0), PAGE_SIZE);
+ return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
}
/* tlb_remove_page
@@ -1432,6 +1432,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
break;
if (pte_present(ptent)) {
+ unsigned int delay_rmap;
+
page = vm_normal_page(vma, addr, ptent);
if (unlikely(!should_zap_page(details, page)))
continue;
@@ -1443,20 +1445,26 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
if (unlikely(!page))
continue;
+ delay_rmap = 0;
if (!PageAnon(page)) {
if (pte_dirty(ptent)) {
- force_flush = 1;
set_page_dirty(page);
+ if (tlb_delay_rmap(tlb)) {
+ delay_rmap = 1;
+ force_flush = 1;
+ }
}
if (pte_young(ptent) &&
likely(!(vma->vm_flags & VM_SEQ_READ)))
mark_page_accessed(page);
}
rss[mm_counter(page)]--;
- page_remove_rmap(page, vma, false);
- if (unlikely(page_mapcount(page) < 0))
- print_bad_pte(vma, addr, ptent, page);
- if (unlikely(__tlb_remove_page(tlb, page))) {
+ if (!delay_rmap) {
+ page_remove_rmap(page, vma, false);
+ if (unlikely(page_mapcount(page) < 0))
+ print_bad_pte(vma, addr, ptent, page);
+ }
+ if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
force_flush = 1;
addr += PAGE_SIZE;
break;
@@ -1513,8 +1521,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
arch_leave_lazy_mmu_mode();
/* Do the actual TLB flush before dropping ptl */
- if (force_flush)
+ if (force_flush) {
tlb_flush_mmu_tlbonly(tlb);
+ if (tlb->delayed_rmap)
+ tlb_flush_rmaps(tlb, vma);
+ }
pte_unmap_unlock(start_pte, ptl);
/*
@@ -9,6 +9,7 @@
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>
+#include <linux/rmap.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
@@ -19,6 +20,10 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
+ /* No more batching if we have delayed rmaps pending */
+ if (tlb->delayed_rmap)
+ return false;
+
batch = tlb->active;
if (batch->next) {
tlb->active = batch->next;
@@ -43,6 +48,31 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
return true;
}
+/**
+ * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
+ * @tlb: the current mmu_gather
+ *
+ * Note that because of how tlb_next_batch() above works, we will
+ * never start new batches with pending delayed rmaps, so we only
+ * need to walk through the current active batch.
+ */
+void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+ struct mmu_gather_batch *batch;
+
+ batch = tlb->active;
+ for (int i = 0; i < batch->nr; i++) {
+ struct encoded_page *enc = batch->encoded_pages[i];
+
+ if (encoded_page_flags(enc)) {
+ struct page *page = encoded_page_ptr(enc);
+ page_remove_rmap(page, vma, false);
+ }
+ }
+
+ tlb->delayed_rmap = 0;
+}
+
static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
struct mmu_gather_batch *batch;
@@ -286,6 +316,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
tlb->active = &tlb->local;
tlb->batch_count = 0;
#endif
+ tlb->delayed_rmap = 0;
tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE