@@ -250,6 +250,7 @@ config X86
select X86_FEATURE_NAMES if PROC_FS
select PROC_PID_ARCH_STATUS if PROC_FS
select MAPPING_DIRTY_HELPERS
+ select PER_TABLE_DEFERRED_FLUSHES if X86_64
imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI
config INSTRUCTION_DECODER
@@ -774,17 +774,18 @@ static inline int pte_devmap(pte_t a)
}
#endif
-#define pte_accessible pte_accessible
-static inline bool pte_accessible(struct vm_area_struct *vma, pte_t *a)
-{
- if (pte_flags(*a) & _PAGE_PRESENT)
- return true;
-
- if ((pte_flags(*a) & _PAGE_PROTNONE) && pte_tlb_flush_pending(vma, a))
- return true;
-
- return false;
-}
+#define pte_accessible(vma, a) \
+ ({ \
+ pte_t *_a = (a); \
+ bool _r = false; \
+ \
+ if (pte_flags(*_a) & _PAGE_PRESENT) \
+ _r = true; \
+ else \
+ _r = ((pte_flags(*_a) & _PAGE_PROTNONE) && \
+ pte_tlb_flush_pending((vma), _a)); \
+ _r; \
+ })
static inline int pmd_present(pmd_t pmd)
{
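For reference, the new macro is behaviorally identical to the static inline it replaces; a function-form sketch (illustration only, not part of the patch; the name pte_accessible_sketch is made up):

/* Illustration only: equivalent logic of the pte_accessible() macro above. */
static inline bool pte_accessible_sketch(struct vm_area_struct *vma, pte_t *a)
{
	/* A present PTE may be cached in the TLB. */
	if (pte_flags(*a) & _PAGE_PRESENT)
		return true;

	/*
	 * A PROT_NONE PTE is reported accessible only while
	 * pte_tlb_flush_pending() is true for it.
	 */
	return (pte_flags(*a) & _PAGE_PROTNONE) &&
	       pte_tlb_flush_pending(vma, a);
}

The switch to a macro presumably sidesteps a header-ordering dependency on pte_tlb_flush_pending() and the page-table helpers at this point in pgtable.h; the logic itself is unchanged.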
@@ -1157,7 +1157,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
/* Clear accessed and referenced bits. */
pmdp_test_and_clear_young(vma, addr, pmd);
test_and_clear_page_young(page);
- tlb_flush_pmd_range(&cp->tlb, addr, HPAGE_PMD_SIZE);
+ tlb_flush_pmd_range(&cp->tlb, pmd, addr, HPAGE_PMD_SIZE);
ClearPageReferenced(page);
out:
spin_unlock(ptl);
@@ -1174,7 +1174,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
clear_soft_dirty(vma, addr, pte);
- tlb_flush_pte_range(&cp->tlb, addr, PAGE_SIZE);
+ tlb_flush_pte_range(&cp->tlb, pte, addr, PAGE_SIZE);
continue;
}
@@ -1188,7 +1188,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
test_and_clear_page_young(page);
- tlb_flush_pte_range(&cp->tlb, addr, PAGE_SIZE);
+ tlb_flush_pte_range(&cp->tlb, pte, addr, PAGE_SIZE);
ClearPageReferenced(page);
}
tlb_end_ptes(&cp->tlb);
@@ -310,10 +310,12 @@ struct mmu_gather {
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
unsigned int page_size;
#endif
-
#ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS
u64 defer_gen;
#endif
+#ifdef CONFIG_PER_TABLE_DEFERRED_FLUSHES
+ pte_t *last_pte;
+#endif
#endif
};
@@ -572,21 +574,45 @@ static inline void read_defer_tlb_flush_gen(struct mmu_gather *tlb)
}
}
+#ifndef CONFIG_PER_TABLE_DEFERRED_FLUSHES
+
/*
- * Store the deferred TLB generation in the VMA
+ * Store the deferred TLB generation in the VMA (no per-table tracking)
*/
-static inline void store_deferred_tlb_gen(struct mmu_gather *tlb)
+static inline void store_deferred_tlb_gen(struct mmu_gather *tlb,
+ struct page *page)
{
tlb_update_generation(&tlb->vma->defer_tlb_gen, tlb->defer_gen);
}
+static inline void tlb_set_last_pte(struct mmu_gather *tlb, pte_t *pte) { }
+
+#else /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
+
+/*
+ * Store the deferred TLB generation in the page-table's struct page
+ */
+static inline void store_deferred_tlb_gen(struct mmu_gather *tlb,
+ struct page *page)
+{
+ page->deferred_tlb_gen = tlb->defer_gen;
+}
+
+static inline void tlb_set_last_pte(struct mmu_gather *tlb, pte_t *pte)
+{
+ tlb->last_pte = pte;
+}
+
+#endif /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
+
/*
* Track deferred TLB flushes for PTEs and PMDs to allow fine-grained checks of
* whether a PTE is accessible. The TLB generation after the PTE is flushed is
* saved in the mmu_gather struct. Once a flush is performed, the generation is
* advanced.
*/
-static inline void track_defer_tlb_flush(struct mmu_gather *tlb)
+static inline void track_defer_tlb_flush(struct mmu_gather *tlb,
+ struct page *page)
{
if (tlb->fullmm)
return;
@@ -594,7 +620,7 @@ static inline void track_defer_tlb_flush(struct mmu_gather *tlb)
BUG_ON(!tlb->vma);
read_defer_tlb_flush_gen(tlb);
- store_deferred_tlb_gen(tlb);
+ store_deferred_tlb_gen(tlb, page);
}
#define init_vma_tlb_generation(vma) \
@@ -610,6 +636,7 @@ static inline void init_vma_tlb_generation(struct vm_area_struct *vma) { }
flush_tlb_batched_pending(_tlb->mm); \
if (IS_ENABLED(CONFIG_ARCH_HAS_TLB_GENERATIONS)) \
_tlb->cleared_ptes_in_table = 0; \
+ tlb_set_last_pte(_tlb, NULL); \
} while (0)
static inline void tlb_end_ptes(struct mmu_gather *tlb)
@@ -617,24 +644,31 @@ static inline void tlb_end_ptes(struct mmu_gather *tlb)
if (!IS_ENABLED(CONFIG_ARCH_HAS_TLB_GENERATIONS))
return;
+#ifdef CONFIG_PER_TABLE_DEFERRED_FLUSHES
+ if (tlb->last_pte)
+ track_defer_tlb_flush(tlb, pte_to_page(tlb->last_pte));
+#elif defined(CONFIG_ARCH_HAS_TLB_GENERATIONS) /* && !CONFIG_PER_TABLE_DEFERRED_FLUSHES */
if (tlb->cleared_ptes_in_table)
- track_defer_tlb_flush(tlb);
-
+ track_defer_tlb_flush(tlb, NULL);
tlb->cleared_ptes_in_table = 0;
+#endif /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
}
/*
* tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
* and set corresponding cleared_*.
*/
-static inline void tlb_flush_pte_range(struct mmu_gather *tlb,
+static inline void tlb_flush_pte_range(struct mmu_gather *tlb, pte_t *pte,
unsigned long address, unsigned long size)
{
__tlb_adjust_range(tlb, address, size);
tlb->cleared_ptes = 1;
- if (IS_ENABLED(CONFIG_ARCH_HAS_TLB_GENERATIONS))
+ if (IS_ENABLED(CONFIG_ARCH_HAS_TLB_GENERATIONS) &&
+ !IS_ENABLED(CONFIG_PER_TABLE_DEFERRED_FLUSHES))
tlb->cleared_ptes_in_table = 1;
+
+ tlb_set_last_pte(tlb, pte);
}
static inline void __tlb_flush_pmd_range(struct mmu_gather *tlb,
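Taken together with the clear_refs_pte_range() hunks above, the intended calling pattern looks roughly like the sketch below (illustration only: the function name is made up, locking and mapcount handling are omitted, and the tlb_start_ptes() counterpart is assumed from earlier in the series). With CONFIG_PER_TABLE_DEFERRED_FLUSHES, tlb_flush_pte_range() only records the PTE pointer via tlb_set_last_pte(), and tlb_end_ptes() then stores tlb->defer_gen into the containing page-table's struct page through track_defer_tlb_flush().

/* Illustration only: simplified PTE-walk pattern, no locking. */
static void sketch_clear_young_ptes(struct mmu_gather *tlb,
				    struct vm_area_struct *vma,
				    pte_t *pte, unsigned long addr,
				    unsigned long end)
{
	tlb_start_ptes(tlb);		/* resets last_pte (assumed helper) */
	for (; addr != end; pte++, addr += PAGE_SIZE) {
		if (!pte_present(*pte))
			continue;
		/* Clear the accessed bit, then record the deferred flush. */
		ptep_test_and_clear_young(vma, addr, pte);
		tlb_flush_pte_range(tlb, pte, addr, PAGE_SIZE);
	}
	tlb_end_ptes(tlb);		/* per-table generation stored here */
}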
@@ -644,11 +678,11 @@ static inline void __tlb_flush_pmd_range(struct mmu_gather *tlb,
tlb->cleared_pmds = 1;
}
-static inline void tlb_flush_pmd_range(struct mmu_gather *tlb,
+static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, pmd_t *pmd,
unsigned long address, unsigned long size)
{
__tlb_flush_pmd_range(tlb, address, size);
- track_defer_tlb_flush(tlb);
+ track_defer_tlb_flush(tlb, pmd_to_page(pmd));
}
static inline void tlb_flush_pud_range(struct mmu_gather *tlb,
@@ -678,7 +712,8 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
*/
#define tlb_remove_tlb_entry(tlb, ptep, address) \
do { \
- tlb_flush_pte_range(tlb, address, PAGE_SIZE); \
+ tlb_flush_pte_range(tlb, ptep, address, \
+ PAGE_SIZE); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
} while (0)
@@ -686,7 +721,8 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
do { \
unsigned long _sz = huge_page_size(h); \
if (_sz == PMD_SIZE) \
- tlb_flush_pmd_range(tlb, address, _sz); \
+ tlb_flush_pmd_range(tlb, (pmd_t *)ptep, \
+ address, _sz); \
else if (_sz == PUD_SIZE) \
tlb_flush_pud_range(tlb, address, _sz); \
__tlb_remove_tlb_entry(tlb, ptep, address); \
@@ -702,7 +738,8 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb,
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
do { \
- tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE); \
+ tlb_flush_pmd_range(tlb, pmdp, address, \
+ HPAGE_PMD_SIZE); \
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
} while (0)
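Note the asymmetry with the PTE side: tlb_flush_pmd_range() calls track_defer_tlb_flush(tlb, pmd_to_page(pmd)) immediately, while PTE flushes are batched through tlb->last_pte and only tracked in tlb_end_ptes(). A sketch of a transparent-huge-page caller, loosely modelled on zap_huge_pmd() in mm/huge_memory.c (heavily simplified, no locking or page accounting; illustration only):

static void sketch_zap_huge_pmd(struct mmu_gather *tlb,
				struct vm_area_struct *vma,
				pmd_t *pmd, unsigned long addr)
{
	/* Real code keeps the returned old PMD for dirty/young propagation. */
	pmdp_huge_get_and_clear_full(vma, addr, pmd, tlb->fullmm);

	/*
	 * Adjusts tlb->start/end, sets cleared_pmds and, via
	 * track_defer_tlb_flush(tlb, pmd_to_page(pmd)), records the deferred
	 * generation in the PMD table's struct page right away.
	 */
	tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
}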
@@ -2208,11 +2208,21 @@ static inline void pgtable_init(void)
pgtable_cache_init();
}
+#ifdef CONFIG_PER_TABLE_DEFERRED_FLUSHES
+static inline void page_table_tlb_gen_init(struct page *page)
+{
+ page->deferred_tlb_gen = 0;
+}
+#else /* !CONFIG_PER_TABLE_DEFERRED_FLUSHES */
+static inline void page_table_tlb_gen_init(struct page *page) { }
+#endif /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
+
static inline bool pgtable_pte_page_ctor(struct page *page)
{
if (!ptlock_init(page))
return false;
__SetPageTable(page);
+ page_table_tlb_gen_init(page);
inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2221,6 +2231,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
{
ptlock_free(page);
__ClearPageTable(page);
+ page_table_tlb_gen_init(page);
dec_lruvec_page_state(page, NR_PAGETABLE);
}
@@ -2308,6 +2319,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
if (!pmd_ptlock_init(page))
return false;
__SetPageTable(page);
+ page_table_tlb_gen_init(page);
inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2316,6 +2328,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
{
pmd_ptlock_free(page);
__ClearPageTable(page);
+ page_table_tlb_gen_init(page);
dec_lruvec_page_state(page, NR_PAGETABLE);
}
@@ -148,6 +148,9 @@ struct page {
pgtable_t pmd_huge_pte; /* protected by page->ptl */
unsigned long _pt_pad_2; /* mapping */
union {
+#ifdef CONFIG_PER_TABLE_DEFERRED_FLUSHES
+ u64 deferred_tlb_gen; /* x86 non-pgd page-tables; protected by page->ptl */
+#endif
struct mm_struct *pt_mm; /* x86 pgds only */
atomic_t pt_frag_refcount; /* powerpc */
};
@@ -632,6 +635,7 @@ static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
}
#ifdef CONFIG_ARCH_HAS_TLB_GENERATIONS
+#ifndef CONFIG_PER_TABLE_DEFERRED_FLUSHES
static inline bool pte_tlb_flush_pending(struct vm_area_struct *vma, pte_t *pte)
{
struct mm_struct *mm = vma->vm_mm;
@@ -645,6 +649,24 @@ static inline bool pmd_tlb_flush_pending(struct vm_area_struct *vma, pmd_t *pmd)
return atomic64_read(&vma->defer_tlb_gen) < atomic64_read(&mm->tlb_gen_completed);
}
+#else /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
+#define pte_tlb_flush_pending(vma, pte) \
+ ({ \
+ struct mm_struct *mm = (vma)->vm_mm; \
+ \
+ (pte_to_page(pte))->deferred_tlb_gen < \
+ atomic64_read(&mm->tlb_gen_completed); \
+ })
+
+#define pmd_tlb_flush_pending(vma, pmd) \
+ ({ \
+ struct mm_struct *mm = (vma)->vm_mm; \
+ \
+ (pmd_to_page(pmd))->deferred_tlb_gen < \
+ atomic64_read(&mm->tlb_gen_completed); \
+ })
+
+#endif /* CONFIG_PER_TABLE_DEFERRED_FLUSHES */
#else /* CONFIG_ARCH_HAS_TLB_GENERATIONS */
static inline bool pte_tlb_flush_pending(struct vm_area_struct *vma, pte_t *pte)
{
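For readability, the per-table pte_tlb_flush_pending() above is equivalent to the following function form (illustration only; the name is made up). This is what the x86 pte_accessible() hunk at the top of the patch ends up consulting for PROT_NONE PTEs:

/* Illustration only: equivalent logic of the per-table macro above. */
static inline bool pte_tlb_flush_pending_sketch(struct vm_area_struct *vma,
						pte_t *pte)
{
	struct mm_struct *mm = vma->vm_mm;

	/*
	 * The deferred generation lives in the struct page of the page-table
	 * that contains the PTE, and is compared against the mm-wide count of
	 * completed TLB-flush generations.
	 */
	return pte_to_page(pte)->deferred_tlb_gen <
	       atomic64_read(&mm->tlb_gen_completed);
}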
@@ -857,6 +857,13 @@ config ARCH_WANT_AGGRESSIVE_TLB_FLUSH_BATCHING
bool
depends on !MMU_GATHER_NO_GATHER
+#
+# For architectures that prefer to track deferred TLB generations per
+# page-table (in the page-table's struct page) rather than per VMA.
+config PER_TABLE_DEFERRED_FLUSHES
+ bool
+ depends on ARCH_HAS_TLB_GENERATIONS && 64BIT
+
config CC_HAS_INT128
def_bool !$(cc-option,$(m64-flag) -D__SIZEOF_INT128__=0) && 64BIT
@@ -1886,7 +1886,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
entry = pmd_clear_uffd_wp(entry);
}
ret = HPAGE_PMD_NR;
- tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
+ tlb_flush_pmd_range(tlb, pmd, addr, HPAGE_PMD_SIZE);
set_pmd_at(mm, addr, pmd, entry);
BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
unlock:
@@ -48,7 +48,7 @@ static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
wpwalk->total++;
if (pte_may_need_flush(old_pte, ptent))
- tlb_flush_pte_range(&wpwalk->tlb, addr, PAGE_SIZE);
+ tlb_flush_pte_range(&wpwalk->tlb, pte, addr, PAGE_SIZE);
tlb_end_ptes(&wpwalk->tlb);
}
@@ -110,7 +110,7 @@ static int clean_record_pte(pte_t *pte, unsigned long addr,
ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
wpwalk->total++;
- tlb_flush_pte_range(&wpwalk->tlb, addr, PAGE_SIZE);
+ tlb_flush_pte_range(&wpwalk->tlb, pte, addr, PAGE_SIZE);
tlb_end_ptes(&wpwalk->tlb);
__set_bit(pgoff, cwalk->bitmap);
@@ -140,7 +140,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
}
ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent);
if (pte_may_need_flush(oldpte, ptent))
- tlb_flush_pte_range(tlb, addr, PAGE_SIZE);
+ tlb_flush_pte_range(tlb, pte, addr, PAGE_SIZE);
pages++;
} else if (is_swap_pte(oldpte)) {
swp_entry_t entry = pte_to_swp_entry(oldpte);