@@ -27,7 +27,7 @@ spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
static inline spinlock_t *
pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
{
- if (pud_trans_huge(*pud) || pud_devmap(*pud))
+ if (pud_is_leaf(*pud))
return __pud_trans_huge_lock(pud, vma);
else
return NULL;
@@ -36,7 +36,7 @@ pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
#define split_huge_pud(__vma, __pud, __address) \
do { \
pud_t *____pud = (__pud); \
- if (pud_trans_huge(*____pud) || pud_devmap(*____pud)) \
+ if (pud_is_leaf(*____pud)) \
__split_huge_pud(__vma, __pud, __address); \
} while (0)
#else /* CONFIG_PGTABLE_HAS_PUD_LEAVES */
@@ -125,7 +125,7 @@ static inline int is_swap_pmd(pmd_t pmd)
static inline spinlock_t *
pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
{
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
+ if (is_swap_pmd(*pmd) || pmd_is_leaf(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
else
return NULL;
@@ -1641,7 +1641,7 @@ static inline int pud_trans_unstable(pud_t *pud)
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
pud_t pudval = READ_ONCE(*pud);
- if (pud_none(pudval) || pud_trans_huge(pudval) || pud_devmap(pudval))
+ if (pud_none(pudval) || pud_leaf(pudval))
return 1;
if (unlikely(pud_bad(pudval))) {
pud_clear_bad(pud);
@@ -1901,6 +1901,34 @@ typedef unsigned int pgtbl_mod_mask;
#define pmd_leaf(x) false
#endif
+/*
+ * Wrappers of the pxx_leaf() helpers.
+ *
+ * Compared to the pxx_leaf() API, the only difference is that these
+ * macros help code generation: unnecessary code can be omitted when a
+ * given leaf level is impossible with the current kernel config.  This
+ * is needed because pxx_leaf() is normally defined by arch code, which
+ * has no knowledge of the kernel config.
+ *
+ * Currently we only need the pmd/pud versions, because the largest leaf
+ * Linux supports so far is at the pud level.
+ *
+ * Defining them here also means these macros cannot be used in arch
+ * pgtable headers; those must keep using pxx_leaf(), because this file
+ * is not included from arch pgtable headers.
+ */
+#ifdef CONFIG_PGTABLE_HAS_PMD_LEAVES
+#define pmd_is_leaf(x) pmd_leaf(x)
+#else
+#define pmd_is_leaf(x) false
+#endif
+
+#ifdef CONFIG_PGTABLE_HAS_PUD_LEAVES
+#define pud_is_leaf(x) pud_leaf(x)
+#else
+#define pud_is_leaf(x) false
+#endif
+
#ifndef pgd_leaf_size
#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
#endif
@@ -351,7 +351,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
}
- if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
+ if (pmd_is_leaf(pmd)) {
/*
* No need to take pmd_lock here, even if some other thread
* is splitting the huge pmd we will get that event through
@@ -362,7 +362,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
* values.
*/
pmd = pmdp_get_lockless(pmdp);
- if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
+ if (!pmd_is_leaf(pmd))
goto again;
return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
@@ -60,8 +60,7 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
spinlock_t *ptl;
ptl = pmd_lock(vma->vm_mm, pmd);
- if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) ||
- pmd_devmap(*pmd)))
+ if (likely(is_swap_pmd(*pmd) || pmd_is_leaf(*pmd)))
return ptl;
spin_unlock(ptl);
return NULL;
@@ -627,8 +626,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
VM_BUG_ON_VMA(vma->vm_start > haddr, vma);
VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma);
- VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd) &&
- !pmd_devmap(*pmd));
+ VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_is_leaf(*pmd));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
count_vm_event(THP_SPLIT_PMD);
@@ -845,8 +843,7 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address,
* require a folio to check the PMD against. Otherwise, there
* is a risk of replacing the wrong folio.
*/
- if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd) ||
- is_pmd_migration_entry(*pmd)) {
+ if (pmd_is_leaf(*pmd) || is_pmd_migration_entry(*pmd)) {
if (folio && folio != pmd_folio(*pmd))
return;
__split_huge_pmd_locked(vma, pmd, address, freeze);
@@ -57,7 +57,7 @@ spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma)
spinlock_t *ptl;
ptl = pud_lock(vma->vm_mm, pud);
- if (likely(pud_trans_huge(*pud) || pud_devmap(*pud)))
+ if (likely(pud_is_leaf(*pud)))
return ptl;
spin_unlock(ptl);
return NULL;
@@ -90,7 +90,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
ret = -EAGAIN;
pud = *src_pud;
- if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud)))
+ if (unlikely(!pud_leaf(pud)))
goto out_unlock;
/*
@@ -225,7 +225,7 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
(address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE);
mmu_notifier_invalidate_range_start(&range);
ptl = pud_lock(vma->vm_mm, pud);
- if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud)))
+ if (unlikely(!pud_is_leaf(*pud)))
goto out;
__split_huge_pud_locked(vma, pud, range.start);
@@ -129,7 +129,7 @@ static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
pmd_t pmdval = pmdp_get_lockless(pmd);
/* Do not split a huge pmd, present or migrated */
- if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval)) {
+ if (pmd_is_leaf(pmdval)) {
WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));
walk->action = ACTION_CONTINUE;
}
@@ -152,7 +152,7 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
pud_t pudval = READ_ONCE(*pud);
/* Do not split a huge pud */
- if (pud_trans_huge(pudval) || pud_devmap(pudval)) {
+ if (pud_is_leaf(pudval)) {
WARN_ON(pud_write(pudval) || pud_dirty(pudval));
walk->action = ACTION_CONTINUE;
}
@@ -1235,8 +1235,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
src_pmd = pmd_offset(src_pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)
- || pmd_devmap(*src_pmd)) {
+ if (is_swap_pmd(*src_pmd) || pmd_is_leaf(*src_pmd)) {
int err;
VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma);
err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd,
@@ -1272,7 +1271,7 @@ copy_pud_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
src_pud = pud_offset(src_p4d, addr);
do {
next = pud_addr_end(addr, end);
- if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) {
+ if (pud_is_leaf(*src_pud)) {
int err;
VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma);
@@ -1710,7 +1709,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
+ if (is_swap_pmd(*pmd) || pmd_is_leaf(*pmd)) {
if (next - addr != HPAGE_PMD_SIZE)
__split_huge_pmd(vma, pmd, addr, false, NULL);
else if (zap_huge_pmd(tlb, vma, pmd, addr)) {
@@ -1752,7 +1751,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
pud = pud_offset(p4d, addr);
do {
next = pud_addr_end(addr, end);
- if (pud_trans_huge(*pud) || pud_devmap(*pud)) {
+ if (pud_is_leaf(*pud)) {
if (next - addr != HPAGE_PUD_SIZE) {
mmap_assert_locked(tlb->mm);
split_huge_pud(vma, pud, addr);
@@ -5605,8 +5604,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
pud_t orig_pud = *vmf.pud;
barrier();
- if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) {
-
+ if (pud_is_leaf(orig_pud)) {
/*
* TODO once we support anonymous PUDs: NUMA case and
* FAULT_FLAG_UNSHARE handling.
@@ -5646,7 +5644,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
pmd_migration_entry_wait(mm, vmf.pmd);
return 0;
}
- if (pmd_trans_huge(vmf.orig_pmd) || pmd_devmap(vmf.orig_pmd)) {
+ if (pmd_is_leaf(vmf.orig_pmd)) {
if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
return do_huge_pmd_numa_page(&vmf);
@@ -596,7 +596,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
pmdp = pmd_alloc(mm, pudp, addr);
if (!pmdp)
goto abort;
- if (pmd_trans_huge(*pmdp) || pmd_devmap(*pmdp))
+ if (pmd_leaf(*pmdp))
goto abort;
if (pte_alloc(mm, pmdp))
goto abort;
@@ -381,7 +381,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
goto next;
_pmd = pmdp_get_lockless(pmd);
- if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) {
+ if (is_swap_pmd(_pmd) || pmd_is_leaf(_pmd)) {
if ((next - addr != HPAGE_PMD_SIZE) ||
pgtable_split_needed(vma, cp_flags)) {
__split_huge_pmd(vma, pmd, addr, false, NULL);
@@ -452,7 +452,7 @@ static inline long change_pud_range(struct mmu_gather *tlb,
mmu_notifier_invalidate_range_start(&range);
}
- if (pud_leaf(pud)) {
+ if (pud_is_leaf(pud)) {
if ((next - addr != PUD_SIZE) ||
pgtable_split_needed(vma, cp_flags)) {
__split_huge_pud(vma, pudp, addr);
@@ -587,7 +587,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
new_pud = alloc_new_pud(vma->vm_mm, vma, new_addr);
if (!new_pud)
break;
- if (pud_trans_huge(*old_pud) || pud_devmap(*old_pud)) {
+ if (pud_is_leaf(*old_pud)) {
if (extent == HPAGE_PUD_SIZE) {
move_pgt_entry(HPAGE_PUD, vma, old_addr, new_addr,
old_pud, new_pud, need_rmap_locks);
@@ -609,8 +609,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
if (!new_pmd)
break;
again:
- if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd) ||
- pmd_devmap(*old_pmd)) {
+ if (is_swap_pmd(*old_pmd) || pmd_is_leaf(*old_pmd)) {
if (extent == HPAGE_PMD_SIZE &&
move_pgt_entry(HPAGE_PMD, vma, old_addr, new_addr,
old_pmd, new_pmd, need_rmap_locks))
@@ -235,8 +235,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
*/
pmde = pmdp_get_lockless(pvmw->pmd);
- if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde) ||
- (pmd_present(pmde) && pmd_devmap(pmde))) {
+ if (pmd_is_leaf(pmde) || is_pmd_migration_entry(pmde)) {
pvmw->ptl = pmd_lock(mm, pvmw->pmd);
pmde = *pvmw->pmd;
if (!pmd_present(pmde)) {
@@ -251,7 +250,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
return true;
}
- if (likely(pmd_trans_huge(pmde) || pmd_devmap(pmde))) {
+ if (likely(pmd_is_leaf(pmde))) {
if (pvmw->flags & PVMW_MIGRATION)
return not_found(pvmw);
if (!check_pmd(pmd_pfn(pmde), pvmw))
@@ -139,8 +139,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
- !pmd_devmap(*pmdp));
+ VM_BUG_ON(pmd_present(*pmdp) && !pmd_leaf(*pmdp));
pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
return pmd;
@@ -247,7 +246,7 @@ pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
pud_t pud;
VM_BUG_ON(address & ~HPAGE_PUD_MASK);
- VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp));
+ VM_BUG_ON(!pud_leaf(*pudp));
pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp);
flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE);
return pud;
@@ -293,7 +292,7 @@ pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp)
*pmdvalp = pmdval;
if (unlikely(pmd_none(pmdval) || is_pmd_migration_entry(pmdval)))
goto nomap;
- if (unlikely(pmd_trans_huge(pmdval) || pmd_devmap(pmdval)))
+ if (unlikely(pmd_leaf(pmdval)))
goto nomap;
if (unlikely(pmd_bad(pmdval))) {
pmd_clear_bad(pmd);
This patch converts all such checks into one *_leaf() check under common
mm/, since "thp + devmap" covers everything a *_leaf() can be for now.
I didn't touch arch code in other directories yet, as some architectures
may need special attention, so those are left for separate patches.  It
should start to save some cycles on such checks and pave the way for new
leaf types: when a new type of leaf is introduced, it will naturally
follow the same route that thp + devmap take today.

One issue with the pxx_leaf() API is that it is defined by arch code,
which does not take the kernel config into account.  For example, the
branch below cannot be optimized away automatically:

	if (pmd_leaf(pmd)) { ... }

even when both THP and HUGETLB are disabled (which means pmd_leaf() can
never return true).

To give compilers a chance to optimize and omit code when possible,
introduce light wrappers called pxx_is_leaf().  These take the kernel
config into account, allowing branches to be omitted when the compiler
knows the check is constantly false.  This mimics what we used to have
with pxx_trans_huge() when !THP, now applied to the pxx_leaf() API.

Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/linux/huge_mm.h    |  6 +++---
 include/linux/pgtable.h    | 30 +++++++++++++++++++++++++++++-
 mm/hmm.c                   |  4 ++--
 mm/huge_mapping_pmd.c      |  9 +++------
 mm/huge_mapping_pud.c      |  6 +++---
 mm/mapping_dirty_helpers.c |  4 ++--
 mm/memory.c                | 14 ++++++--------
 mm/migrate_device.c        |  2 +-
 mm/mprotect.c              |  4 ++--
 mm/mremap.c                |  5 ++---
 mm/page_vma_mapped.c       |  5 ++---
 mm/pgtable-generic.c       |  7 +++----
 12 files changed, 58 insertions(+), 38 deletions(-)
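
As a standalone illustration of the compiler argument above (not code
from this series), here is a minimal userspace sketch: the config symbol
CONFIG_PGTABLE_HAS_PMD_LEAVES is mocked as a plain preprocessor define,
and pmd_leaf() is a hypothetical stand-in for the arch helper, so the
names and bit layout are assumptions made only for the demo.

	/*
	 * Illustrative only, not part of the patch: mock the config knob
	 * and an arch pmd_leaf() to show how the wrapper lets dead
	 * branches be dropped.
	 */
	#include <stdbool.h>
	#include <stdio.h>

	typedef struct { unsigned long val; } pmd_t;

	/* Stand-in for an arch helper that knows nothing of the config. */
	static inline bool pmd_leaf(pmd_t pmd)
	{
		return pmd.val & 0x1;	/* pretend bit 0 marks a leaf */
	}

	/* The wrapper pattern from this patch, config symbol mocked. */
	#ifdef CONFIG_PGTABLE_HAS_PMD_LEAVES
	#define pmd_is_leaf(x)	pmd_leaf(x)
	#else
	#define pmd_is_leaf(x)	false
	#endif

	int main(void)
	{
		pmd_t pmd = { .val = 0x1 };

		/*
		 * Built without -DCONFIG_PGTABLE_HAS_PMD_LEAVES, the
		 * condition below is the literal `false`, so the compiler
		 * can discard the whole branch at compile time.
		 */
		if (pmd_is_leaf(pmd))
			printf("leaf mapping\n");
		return 0;
	}

Building it both ways and comparing the assembly (e.g. gcc -O2 -S, with
and without -DCONFIG_PGTABLE_HAS_PMD_LEAVES) shows the branch and the
string vanish in the disabled build, which is exactly the code-generation
benefit the wrapper is after.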