@@ -250,6 +250,9 @@ static inline bool thp_migration_supported(void)
return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
}
+/* Try to collapse the pmd for a pte mapped huge page. */
+void try_collapse_huge_pmd(struct vm_area_struct *vma, struct page *page);
+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
#define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
#define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -368,6 +371,10 @@ static inline bool thp_migration_supported(void)
{
return false;
}
+
+/* !CONFIG_TRANSPARENT_HUGEPAGE: nothing to collapse, so this is a no-op. */
+static inline void try_collapse_huge_pmd(struct vm_area_struct *vma,
+					 struct page *page)
+{
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* _LINUX_HUGE_MM_H */
@@ -474,6 +474,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
struct page *old_page, *new_page;
struct vm_area_struct *vma;
int ret, is_register, ref_ctr_updated = 0;
+ struct page *orig_page = NULL;
is_register = is_swbp_insn(&opcode);
uprobe = container_of(auprobe, struct uprobe, arch);
@@ -512,7 +513,6 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
if (!is_register) {
- struct page *orig_page;
pgoff_t index;
index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
@@ -540,6 +540,9 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
if (ret && is_register && ref_ctr_updated)
update_ref_ctr(uprobe, mm, -1);
+ if (!ret && orig_page && PageTransCompound(orig_page))
+ try_collapse_huge_pmd(vma, orig_page);
+
return ret;
}
@@ -2886,6 +2886,75 @@ static struct shrinker deferred_split_shrinker = {
.flags = SHRINKER_NUMA_AWARE,
};
+/**
+ * try_collapse_huge_pmd - try to collapse the pmd for a pte mapped huge page
+ * @vma: vma containing the huge page
+ * @page: any sub page of the huge page
+ *
+ * Best effort.  Nothing is changed when the huge page is not mapped by @vma
+ * at a pmd aligned address that lies fully inside @vma, when no pte page
+ * table is found for that address, or when any populated pte in the range
+ * maps something other than the matching sub page of the huge page.
+ *
+ * On success the rmap of every mapped sub page is removed, the pmd is
+ * cleared and flushed, the pte page table is freed, and the page references
+ * and mm counters held on behalf of the ptes are dropped.
+ *
+ * Takes the page lock and calls the mmu notifiers, so the caller must be
+ * allowed to sleep.
+ */
+void try_collapse_huge_pmd(struct vm_area_struct *vma,
+			   struct page *page)
+{
+	struct page *hpage = compound_head(page);
+	struct mm_struct *mm = vma->vm_mm;
+	struct mmu_notifier_range range;
+	unsigned long haddr;
+	unsigned long addr;
+	pmd_t *pmd, _pmd;
+	spinlock_t *ptl;
+	int i, count = 0;
+
+	VM_BUG_ON_PAGE(!PageCompound(page), page);
+
+	/*
+	 * page_address_in_vma() returns -EFAULT when the page is not mapped
+	 * by this vma.  The range must also be pmd aligned and inside the
+	 * vma, otherwise the pte walks below would run past the page table.
+	 */
+	haddr = page_address_in_vma(hpage, vma);
+	if (haddr == -EFAULT || (haddr & ~HPAGE_PMD_MASK) ||
+	    haddr + HPAGE_PMD_SIZE > vma->vm_end)
+		return;
+
+	pmd = mm_find_pmd(mm, haddr);
+	if (!pmd)
+		return;
+
+	lock_page(hpage);
+
+	/* The notifier may sleep: start it before taking the pmd spinlock. */
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
+				haddr, haddr + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
+	ptl = pmd_lock(mm, pmd);
+
+	/* step 1: check all mapped PTEs */
+	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+		pte_t *pte = pte_offset_map(pmd, addr);
+		pte_t entry = *pte;
+
+		/* Work on a copy so the mapping can be dropped right away. */
+		pte_unmap(pte);
+
+		if (pte_none(entry))
+			continue;
+		/* Non-present entries (e.g. migration) cannot be collapsed. */
+		if (!pte_present(entry))
+			goto out_unlock;
+		if (hpage + i != vm_normal_page(vma, addr, entry))
+			goto out_unlock;
+		count++;
+	}
+
+	/* step 2: adjust rmap */
+	for (i = 0, addr = haddr; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+		pte_t *pte = pte_offset_map(pmd, addr);
+		pte_t entry = *pte;
+
+		pte_unmap(pte);
+
+		if (pte_none(entry))
+			continue;
+		/* step 1 verified that this pte maps sub page i */
+		page_remove_rmap(hpage + i, false);
+	}
+
+	/* step 3: flip page table */
+	_pmd = pmdp_collapse_flush(vma, haddr, pmd);
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(&range);
+
+	/*
+	 * step 4: free pgtable, set refcount, mm_counters, etc.
+	 *
+	 * The references taken for the ptes are held on the head page, so
+	 * they must be dropped there and not on a tail page.
+	 */
+	page_ref_sub(hpage, count);
+	unlock_page(hpage);
+	mm_dec_nr_ptes(mm);
+	pte_free(mm, pmd_pgtable(_pmd));
+	add_mm_counter(mm, mm_counter_file(hpage), -count);
+	return;
+
+out_unlock:
+	/* Nothing was modified: just unwind in reverse order. */
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	unlock_page(hpage);
+}
+
#ifdef CONFIG_DEBUG_FS
static int split_huge_pages_set(void *data, u64 val)
{
After all uprobes are removed from the huge page (with PTE pgtable), it is possible to collapse the pmd and benefit from THP again. This patch does the collapse. An issue in an earlier version was discovered by the kbuild test robot. Reported-by: kbuild test robot <lkp@intel.com> Signed-off-by: Song Liu <songliubraving@fb.com> --- include/linux/huge_mm.h | 7 +++++ kernel/events/uprobes.c | 5 ++- mm/huge_memory.c | 69 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-)