[uprobe,thp,4/4] uprobe: collapse THP pmd after removing all uprobes

Message ID 20190529212049.2413886-5-songliubraving@fb.com (mailing list archive)
State New, archived
Series THP aware uprobe

Commit Message

Song Liu May 29, 2019, 9:20 p.m. UTC
After all uprobes are removed from the huge page (with PTE pgtable), it
is possible to collapse the pmd and benefit from THP again. This patch
does the collapse.

An issue in an earlier version was discovered by the kbuild test robot.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  9 ++++++++
 kernel/events/uprobes.c |  3 +++
 mm/huge_memory.c        | 47 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 59 insertions(+)

Comments

Kirill A. Shutemov May 30, 2019, 12:20 p.m. UTC | #1
On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
> After all uprobes are removed from the huge page (with PTE pgtable), it
> is possible to collapse the pmd and benefit from THP again. This patch
> does the collapse.

I don't think this is the right way to go. We should defer it to khugepaged.
We need to teach khugepaged to deal with PTE-mapped compound pages,
and uprobe should only kick khugepaged for a VMA. Maybe synchronously.
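
For illustration, a minimal sketch of what "kick khugepaged for a VMA" could look
like from the uprobe side, using the existing khugepaged_enter_vma_merge() entry
point. The call site in uprobe_write_opcode() is an assumption, not part of the
patch, and khugepaged would still have to be taught to collapse a PTE-mapped
compound page once it scans the mm:

#include <linux/khugepaged.h>

	/*
	 * Sketch only: rather than collapsing the PMD here, queue the mm
	 * for khugepaged after the last uprobe in a PTE-mapped compound
	 * page has been removed.  khugepaged_enter_vma_merge() only adds
	 * the mm to khugepaged's scan list if the VMA is suitable.
	 */
	if (!ret && orig_page && PageTransCompound(orig_page))
		khugepaged_enter_vma_merge(vma, vma->vm_flags);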
Song Liu May 30, 2019, 5:26 p.m. UTC | #2
> On May 30, 2019, at 5:20 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> 
> On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
>> After all uprobes are removed from the huge page (with PTE pgtable), it
>> is possible to collapse the pmd and benefit from THP again. This patch
>> does the collapse.
> 
> I don't think this is the right way to go. We should defer it to khugepaged.
> We need to teach khugepaged to deal with PTE-mapped compound pages,
> and uprobe should only kick khugepaged for a VMA. Maybe synchronously.
> 

I guess that would be the same logic, but run in khugepaged? It doesn't
have to be done synchronously. 

Let me try that.

Thanks,
Song


> -- 
> Kirill A. Shutemov
Kirill A. Shutemov May 31, 2019, 7 a.m. UTC | #3
On Thu, May 30, 2019 at 05:26:38PM +0000, Song Liu wrote:
> 
> 
> > On May 30, 2019, at 5:20 AM, Kirill A. Shutemov <kirill@shutemov.name> wrote:
> > 
> > On Wed, May 29, 2019 at 02:20:49PM -0700, Song Liu wrote:
> >> After all uprobes are removed from the huge page (with PTE pgtable), it
> >> is possible to collapse the pmd and benefit from THP again. This patch
> >> does the collapse.
> > 
> > I don't think this is the right way to go. We should defer it to khugepaged.
> > We need to teach khugepaged to deal with PTE-mapped compound pages,
> > and uprobe should only kick khugepaged for a VMA. Maybe synchronously.
> > 
> 
> I guess that would be the same logic, but run in khugepaged? It doesn't
> have to be done synchronously. 

My idea was that, since we have all the required locking in place, we can call
into the khugepaged code that does the collapse, without waiting for it to
get to the VMA.
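
For comparison, a rough sketch of the synchronous variant from the uprobe side,
assuming khugepaged exported a helper that collapses a PTE-mapped compound page;
the helper name below is hypothetical and does not exist at this point in the
series:

	/*
	 * Hypothetical khugepaged-provided helper: collapse the PTE-mapped
	 * compound page covering @vaddr back into a PMD mapping, relying on
	 * the locking the caller already holds, as suggested above.
	 */
	if (!ret && orig_page && PageTransCompound(orig_page))
		khugepaged_collapse_pte_mapped(mm, vma, vaddr & HPAGE_PMD_MASK);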

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 4832d6580969..61f6d574d9b4 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -252,6 +252,10 @@ static inline bool thp_migration_supported(void)
 	return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION);
 }
 
+extern void try_collapse_huge_pmd(struct mm_struct *mm,
+					 struct vm_area_struct *vma,
+					 unsigned long vaddr);
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; })
 #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; })
@@ -377,6 +381,11 @@ static inline bool thp_migration_supported(void)
 {
 	return false;
 }
+
+static inline void try_collapse_huge_pmd(struct mm_struct *mm,
+					 struct vm_area_struct *vma,
+					 unsigned long vaddr) {}
+
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 56eeccc2f7a2..422617bdd5ff 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -564,6 +564,9 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	if (ret && is_register && ref_ctr_updated)
 		update_ref_ctr(uprobe, mm, -1);
 
+	if (!ret && orig_page && PageTransCompound(orig_page))
+		try_collapse_huge_pmd(mm, vma, vaddr);
+
 	return ret;
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4714871353c0..e2edec3ffd43 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2923,6 +2923,53 @@ static struct shrinker deferred_split_shrinker = {
 	.flags = SHRINKER_NUMA_AWARE,
 };
 
+/*
+ * This function only checks whether all PTEs in this PMD point to
+ * contiguous pages; the caller should make sure at least one of these
+ * PTEs points to a huge page, e.g. PageTransCompound(one_page) != 0.
+ */
+void try_collapse_huge_pmd(struct mm_struct *mm,
+			   struct vm_area_struct *vma,
+			   unsigned long vaddr)
+{
+	struct mmu_notifier_range range;
+	unsigned long addr;
+	pmd_t *pmd, _pmd;
+	spinlock_t *ptl;
+	long long head;
+	int i;
+
+	pmd = mm_find_pmd(mm, vaddr);
+	if (!pmd)
+		return;
+
+	addr = vaddr & HPAGE_PMD_MASK;
+	head = pte_val(*pte_offset_map(pmd, addr));
+	ptl = pmd_lock(mm, pmd);
+	for (i = 0; i < HPAGE_PMD_NR; i++, addr += PAGE_SIZE) {
+		pte_t *pte = pte_offset_map(pmd, addr);
+
+		if (pte_val(*pte) != head + i * PAGE_SIZE) {
+			spin_unlock(ptl);
+			return;
+		}
+	}
+
+	addr = vaddr & HPAGE_PMD_MASK;
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm,
+				addr, addr + HPAGE_PMD_SIZE);
+	mmu_notifier_invalidate_range_start(&range);
+
+	_pmd = pmdp_collapse_flush(vma, addr, pmd);
+	spin_unlock(ptl);
+	mmu_notifier_invalidate_range_end(&range);
+	mm_dec_nr_ptes(mm);
+	pte_free(mm, pmd_pgtable(_pmd));
+	add_mm_counter(mm,
+		       shmem_file(vma->vm_file) ? MM_SHMEMPAGES : MM_FILEPAGES,
+		       -HPAGE_PMD_NR);
+}
+
 #ifdef CONFIG_DEBUG_FS
 static int split_huge_pages_set(void *data, u64 val)
 {
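
As a side note on the contiguity test in try_collapse_huge_pmd(): an
illustrative-only variant could compare page frame numbers with pte_pfn()
instead of doing arithmetic on raw pte_val(), and drop the mapping taken by
pte_offset_map(). This is a sketch under the same assumptions as the patch
(the caller holds the page table lock and the address is PMD-aligned), not
part of it; unlike the pte_val() comparison, it does not require the
protection bits to be identical across the range:

/*
 * Sketch only: check that the HPAGE_PMD_NR PTEs under @pmd map physically
 * contiguous pages.  The caller must hold the page table lock and @haddr
 * must be PMD-aligned, as in try_collapse_huge_pmd() above.
 */
static bool pmd_maps_contiguous_pages(pmd_t *pmd, unsigned long haddr)
{
	pte_t *pte = pte_offset_map(pmd, haddr);
	unsigned long pfn = pte_pfn(*pte);
	bool contiguous = true;
	int i;

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		if (!pte_present(pte[i]) || pte_pfn(pte[i]) != pfn + i) {
			contiguous = false;
			break;
		}
	}
	pte_unmap(pte);
	return contiguous;
}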