@@ -163,6 +163,8 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
                bool freeze, struct page *page, pgtable_t prealloc_pgtable);
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address);
+
void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
                unsigned long address);
@@ -302,6 +304,12 @@ static inline void split_huge_pmd_address(struct vm_area_struct *vma,
                unsigned long address, bool freeze, struct page *page,
                pgtable_t prealloc_pgtable) {}
+static inline bool mm_address_trans_huge(struct mm_struct *mm,
+                                         unsigned long address)
+{
+        return false;
+}
+
#define split_huge_pud(__vma, __pmd, __address) \
        do { } while (0)
@@ -26,6 +26,7 @@
#include <linux/percpu-rwsem.h>
#include <linux/task_work.h>
#include <linux/shmem_fs.h>
+#include <asm/pgalloc.h>
#include <linux/uprobes.h>
@@ -153,7 +154,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
{
        struct mm_struct *mm = vma->vm_mm;
        struct page_vma_mapped_walk pvmw = {
-               .page = old_page,
+               .page = compound_head(old_page),
                .vma = vma,
                .address = addr,
        };
@@ -165,8 +166,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
                                addr + PAGE_SIZE);
-       VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
-
        if (!orig) {
                err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
                                            &memcg, false);
@@ -188,7 +187,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        get_page(new_page);
        if (orig) {
-               page_add_file_rmap(new_page, false);
+               page_add_file_rmap(compound_head(new_page),
+                                  PageTransHuge(compound_head(new_page)));
                inc_mm_counter(mm, mm_counter_file(new_page));
                dec_mm_counter(mm, MM_ANONPAGES);
        } else {
@@ -207,7 +207,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
        set_pte_at_notify(mm, addr, pvmw.pte,
                          mk_pte(new_page, vma->vm_page_prot));
-       page_remove_rmap(old_page, false);
+       page_remove_rmap(compound_head(old_page),
+                        PageTransHuge(compound_head(old_page)));
        if (!page_mapped(old_page))
                try_to_free_swap(old_page);
        page_vma_mapped_walk_done(&pvmw);
@@ -475,17 +476,42 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
        struct vm_area_struct *vma;
        int ret, is_register, ref_ctr_updated = 0;
        pgoff_t index;
+       pgtable_t prealloc_pgtable = NULL;
+       unsigned long foll_flags = FOLL_FORCE;
        is_register = is_swbp_insn(&opcode);
        uprobe = container_of(auprobe, struct uprobe, arch);
+       /* do not FOLL_SPLIT yet */
+       ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+                       foll_flags, &old_page, &vma, NULL);
+
+       if (ret <= 0)
+               return ret;
+
+       if (mm_address_trans_huge(mm, vaddr)) {
+               prealloc_pgtable = pte_alloc_one(mm);
+               if (likely(prealloc_pgtable)) {
+                       split_huge_pmd_address(vma, vaddr, false, NULL,
+                                              prealloc_pgtable);
+                       goto verify;
+               } else {
+                       /* fallback to FOLL_SPLIT */
+                       foll_flags |= FOLL_SPLIT;
+                       put_page(old_page);
+               }
+       } else {
+               goto verify;
+       }
+
retry:
        /* Read the page with vaddr into memory */
        ret = get_user_pages_remote(NULL, mm, vaddr, 1,
-                       FOLL_FORCE | FOLL_SPLIT, &old_page, &vma, NULL);
+                       foll_flags, &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;
+verify:
        ret = verify_opcode(old_page, vaddr, &opcode);
        if (ret <= 0)
                goto put_old;
@@ -2360,6 +2360,30 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
        ____split_huge_pmd(vma, pmd, address, freeze, page, prealloc_pgtable);
}
+bool mm_address_trans_huge(struct mm_struct *mm, unsigned long address)
+{
+       pgd_t *pgd;
+       p4d_t *p4d;
+       pud_t *pud;
+       pmd_t *pmd;
+
+       pgd = pgd_offset(mm, address);
+       if (!pgd_present(*pgd))
+               return false;
+
+       p4d = p4d_offset(pgd, address);
+       if (!p4d_present(*p4d))
+               return false;
+
+       pud = pud_offset(p4d, address);
+       if (!pud_present(*pud))
+               return false;
+
+       pmd = pmd_offset(pud, address);
+
+       return pmd_trans_huge(*pmd);
+}
+
void vma_adjust_trans_huge(struct vm_area_struct *vma,
                           unsigned long start,
                           unsigned long end,
Instead of splitting the compound page with FOLL_SPLIT, this patch allows
uprobe to split only the pmd for huge pages. A helper function,
mm_address_trans_huge(mm, address), is introduced to test whether the
address in mm is mapped by a transparent huge page.

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 include/linux/huge_mm.h |  8 ++++++++
 kernel/events/uprobes.c | 38 ++++++++++++++++++++++++++++++++------
 mm/huge_memory.c        | 24 ++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 6 deletions(-)
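
Note for reviewers: below is a rough, flattened reading of the new fast path
at the top of uprobe_write_opcode(), for anyone who prefers straight-line
code over the gotos in the hunk above. It is a sketch only: the function name
write_opcode_thp_sketch is invented for illustration, and the ref_ctr update,
the retry label and everything from verify_opcode()/__replace_page() onward
are omitted.

/*
 * Reading aid only -- simplified flow of uprobe_write_opcode() with this
 * patch applied; not the literal post-patch function.
 */
#include <linux/mm.h>
#include <linux/huge_mm.h>
#include <asm/pgalloc.h>

static int write_opcode_thp_sketch(struct mm_struct *mm, unsigned long vaddr)
{
        struct page *old_page;
        struct vm_area_struct *vma;
        pgtable_t prealloc_pgtable = NULL;
        unsigned long foll_flags = FOLL_FORCE;
        int ret;

        /* First lookup without FOLL_SPLIT, so a THP mapping stays intact. */
        ret = get_user_pages_remote(NULL, mm, vaddr, 1, foll_flags,
                                    &old_page, &vma, NULL);
        if (ret <= 0)
                return ret;

        if (mm_address_trans_huge(mm, vaddr)) {
                /*
                 * Preallocate a page table and split only the pmd: the range
                 * is remapped with ptes while the compound page itself is
                 * left alone.
                 */
                prealloc_pgtable = pte_alloc_one(mm);
                if (prealloc_pgtable) {
                        split_huge_pmd_address(vma, vaddr, false, NULL,
                                               prealloc_pgtable);
                } else {
                        /* No memory for a page table: fall back to FOLL_SPLIT. */
                        foll_flags |= FOLL_SPLIT;
                        put_page(old_page);
                        ret = get_user_pages_remote(NULL, mm, vaddr, 1,
                                                    foll_flags, &old_page,
                                                    &vma, NULL);
                        if (ret <= 0)
                                return ret;
                }
        }

        /* verify_opcode() and __replace_page() continue from this point. */
        put_page(old_page);
        return ret;
}

The trade-off being made here is that FOLL_SPLIT splits the compound page
itself, whereas splitting only the pmd keeps the huge page intact and merely
maps the range with ptes in this mm; FOLL_SPLIT is kept as the fallback for
the rare case where pte_alloc_one() fails.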