diff mbox series

[uprobe,thp,v2,2/5] uprobe: use original page when all uprobes are removed

Message ID 20190604165138.1520916-3-songliubraving@fb.com (mailing list archive)
State New, archived
Headers show
Series THP aware uprobe | expand

Commit Message

Song Liu June 4, 2019, 4:51 p.m. UTC
Currently, uprobe swaps the target page with a anonymous page in both
install_breakpoint() and remove_breakpoint(). When all uprobes on a page
are removed, the given mm is still using an anonymous page (not the
original page).

This patch allows uprobe to use original page when possible (all uprobes
on the page are already removed).

Signed-off-by: Song Liu <songliubraving@fb.com>
---
 kernel/events/uprobes.c | 42 ++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

Comments

Oleg Nesterov June 5, 2019, 10:02 a.m. UTC | #1
On 06/04, Song Liu wrote:
>
> Currently, uprobe swaps the target page with a anonymous page in both
> install_breakpoint() and remove_breakpoint(). When all uprobes on a page
> are removed, the given mm is still using an anonymous page (not the
> original page).

Agreed, it would be nice to avoid this,

> @@ -461,9 +471,10 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>  			unsigned long vaddr, uprobe_opcode_t opcode)
>  {
>  	struct uprobe *uprobe;
> -	struct page *old_page, *new_page;
> +	struct page *old_page, *new_page, *orig_page = NULL;
>  	struct vm_area_struct *vma;
>  	int ret, is_register, ref_ctr_updated = 0;
> +	pgoff_t index;
>  
>  	is_register = is_swbp_insn(&opcode);
>  	uprobe = container_of(auprobe, struct uprobe, arch);
> @@ -501,6 +512,19 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>  	copy_highpage(new_page, old_page);
>  	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
>  
> +	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
> +	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);

I think you should take is_register into account, if it is true we are going
to install the breakpoint so we can avoid find_get_page/pages_identical.

> +	if (orig_page) {
> +		if (pages_identical(new_page, orig_page)) {
> +			/* if new_page matches orig_page, use orig_page */
> +			put_page(new_page);
> +			new_page = orig_page;

Hmm. can't we simply unmap the page in this case?

Oleg.
Song Liu June 5, 2019, 4:29 p.m. UTC | #2
Hi Oleg,

Thanks for your kind review!

> On Jun 5, 2019, at 3:02 AM, Oleg Nesterov <oleg@redhat.com> wrote:
> 
> On 06/04, Song Liu wrote:
>> 
>> Currently, uprobe swaps the target page with a anonymous page in both
>> install_breakpoint() and remove_breakpoint(). When all uprobes on a page
>> are removed, the given mm is still using an anonymous page (not the
>> original page).
> 
> Agreed, it would be nice to avoid this,
> 
>> @@ -461,9 +471,10 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>> 			unsigned long vaddr, uprobe_opcode_t opcode)
>> {
>> 	struct uprobe *uprobe;
>> -	struct page *old_page, *new_page;
>> +	struct page *old_page, *new_page, *orig_page = NULL;
>> 	struct vm_area_struct *vma;
>> 	int ret, is_register, ref_ctr_updated = 0;
>> +	pgoff_t index;
>> 
>> 	is_register = is_swbp_insn(&opcode);
>> 	uprobe = container_of(auprobe, struct uprobe, arch);
>> @@ -501,6 +512,19 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
>> 	copy_highpage(new_page, old_page);
>> 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
>> 
>> +	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
>> +	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
> 
> I think you should take is_register into account, if it is true we are going
> to install the breakpoint so we can avoid find_get_page/pages_identical.

Good idea! I will add this to v3. 

> 
>> +	if (orig_page) {
>> +		if (pages_identical(new_page, orig_page)) {
>> +			/* if new_page matches orig_page, use orig_page */
>> +			put_page(new_page);
>> +			new_page = orig_page;
> 
> Hmm. can't we simply unmap the page in this case?

I haven't found an easier way here. I tried with zap_vma_ptes() and 
unmap_page_range(). But neither of them works well here. 

Also, we need to deal with *_mm_counter, rmap, etc. So I guess reusing
__replace_page() (as current patch) is probably the easiest solution. 

Did I miss anything? 

Thanks again,
Song
diff mbox series

Patch

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 78f61bfc6b79..3fca7c55d370 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -160,16 +160,19 @@  static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	int err;
 	struct mmu_notifier_range range;
 	struct mem_cgroup *memcg;
+	bool orig = new_page->mapping != NULL;  /* new_page == orig_page */
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
 				addr + PAGE_SIZE);
 
 	VM_BUG_ON_PAGE(PageTransHuge(old_page), old_page);
 
-	err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL, &memcg,
-			false);
-	if (err)
-		return err;
+	if (!orig) {
+		err = mem_cgroup_try_charge(new_page, vma->vm_mm, GFP_KERNEL,
+					    &memcg, false);
+		if (err)
+			return err;
+	}
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
 	lock_page(old_page);
@@ -177,15 +180,22 @@  static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	mmu_notifier_invalidate_range_start(&range);
 	err = -EAGAIN;
 	if (!page_vma_mapped_walk(&pvmw)) {
-		mem_cgroup_cancel_charge(new_page, memcg, false);
+		if (!orig)
+			mem_cgroup_cancel_charge(new_page, memcg, false);
 		goto unlock;
 	}
 	VM_BUG_ON_PAGE(addr != pvmw.address, old_page);
 
 	get_page(new_page);
-	page_add_new_anon_rmap(new_page, vma, addr, false);
-	mem_cgroup_commit_charge(new_page, memcg, false, false);
-	lru_cache_add_active_or_unevictable(new_page, vma);
+	if (orig) {
+		page_add_file_rmap(new_page, false);
+		inc_mm_counter(mm, mm_counter_file(new_page));
+		dec_mm_counter(mm, MM_ANONPAGES);
+	} else {
+		page_add_new_anon_rmap(new_page, vma, addr, false);
+		mem_cgroup_commit_charge(new_page, memcg, false, false);
+		lru_cache_add_active_or_unevictable(new_page, vma);
+	}
 
 	if (!PageAnon(old_page)) {
 		dec_mm_counter(mm, mm_counter_file(old_page));
@@ -461,9 +471,10 @@  int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 			unsigned long vaddr, uprobe_opcode_t opcode)
 {
 	struct uprobe *uprobe;
-	struct page *old_page, *new_page;
+	struct page *old_page, *new_page, *orig_page = NULL;
 	struct vm_area_struct *vma;
 	int ret, is_register, ref_ctr_updated = 0;
+	pgoff_t index;
 
 	is_register = is_swbp_insn(&opcode);
 	uprobe = container_of(auprobe, struct uprobe, arch);
@@ -501,6 +512,19 @@  int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
+	index = vaddr_to_offset(vma, vaddr & PAGE_MASK) >> PAGE_SHIFT;
+	orig_page = find_get_page(vma->vm_file->f_inode->i_mapping, index);
+	if (orig_page) {
+		if (pages_identical(new_page, orig_page)) {
+			/* if new_page matches orig_page, use orig_page */
+			put_page(new_page);
+			new_page = orig_page;
+		} else {
+			put_page(orig_page);
+			orig_page = NULL;
+		}
+	}
+
 	ret = __replace_page(vma, vaddr, old_page, new_page);
 	put_page(new_page);
 put_old: