diff mbox series

mm: memory: check userfaultfd_wp() in vmf_orig_pte_uffd_wp()

Message ID 20240417082359.3413259-1-wangkefeng.wang@huawei.com (mailing list archive)
State New
Headers show
Series mm: memory: check userfaultfd_wp() in vmf_orig_pte_uffd_wp() | expand

Commit Message

Kefeng Wang April 17, 2024, 8:23 a.m. UTC
Directly call vmf_orig_pte_uffd_wp() in do_anonymous_page() and
set_pte_range() to save a uffd_wp local variable, and add a
userfaultfd_wp() check in vmf_orig_pte_uffd_wp() to avoid the
unnecessary function calls in the common case; the lat_pagefault
testcase does show an improvement, though a very small one (~1%).

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/memory.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

Comments

Kefeng Wang April 17, 2024, 9:30 a.m. UTC | #1
On 2024/4/17 16:23, Kefeng Wang wrote:
> Directly call vmf_orig_pte_uffd_wp() in do_anonymous_page() and
> set_pte_range() to save a uffd_wp and add userfaultfd_wp() check
> in vmf_orig_pte_uffd_wp() to avoid the unnecessary function calls
> in the most sense, lat_pagefault testcase does show improvement
> though very small(~1%).
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   mm/memory.c | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/memory.c b/mm/memory.c
> index 5ae2409d3cb9..a6afc96001e6 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -117,6 +117,9 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)


	 if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP))
         	return false;

Will add config check too,

>   	if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
>   		return false;
>   
> +	if (!userfaultfd_wp(vmf->vma))
> +		return false;
> +

but wait for review.

>   	return pte_marker_uffd_wp(vmf->orig_pte);
>   }
>   
> @@ -4388,7 +4391,6 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
>    */
>   static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   {
> -	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
>   	struct vm_area_struct *vma = vmf->vma;
>   	unsigned long addr = vmf->address;
>   	struct folio *folio;
> @@ -4488,7 +4490,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>   	folio_add_new_anon_rmap(folio, vma, addr);
>   	folio_add_lru_vma(folio, vma);
>   setpte:
> -	if (uffd_wp)
> +	if (vmf_orig_pte_uffd_wp(vmf))
>   		entry = pte_mkuffd_wp(entry);
>   	set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
>   
> @@ -4663,7 +4665,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
>   		struct page *page, unsigned int nr, unsigned long addr)
>   {
>   	struct vm_area_struct *vma = vmf->vma;
> -	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
>   	bool write = vmf->flags & FAULT_FLAG_WRITE;
>   	bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
>   	pte_t entry;
> @@ -4678,7 +4679,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
>   
>   	if (write)
>   		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> -	if (unlikely(uffd_wp))
> +	if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
>   		entry = pte_mkuffd_wp(entry);
>   	/* copy-on-write page */
>   	if (write && !(vma->vm_flags & VM_SHARED)) {
Peter Xu April 17, 2024, 6:34 p.m. UTC | #2
Hi, Kefeng,

On Wed, Apr 17, 2024 at 05:30:40PM +0800, Kefeng Wang wrote:
> 
> 
> On 2024/4/17 16:23, Kefeng Wang wrote:
> > Directly call vmf_orig_pte_uffd_wp() in do_anonymous_page() and
> > set_pte_range() to save a uffd_wp and add userfaultfd_wp() check
> > in vmf_orig_pte_uffd_wp() to avoid the unnecessary function calls
> > in the most sense, lat_pagefault testcase does show improvement
> > though very small(~1%).

I'm ok with the change if that helps as big as 1%, but I'm a bit surprised
to see such a difference, because for file pte_marker_uffd_wp() should
check first on pte_none() then it should return already if uffd not even
registered for the vma, while orig_pte should be hot too if valid.

> > 
> > Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> > ---
> >   mm/memory.c | 9 +++++----
> >   1 file changed, 5 insertions(+), 4 deletions(-)
> > 
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 5ae2409d3cb9..a6afc96001e6 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -117,6 +117,9 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)
> 
> 
> 	 if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP))
>         	return false;
> 
> Will add config check too,

pte_marker_uffd_wp() returns false when !PTE_MARKER_UFFD_WP, so kind of
imply this.  I assume you meant to avoid checking ORIG_PTE_VALID flag, but
the flags is pretty hot too.  Again, just want to double check with you on
whether it can have such a huge difference, e.g., how that compares with
the current code v.s. original patch v.s. this squashed.

Thanks,

> 
> >   	if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
> >   		return false;
> > +	if (!userfaultfd_wp(vmf->vma))
> > +		return false;
> > +
> 
> but wait for review.
> 
> >   	return pte_marker_uffd_wp(vmf->orig_pte);
> >   }
> > @@ -4388,7 +4391,6 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
> >    */
> >   static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
> >   {
> > -	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
> >   	struct vm_area_struct *vma = vmf->vma;
> >   	unsigned long addr = vmf->address;
> >   	struct folio *folio;
> > @@ -4488,7 +4490,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
> >   	folio_add_new_anon_rmap(folio, vma, addr);
> >   	folio_add_lru_vma(folio, vma);
> >   setpte:
> > -	if (uffd_wp)
> > +	if (vmf_orig_pte_uffd_wp(vmf))
> >   		entry = pte_mkuffd_wp(entry);
> >   	set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
> > @@ -4663,7 +4665,6 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
> >   		struct page *page, unsigned int nr, unsigned long addr)
> >   {
> >   	struct vm_area_struct *vma = vmf->vma;
> > -	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
> >   	bool write = vmf->flags & FAULT_FLAG_WRITE;
> >   	bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
> >   	pte_t entry;
> > @@ -4678,7 +4679,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
> >   	if (write)
> >   		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> > -	if (unlikely(uffd_wp))
> > +	if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
> >   		entry = pte_mkuffd_wp(entry);
> >   	/* copy-on-write page */
> >   	if (write && !(vma->vm_flags & VM_SHARED)) {
>
Kefeng Wang April 18, 2024, 1:47 a.m. UTC | #3
On 2024/4/18 2:34, Peter Xu wrote:
> Hi, Kefeng,
> 
> On Wed, Apr 17, 2024 at 05:30:40PM +0800, Kefeng Wang wrote:
>>
>>
>> On 2024/4/17 16:23, Kefeng Wang wrote:
>>> Directly call vmf_orig_pte_uffd_wp() in do_anonymous_page() and
>>> set_pte_range() to save a uffd_wp and add userfaultfd_wp() check
>>> in vmf_orig_pte_uffd_wp() to avoid the unnecessary function calls
>>> in the most sense, lat_pagefault testcase does show improvement
>>> though very small(~1%).
> 
> I'm ok with the change if that helps as big as 1%, but I'm a bit surprised
> to see such a difference, because for file pte_marker_uffd_wp() should
> check first on pte_none() then it should return already if uffd not even
> registered for the vma, while orig_pte should be hot too if valid.

Yes, after retesting the gain is not as big as 1%, but perf shows that
vmf_orig_pte_uffd_wp is eliminated,

[root@localhost]# perf report -i perf.data.old |grep vmf
      0.17%     0.13%  lat_pagefault  [kernel.kallsyms]      [k] 
vmf_orig_pte_uffd_wp.part.0.isra.0
[root@localhost]# perf report -i perf.data |grep vmf

> 
>>>
>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>> ---
>>>    mm/memory.c | 9 +++++----
>>>    1 file changed, 5 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/mm/memory.c b/mm/memory.c
>>> index 5ae2409d3cb9..a6afc96001e6 100644
>>> --- a/mm/memory.c
>>> +++ b/mm/memory.c
>>> @@ -117,6 +117,9 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)
>>
>>
>> 	 if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP))
>>          	return false;
>>
>> Will add config check too,
> 
> pte_marker_uffd_wp() returns false when !PTE_MARKER_UFFD_WP, so kind of
> imply this.  I assume you meant to avoid checking ORIG_PTE_VALID flag, but

Just to avoid checking ORIG_PTE_VALID and the new userfaultfd_wp()
check, since uffd-wp is not supported on most architectures.

> the flags is pretty hot too.  Again, just want to double check with you on
> whether it can have such a huge difference, e.g., how that compares with
> the current code v.s. original patch v.s. this squashed.

I will change the changelog to show the difference in
vmf_orig_pte_uffd_wp based on the perf data.

Thanks.
Kefeng Wang April 18, 2024, 10:44 a.m. UTC | #4
On 2024/4/18 9:47, Kefeng Wang wrote:
> 
> 
> On 2024/4/18 2:34, Peter Xu wrote:
>> Hi, Kefeng,
>>
>> On Wed, Apr 17, 2024 at 05:30:40PM +0800, Kefeng Wang wrote:
>>>
>>>
>>> On 2024/4/17 16:23, Kefeng Wang wrote:
>>>> Directly call vmf_orig_pte_uffd_wp() in do_anonymous_page() and
>>>> set_pte_range() to save a uffd_wp and add userfaultfd_wp() check
>>>> in vmf_orig_pte_uffd_wp() to avoid the unnecessary function calls
>>>> in the most sense, lat_pagefault testcase does show improvement
>>>> though very small(~1%).
>>
>> I'm ok with the change if that helps as big as 1%, but I'm a bit 
>> surprised
>> to see such a difference, because for file pte_marker_uffd_wp() should
>> check first on pte_none() then it should return already if uffd not even
>> registered for the vma, while orig_pte should be hot too if valid.
> 
> Yes, retest, not as big as 1%, but the perf shows vmf_orig_pte_uffd_wp
> is eliminated,
> 
> [root@localhost]# perf report -i perf.data.old |grep vmf
>       0.17%     0.13%  lat_pagefault  [kernel.kallsyms]      [k] 
> vmf_orig_pte_uffd_wp.part.0.isra.0
> [root@localhost]# perf report -i perf.data |grep vmf
> 
>>
>>>>
>>>> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
>>>> ---
>>>>    mm/memory.c | 9 +++++----
>>>>    1 file changed, 5 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index 5ae2409d3cb9..a6afc96001e6 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -117,6 +117,9 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault 
>>>> *vmf)
>>>
>>>
>>>      if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP))
>>>              return false;
>>>
>>> Will add config check too,
>>
>> pte_marker_uffd_wp() returns false when !PTE_MARKER_UFFD_WP, so kind of
>> imply this.  I assume you meant to avoid checking ORIG_PTE_VALID flag, 
>> but
> 
> Just to avoid checking ORIG_PTE_VALID and the new userfaultfd_wp() since
> it is not supported on most archs.

Since pte_mkuffd_wp() simply returns the pte unchanged when
!PTE_MARKER_UFFD_WP, this extra config check is not needed; I also
confirmed this by inspecting the disassembly. Will update the
changelog and resend.


> 
>> the flags is pretty hot too.  Again, just want to double check with 
>> you on
>> whether it can have such a huge difference, e.g., how that compares with
>> the current code v.s. original patch v.s. this squashed.
> 
> I will change the changelog to show different about vmf_orig_pte_uffd_wp 
>   from perf data.
> 
> Thanks.
>
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 5ae2409d3cb9..a6afc96001e6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -117,6 +117,9 @@  static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf)
 	if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
 		return false;
 
+	if (!userfaultfd_wp(vmf->vma))
+		return false;
+
 	return pte_marker_uffd_wp(vmf->orig_pte);
 }
 
@@ -4388,7 +4391,6 @@  static struct folio *alloc_anon_folio(struct vm_fault *vmf)
  */
 static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 {
-	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long addr = vmf->address;
 	struct folio *folio;
@@ -4488,7 +4490,7 @@  static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	folio_add_new_anon_rmap(folio, vma, addr);
 	folio_add_lru_vma(folio, vma);
 setpte:
-	if (uffd_wp)
+	if (vmf_orig_pte_uffd_wp(vmf))
 		entry = pte_mkuffd_wp(entry);
 	set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
 
@@ -4663,7 +4665,6 @@  void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 		struct page *page, unsigned int nr, unsigned long addr)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	bool uffd_wp = vmf_orig_pte_uffd_wp(vmf);
 	bool write = vmf->flags & FAULT_FLAG_WRITE;
 	bool prefault = in_range(vmf->address, addr, nr * PAGE_SIZE);
 	pte_t entry;
@@ -4678,7 +4679,7 @@  void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
-	if (unlikely(uffd_wp))
+	if (unlikely(vmf_orig_pte_uffd_wp(vmf)))
 		entry = pte_mkuffd_wp(entry);
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {