diff mbox series

[v3,3/3] mm/memory.c: Add memory read privilege before filling PTE entry

Message ID 1589778529-25627-3-git-send-email-maobibo@loongson.cn (mailing list archive)
State New, archived
Headers show
Series [v3,1/3] MIPS: Do not flush tlb page when updating PTE entry | expand

Commit Message

bibo mao May 18, 2020, 5:08 a.m. UTC
On mips platform, hw PTE entry valid bit is set in pte_mkyoung
function, it is used to set physical page with readable privilege.

Here add pte_mkyoung function to make page readable on MIPS platform
during page fault handling. This patch improves page fault latency
about 10% on my MIPS machine with lmbench lat_pagefault case.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 mm/memory.c   | 3 +++
 mm/mprotect.c | 2 ++
 2 files changed, 5 insertions(+)

Comments

Andrew Morton May 18, 2020, 8:57 p.m. UTC | #1
On Mon, 18 May 2020 13:08:49 +0800 Bibo Mao <maobibo@loongson.cn> wrote:

> On mips platform, hw PTE entry valid bit is set in pte_mkyoung
> function, it is used to set physical page with readable privilege.

pte_mkyoung() seems to be a strange place to set the pte's valid bit. 
Why is it done there?  Can it be done within mips's mk_pte()?

> Here add pte_mkyoung function to make page readable on MIPS platform
> during page fault handling. This patch improves page fault latency
> about 10% on my MIPS machine with lmbench lat_pagefault case.
> 
> ...
>
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>  		}
>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>  		entry = mk_pte(new_page, vma->vm_page_prot);
> +		entry = pte_mkyoung(entry);
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);

What is the effect on non-mips machines?  If it's only "additional
overhead" then it would be better to add a new pte_mkvalid() (or
whatever) and arrange for it to be a no-op on all but mips?

>  		/*
>  		 * Clear the pte entry and flush it first, before updating the
> @@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>  	__SetPageUptodate(page);
>  
>  	entry = mk_pte(page, vma->vm_page_prot);
> +	entry = pte_mkyoung(entry);
>  	if (vma->vm_flags & VM_WRITE)
>  		entry = pte_mkwrite(pte_mkdirty(entry));
>  
> @@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>  
>  	flush_icache_page(vma, page);
>  	entry = mk_pte(page, vma->vm_page_prot);
> +	entry = pte_mkyoung(entry);
>  	if (write)
>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>  	/* copy-on-write page */
> diff --git a/mm/mprotect.c b/mm/mprotect.c
> index 494192ca..673f1cd 100644
> --- a/mm/mprotect.c
> +++ b/mm/mprotect.c
> @@ -131,6 +131,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>  				ptent = pte_clear_uffd_wp(ptent);
>  			}
>  
> +			if (vma->vm_flags & VM_READ)
> +				ptent = pte_mkyoung(ptent);
>  			/* Avoid taking write faults for known dirty pages */
>  			if (dirty_accountable && pte_dirty(ptent) &&
>  					(pte_soft_dirty(ptent) ||
bibo mao May 19, 2020, 3:22 a.m. UTC | #2
On 05/19/2020 04:57 AM, Andrew Morton wrote:
> On Mon, 18 May 2020 13:08:49 +0800 Bibo Mao <maobibo@loongson.cn> wrote:
> 
>> On mips platform, hw PTE entry valid bit is set in pte_mkyoung
>> function, it is used to set physical page with readable privilege.
> 
> pte_mkyoung() seems to be a strange place to set the pte's valid bit. 
> Why is it done there?  Can it be done within mips's mk_pte()?
On MIPS system hardware cannot set PAGE_ACCESS bit when accessing the page,
software sets PAGE_ACCESS software bit and PAGE_VALID hw bit together during page
fault stage.

If mk_pte is called in page fault flow, it is ok to set both bits. If it is not 
called in page fault, PAGE_ACCESS is set however there is no actual memory accessing.

> 
>> Here add pte_mkyoung function to make page readable on MIPS platform
>> during page fault handling. This patch improves page fault latency
>> about 10% on my MIPS machine with lmbench lat_pagefault case.
>>
>> ...
>>
>> --- a/mm/memory.c
>> +++ b/mm/memory.c
>> @@ -2704,6 +2704,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>>  		}
>>  		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
>>  		entry = mk_pte(new_page, vma->vm_page_prot);
>> +		entry = pte_mkyoung(entry);
>>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
> 
> What is the effect on non-mips machines?  If it's only "additional
> overhead" then it would be better to add a new pte_mkvalid() (or
> whatever) and arrange for it to be a no-op on all but mips?

how about adding pte_sw_mkyoung alike function which is a noop on all but mips?
this function is used to set PAGE_ACCESS bit and PAGE_VALID bit on mips platform.

regards
bibo,mao

> 
>>  		/*
>>  		 * Clear the pte entry and flush it first, before updating the
>> @@ -3378,6 +3379,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>>  	__SetPageUptodate(page);
>>  
>>  	entry = mk_pte(page, vma->vm_page_prot);
>> +	entry = pte_mkyoung(entry);
>>  	if (vma->vm_flags & VM_WRITE)
>>  		entry = pte_mkwrite(pte_mkdirty(entry));
>>  
>> @@ -3660,6 +3662,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
>>  
>>  	flush_icache_page(vma, page);
>>  	entry = mk_pte(page, vma->vm_page_prot);
>> +	entry = pte_mkyoung(entry);
>>  	if (write)
>>  		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
>>  	/* copy-on-write page */
>> diff --git a/mm/mprotect.c b/mm/mprotect.c
>> index 494192ca..673f1cd 100644
>> --- a/mm/mprotect.c
>> +++ b/mm/mprotect.c
>> @@ -131,6 +131,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
>>  				ptent = pte_clear_uffd_wp(ptent);
>>  			}
>>  
>> +			if (vma->vm_flags & VM_READ)
>> +				ptent = pte_mkyoung(ptent);
>>  			/* Avoid taking write faults for known dirty pages */
>>  			if (dirty_accountable && pte_dirty(ptent) &&
>>  					(pte_soft_dirty(ptent) ||
Andrew Morton May 19, 2020, 3:34 a.m. UTC | #3
On Tue, 19 May 2020 11:22:49 +0800 maobibo <maobibo@loongson.cn> wrote:

> how about adding pte_sw_mkyoung alike function which is a noop on all but mips?
> this function is used to set PAGE_ACCESS bit and PAGE_VALID bit on mips platform.

Sounds good.  Please ensure that the interface (roles and
responsibilities, etc) is well documented in code comments.
Hugh Dickins May 27, 2020, 8:55 p.m. UTC | #4
On Tue, 19 May 2020, maobibo wrote:
> On 05/19/2020 04:57 AM, Andrew Morton wrote:
> > On Mon, 18 May 2020 13:08:49 +0800 Bibo Mao <maobibo@loongson.cn> wrote:
> > 
> >> On mips platform, hw PTE entry valid bit is set in pte_mkyoung
> >> function, it is used to set physical page with readable privilege.
> > 
> > pte_mkyoung() seems to be a strange place to set the pte's valid bit. 
> > Why is it done there?  Can it be done within mips's mk_pte()?
> On MIPS system hardware cannot set PAGE_ACCESS bit when accessing the page,
> software sets PAGE_ACCESS software bit and PAGE_VALID hw bit together during page
> fault stage.
> 
> If mk_pte is called in page fault flow, it is ok to set both bits. If it is not 
> called in page fault, PAGE_ACCESS is set however there is no actual memory accessing.

Sorry for joining in so late, but would you please explain that some more:
preferably in the final commit message, if not here.

I still don't understand why this is not done in the same way as on other
architectures - those that care (I just checked x86, powerpc, arm, arm64,
but not all of them) make sure that all the bits they want are there in
mm/mmap.c's protection_map[16], which then feeds into vma->vm_page_prot,
and so into mk_pte() as Andrew indicated.

And I can see that arch/mips/mm/cache.c has a setup_protection_map()
to do that: why does it not set the additional bits that you want?
including the valid bit and the accessed (young) bit, as others do.
Are you saying that there are circumstances in which it is wrong
for mk_pte() to set the additional bits?

I'm afraid that generic mm developers will have no clue as to whether
or not to add a pte_sw_mkyoung() after a mk_pte(); and generic source
will be the cleaner if it turns out not to be needed (but thank you
for making sure that it does nothing on the other architectures).

Hugh
bibo mao May 28, 2020, 4:33 a.m. UTC | #5
On 05/28/2020 04:55 AM, Hugh Dickins wrote:
> On Tue, 19 May 2020, maobibo wrote:
>> On 05/19/2020 04:57 AM, Andrew Morton wrote:
>>> On Mon, 18 May 2020 13:08:49 +0800 Bibo Mao <maobibo@loongson.cn> wrote:
>>>
>>>> On mips platform, hw PTE entry valid bit is set in pte_mkyoung
>>>> function, it is used to set physical page with readable privilege.
>>>
>>> pte_mkyoung() seems to be a strange place to set the pte's valid bit. 
>>> Why is it done there?  Can it be done within mips's mk_pte()?
>> On MIPS system hardware cannot set PAGE_ACCESS bit when accessing the page,
>> software sets PAGE_ACCESS software bit and PAGE_VALID hw bit together during page
>> fault stage.
>>
>> If mk_pte is called in page fault flow, it is ok to set both bits. If it is not 
>> called in page fault, PAGE_ACCESS is set however there is no actual memory accessing.
> 
> Sorry for joining in so late, but would you please explain that some more:
> preferably in the final commit message, if not here.
> 
> I still don't understand why this is not done in the same way as on other
> architectures - those that care (I just checked x86, powerpc, arm, arm64,
> but not all of them) make sure that all the bits they want are there in
> mm/mmap.c's protection_map[16], which then feeds into vma->vm_page_prot,
> and so into mk_pte() as Andrew indicated.
> 
> And I can see that arch/mips/mm/cache.c has a setup_protection_map()
> to do that: why does it not set the additional bits that you want?
> including the valid bit and the accessed (young) bit, as others do.
> Are you saying that there are circumstances in which it is wrong
> for mk_pte() to set the additional bits?
MIPS is actually strange here, _PAGE_ACCESSED is not set in protection_map.
I do not understand history of mips neither. 

On x86/aarch/powerpc system, _PAGE_ACCESSED bit is set in the beginning. How does
software track memory page accessing frequency?  Does software not care current
status about _PAGE_ACCESSED bit, just calles madvise_cold to clear this bit, and
then watches whether this bit is changed or not?

regards
bibo,mao
> 
> I'm afraid that generic mm developers will have no clue as to whether
> or not to add a pte_sw_mkyoung() after a mk_pte(); and generic source
> will be the cleaner if it turns out not to be needed (but thank you
> for making sure that it does nothing on the other architectures).
> 
> Hugh
>
diff mbox series

Patch

diff --git a/mm/memory.c b/mm/memory.c
index 2eb59a9..2399dcb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2704,6 +2704,7 @@  static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
+		entry = pte_mkyoung(entry);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		/*
 		 * Clear the pte entry and flush it first, before updating the
@@ -3378,6 +3379,7 @@  static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	__SetPageUptodate(page);
 
 	entry = mk_pte(page, vma->vm_page_prot);
+	entry = pte_mkyoung(entry);
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3660,6 +3662,7 @@  vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
+	entry = pte_mkyoung(entry);
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 494192ca..673f1cd 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -131,6 +131,8 @@  static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 				ptent = pte_clear_uffd_wp(ptent);
 			}
 
+			if (vma->vm_flags & VM_READ)
+				ptent = pte_mkyoung(ptent);
 			/* Avoid taking write faults for known dirty pages */
 			if (dirty_accountable && pte_dirty(ptent) &&
 					(pte_soft_dirty(ptent) ||