diff mbox series

[RFC,1/3] arm64/hugetlb: Introduce new huge_ptep_get_access_flags() interface

Message ID a73f07314e79299b85fa4d7612d6ac22548f58c1.1651998586.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series Introduce new huge_ptep_get_access_flags() interface | expand

Commit Message

Baolin Wang May 8, 2022, 8:58 a.m. UTC
Now we use huge_ptep_get() to get the pte value of a hugetlb page,
however it will only return one specific pte value for the CONT-PTE
or CONT-PMD size hugetlb on ARM64 system, which can contain several
continuous pte or pmd entries with same page table attributes. And it
will not take into account the subpages' dirty or young bits of a
CONT-PTE/PMD size hugetlb page.

So the huge_ptep_get() is inconsistent with huge_ptep_get_and_clear(),
which already takes into account the dirty or young bits for any subpages
in this CONT-PTE/PMD size hugetlb [1]. Meanwhile we can miss dirty or
young flags statistics for hugetlb pages with current huge_ptep_get(),
such as the gather_hugetlb_stats() function.

Thus introduce a new huge_ptep_get_access_flags() interface and define
an ARM64 specific implementation, that will take into account any subpages'
dirty or young bits for CONT-PTE/PMD size hugetlb page, for those functions
that want to check the dirty and young flags of a hugetlb page.

[1] https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracle.com/

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 arch/arm64/include/asm/hugetlb.h |  2 ++
 arch/arm64/mm/hugetlbpage.c      | 24 ++++++++++++++++++++++++
 include/asm-generic/hugetlb.h    |  7 +++++++
 3 files changed, 33 insertions(+)

Comments

Yin, Fengwei May 8, 2022, 1:14 p.m. UTC | #1
On 2022年5月8日星期日 CST 下午4:58:52 Baolin Wang wrote:
> Now we use huge_ptep_get() to get the pte value of a hugetlb page,
> however it will only return one specific pte value for the CONT-PTE
> or CONT-PMD size hugetlb on ARM64 system, which can contain several
> continuous pte or pmd entries with same page table attributes. And it
> will not take into account the subpages' dirty or young bits of a
> CONT-PTE/PMD size hugetlb page.
> 
> So the huge_ptep_get() is inconsistent with huge_ptep_get_and_clear(),
> which already takes account the dirty or young bits for any subpages
> in this CONT-PTE/PMD size hugetlb [1]. Meanwhile we can miss dirty or
> young flags statistics for hugetlb pages with current huge_ptep_get(),
> such as the gather_hugetlb_stats() function.
> 
> Thus introduce a new huge_ptep_get_access_flags() interface and define
> an ARM64 specific implementation, that will take into account any subpages'
> dirty or young bits for CONT-PTE/PMD size hugetlb page, for those functions
> that want to check the dirty and young flags of a hugetlb page.
> 
> [1]
> https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracle.com/
> 
> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
> ---
>  arch/arm64/include/asm/hugetlb.h |  2 ++
>  arch/arm64/mm/hugetlbpage.c      | 24 ++++++++++++++++++++++++
>  include/asm-generic/hugetlb.h    |  7 +++++++
>  3 files changed, 33 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
> index 616b2ca..a473544 100644
> --- a/arch/arm64/include/asm/hugetlb.h
> +++ b/arch/arm64/include/asm/hugetlb.h
> @@ -44,6 +44,8 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
>  #define __HAVE_ARCH_HUGE_PTE_CLEAR
>  extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>  			   pte_t *ptep, unsigned long sz);
> +#define __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
> +extern pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz);
>  extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>  				 pte_t *ptep, pte_t pte, unsigned long sz);
>  #define set_huge_swap_pte_at set_huge_swap_pte_at
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index ca8e65c..ce39699 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -158,6 +158,30 @@ static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
>  	return contig_ptes;
>  }
> 
> +pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
The function name looks to me that it returns access flags of PTE.

> +{
> +	int ncontig, i;
> +	size_t pgsize;
> +	pte_t orig_pte = ptep_get(ptep);
> +
> +	if (!pte_cont(orig_pte))
> +		return orig_pte;
> +
> +	ncontig = num_contig_ptes(sz, &pgsize);
> +
> +	for (i = 0; i < ncontig; i++, ptep++) {
> +		pte_t pte = ptep_get(ptep);
> +
> +		if (pte_dirty(pte))
> +			orig_pte = pte_mkdirty(orig_pte);
> +
> +		if (pte_young(pte))
> +			orig_pte = pte_mkyoung(orig_pte);
> +	}
> +
> +	return orig_pte;
> +}
Not sure whether it's worthy being changed to:

        bool dirty = false, young = false;

        for (i = 0; i < ncontig; i++, ptep++) {
                pte_t pte = ptep_get(ptep);

                if (pte_dirty(pte))
                        dirty = true;

                if (pte_young(pte))
                        young = true;

                if (dirty && young)
                        break;
        }

        if (dirty)
                orig_pte = pte_mkdirty(orig_pte);

        if (young)
                orig_pte = pte_mkyoung(orig_pte);

        return orig_pte;

> +
>  /*
>   * Changing some bits of contiguous entries requires us to follow a
>   * Break-Before-Make approach, breaking the whole contiguous set
> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
> index a57d667..bb77fb0 100644
> --- a/include/asm-generic/hugetlb.h
> +++ b/include/asm-generic/hugetlb.h
> @@ -150,6 +150,13 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
>  }
>  #endif
> 
> +#ifndef __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
> +static inline pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
> +{
> +	return ptep_get(ptep);
Should be:
	return huge_ptep_get(ptep) ?


Regards
Yin, Fengwei

> +}
> +#endif
> +
>  #ifndef __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED
>  static inline bool gigantic_page_runtime_supported(void)
>  {
Baolin Wang May 9, 2022, 1:19 a.m. UTC | #2
On 5/8/2022 9:14 PM, nh26223@qq.com wrote:
> On 2022年5月8日星期日 CST 下午4:58:52 Baolin Wang wrote:
>> Now we use huge_ptep_get() to get the pte value of a hugetlb page,
>> however it will only return one specific pte value for the CONT-PTE
>> or CONT-PMD size hugetlb on ARM64 system, which can contain seravel
>> continuous pte or pmd entries with same page table attributes. And it
>> will not take into account the subpages' dirty or young bits of a
>> CONT-PTE/PMD size hugetlb page.
>>
>> So the huge_ptep_get() is inconsistent with huge_ptep_get_and_clear(),
>> which already takes account the dirty or young bits for any subpages
>> in this CONT-PTE/PMD size hugetlb [1]. Meanwhile we can miss dirty or
>> young flags statistics for hugetlb pages with current huge_ptep_get(),
>> such as the gather_hugetlb_stats() function.
>>
>> Thus introduce a new huge_ptep_get_access_flags() interface and define
>> an ARM64 specific implementation, that will take into account any subpages'
>> dirty or young bits for CONT-PTE/PMD size hugetlb page, for those functions
>> that want to check the dirty and young flags of a hugetlb page.
>>
>> [1]
>> https://lore.kernel.org/linux-mm/85bd80b4-b4fd-0d3f-a2e5-149559f2f387@oracl
>> e.com/
>>
>> Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
>> ---
>>   arch/arm64/include/asm/hugetlb.h |  2 ++
>>   arch/arm64/mm/hugetlbpage.c      | 24 ++++++++++++++++++++++++
>>   include/asm-generic/hugetlb.h    |  7 +++++++
>>   3 files changed, 33 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/hugetlb.h
>> b/arch/arm64/include/asm/hugetlb.h index 616b2ca..a473544 100644
>> --- a/arch/arm64/include/asm/hugetlb.h
>> +++ b/arch/arm64/include/asm/hugetlb.h
>> @@ -44,6 +44,8 @@ extern pte_t huge_ptep_clear_flush(struct vm_area_struct
>> *vma, #define __HAVE_ARCH_HUGE_PTE_CLEAR
>>   extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>   			   pte_t *ptep, unsigned long sz);
>> +#define __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
>> +extern pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz);
>>   extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>>   				 pte_t *ptep, pte_t pte, unsigned long
> sz);
>>   #define set_huge_swap_pte_at set_huge_swap_pte_at
>> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>> index ca8e65c..ce39699 100644
>> --- a/arch/arm64/mm/hugetlbpage.c
>> +++ b/arch/arm64/mm/hugetlbpage.c
>> @@ -158,6 +158,30 @@ static inline int num_contig_ptes(unsigned long size,
>> size_t *pgsize) return contig_ptes;
>>   }
>>
>> +pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
> The function name looks to me that it returns access flags of PTE.

Yes, not a good name. That's why this is an RFC patch set, to get more 
suggestions :)

Maybe huge_ptep_get_with_access_flags()? or do you have some better idea?

> 
>> +{
>> +	int ncontig, i;
>> +	size_t pgsize;
>> +	pte_t orig_pte = ptep_get(ptep);
>> +
>> +	if (!pte_cont(orig_pte))
>> +		return orig_pte;
>> +
>> +	ncontig = num_contig_ptes(sz, &pgsize);
>> +
>> +	for (i = 0; i < ncontig; i++, ptep++) {
>> +		pte_t pte = ptep_get(ptep);
>> +
>> +		if (pte_dirty(pte))
>> +			orig_pte = pte_mkdirty(orig_pte);
>> +
>> +		if (pte_young(pte))
>> +			orig_pte = pte_mkyoung(orig_pte);
>> +	}
>> +
>> +	return orig_pte;
>> +}
> Not sure whether it's worthy being changed to:
> 
>          bool dirty = false, young = false;
> 
>          for (i = 0; i < ncontig; i++, ptep++) {
>                  pte_t pte = ptep_get(ptep);
> 
>                  if (pte_dirty(pte))
>                          dirty = true;
> 
>                  if (pte_young(pte))
>                          young = true;
> 
>                  if (dirty && young)
>                          break;
>          }
> 
>          if (dirty)
>                  orig_pte = pte_mkdirty(orig_pte);
> 
>          if (young)
>                  orig_pte = pte_mkyoung(orig_pte);
> 
>          return orig_pte;

I followed the same logic as get_clear_flush(), which I think is more 
readable. Yes, your approach can save some cycles; I can switch to it 
in the next version if the arm64 maintainers have no objection.

>> +
>>   /*
>>    * Changing some bits of contiguous entries requires us to follow a
>>    * Break-Before-Make approach, breaking the whole contiguous set
>> diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
>> index a57d667..bb77fb0 100644
>> --- a/include/asm-generic/hugetlb.h
>> +++ b/include/asm-generic/hugetlb.h
>> @@ -150,6 +150,13 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
>>   }
>>   #endif
>>
>> +#ifndef __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
>> +static inline pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long
>> sz) +{
>> +	return ptep_get(ptep);
> Should be:
> 	return huge_ptep_get(ptep) ?

I don't think so. If no ARCH-specific definition, the 
huge_ptep_get_access_flags() implementation should be same as 
huge_ptep_get(). Thanks for your comments.

#ifndef __HAVE_ARCH_HUGE_PTEP_GET
static inline pte_t huge_ptep_get(pte_t *ptep)
{
         return ptep_get(ptep);
}
#endif
Yin, Fengwei May 9, 2022, 4:10 a.m. UTC | #3
----------------8<---------------
> >> 
> >> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> >> index ca8e65c..ce39699 100644
> >> --- a/arch/arm64/mm/hugetlbpage.c
> >> +++ b/arch/arm64/mm/hugetlbpage.c
> >> @@ -158,6 +158,30 @@ static inline int num_contig_ptes(unsigned long
> >> size,
> >> size_t *pgsize) return contig_ptes;
> >> 
> >>   }
> >> 
> >> +pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
> > 
> > The function name looks to me that it returns access flags of PTE.
> 
> Yes, not a good name. That's why this is a RFC patch set to get more
> suggestion :)
> 
> Maybe huge_ptep_get_with_access_flags()? or do you have some better idea?
I don't have either. "Naming is hard". :)

> >> diff --git a/include/asm-generic/hugetlb.h
> >> b/include/asm-generic/hugetlb.h
> >> index a57d667..bb77fb0 100644
> >> --- a/include/asm-generic/hugetlb.h
> >> +++ b/include/asm-generic/hugetlb.h
> >> @@ -150,6 +150,13 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
> >> 
> >>   }
> >>   #endif
> >> 
> >> +#ifndef __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
> >> +static inline pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned
> >> long
> >> sz) +{
> >> +	return ptep_get(ptep);
> > 
> > Should be:
> > 	return huge_ptep_get(ptep) ?
> 
> I don't think so. If no ARCH-specific definition, the
> huge_ptep_get_access_flags() implementation should be same as
> huge_ptep_get(). Thanks for your comments.
If __HAVE_ARCH_HUGE_PTEP_GET is not defined, huge_ptep_get() is the same as
ptep_get().

Or is it impossible to have __HAVE_ARCH_HUGE_PTEP_GET defined without
__HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS?


Regards
Yin, Fengwei

> 
> #ifndef __HAVE_ARCH_HUGE_PTEP_GET
> static inline pte_t huge_ptep_get(pte_t *ptep)
> {
>          return ptep_get(ptep);
> }
> #endif
Baolin Wang May 9, 2022, 4:19 a.m. UTC | #4
On 5/9/2022 12:10 PM, nh26223@qq.com wrote:
> ----------------8<---------------
>>>>
>>>> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>>>> index ca8e65c..ce39699 100644
>>>> --- a/arch/arm64/mm/hugetlbpage.c
>>>> +++ b/arch/arm64/mm/hugetlbpage.c
>>>> @@ -158,6 +158,30 @@ static inline int num_contig_ptes(unsigned long
>>>> size,
>>>> size_t *pgsize) return contig_ptes;
>>>>
>>>>    }
>>>>
>>>> +pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
>>>
>>> The function name looks to me that it returns access flags of PTE.
>>
>> Yes, not a good name. That's why this is a RFC patch set to get more
>> suggestion :)
>>
>> Maybe huge_ptep_get_with_access_flags()? or do you have some better idea?
> I don't have either. "Naming is hard". :)
> 
>>>> diff --git a/include/asm-generic/hugetlb.h
>>>> b/include/asm-generic/hugetlb.h
>>>> index a57d667..bb77fb0 100644
>>>> --- a/include/asm-generic/hugetlb.h
>>>> +++ b/include/asm-generic/hugetlb.h
>>>> @@ -150,6 +150,13 @@ static inline pte_t huge_ptep_get(pte_t *ptep)
>>>>
>>>>    }
>>>>    #endif
>>>>
>>>> +#ifndef __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
>>>> +static inline pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned
>>>> long
>>>> sz) +{
>>>> +	return ptep_get(ptep);
>>>
>>> Should be:
>>> 	return huge_ptep_get(ptep) ?
>>
>> I don't think so. If no ARCH-specific definition, the
>> huge_ptep_get_access_flags() implementation should be same as
>> huge_ptep_get(). Thanks for your comments.
> If no __HAVE_ARCH_HUGE_PTEP_GET, huge_ptep_get() is same as
> ptep_get().
> 
> Or it's not possible no __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
> but with __HAVE_ARCH_HUGE_PTEP_GET?

Yes, I was wrong; it should be huge_ptep_get(). Thanks for pointing out 
the issue :)

PS: I think I will follow Muchun's suggestion in the next version, so there 
is no need to add a new interface.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 616b2ca..a473544 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -44,6 +44,8 @@  extern pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 #define __HAVE_ARCH_HUGE_PTE_CLEAR
 extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, unsigned long sz);
+#define __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
+extern pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz);
 extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
 				 pte_t *ptep, pte_t pte, unsigned long sz);
 #define set_huge_swap_pte_at set_huge_swap_pte_at
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index ca8e65c..ce39699 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -158,6 +158,30 @@  static inline int num_contig_ptes(unsigned long size, size_t *pgsize)
 	return contig_ptes;
 }
 
+pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
+{
+	int ncontig, i;
+	size_t pgsize;
+	pte_t orig_pte = ptep_get(ptep);
+
+	if (!pte_cont(orig_pte))
+		return orig_pte;
+
+	ncontig = num_contig_ptes(sz, &pgsize);
+
+	for (i = 0; i < ncontig; i++, ptep++) {
+		pte_t pte = ptep_get(ptep);
+
+		if (pte_dirty(pte))
+			orig_pte = pte_mkdirty(orig_pte);
+
+		if (pte_young(pte))
+			orig_pte = pte_mkyoung(orig_pte);
+	}
+
+	return orig_pte;
+}
+
 /*
  * Changing some bits of contiguous entries requires us to follow a
  * Break-Before-Make approach, breaking the whole contiguous set
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index a57d667..bb77fb0 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -150,6 +150,13 @@  static inline pte_t huge_ptep_get(pte_t *ptep)
 }
 #endif
 
+#ifndef __HAVE_ARCH_HUGE_PTEP_GET_ACCESS_FLAGS
+static inline pte_t huge_ptep_get_access_flags(pte_t *ptep, unsigned long sz)
+{
+	return ptep_get(ptep);
+}
+#endif
+
 #ifndef __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED
 static inline bool gigantic_page_runtime_supported(void)
 {