diff mbox

[v6,5/9] arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages

Message ID 20170810170906.30772-6-punit.agrawal@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

Punit Agrawal Aug. 10, 2017, 5:09 p.m. UTC

huge_pte_offset() was updated to correctly handle swap entries for
hugepages. With the addition of the size parameter, it is now possible
to disambiguate whether the request is for a regular hugepage or a
contiguous hugepage.

Fix huge_pte_offset() for contiguous hugepages by using the size to find
the correct page table entry.

Signed-off-by: Punit Agrawal <punit.agrawal@arm.com>
Cc: David Woods <dwoods@mellanox.com>
---
 arch/arm64/mm/hugetlbpage.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

Comments

Catalin Marinas Aug. 18, 2017, 11:20 a.m. UTC | #1
On Thu, Aug 10, 2017 at 06:09:02PM +0100, Punit Agrawal wrote:
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index d3a6713048a2..09e79785c019 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -210,6 +210,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>  	pgd_t *pgd;
>  	pud_t *pud;
>  	pmd_t *pmd;
> +	pte_t *pte;
>  
>  	pgd = pgd_offset(mm, addr);
>  	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
> @@ -217,19 +218,29 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>  		return NULL;
>  
>  	pud = pud_offset(pgd, addr);
> -	if (pud_none(*pud))
> +	if (pud_none(*pud) && sz != PUD_SIZE)
>  		return NULL;
>  	/* swap or huge page */
>  	if (!pud_present(*pud) || pud_huge(*pud))
>  		return (pte_t *)pud;
>  	/* table; check the next level */

So if sz == PUD_SIZE and we have pud_none(*pud) == true, it returns the
pud. Isn't this different from what you proposed for the generic
huge_pte_offset()? [1]

>  
> +	if (sz == CONT_PMD_SIZE)
> +		addr &= CONT_PMD_MASK;
> +
>  	pmd = pmd_offset(pud, addr);
> -	if (pmd_none(*pmd))
> +	if (pmd_none(*pmd) &&
> +	    !(sz == PMD_SIZE || sz == CONT_PMD_SIZE))
>  		return NULL;

Again, if sz == PMD_SIZE (or CONT_PMD_SIZE) and pmd_none(*pmd) is true,
you no longer return NULL. The generic proposal in [1] looks like:

	if (pmd_none(*pmd))
		return NULL;

and that's even when sz == PMD_SIZE.

Anyway, I think we need to push for [1] again to be accepted before we
go ahead with these changes.

[1] http://lkml.kernel.org/r/20170725154114.24131-2-punit.agrawal@arm.com

Punit Agrawal Aug. 18, 2017, 1:49 p.m. UTC | #2
Catalin Marinas <catalin.marinas@arm.com> writes:

> On Thu, Aug 10, 2017 at 06:09:02PM +0100, Punit Agrawal wrote:
>> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>> index d3a6713048a2..09e79785c019 100644
>> --- a/arch/arm64/mm/hugetlbpage.c
>> +++ b/arch/arm64/mm/hugetlbpage.c
>> @@ -210,6 +210,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>>  	pgd_t *pgd;
>>  	pud_t *pud;
>>  	pmd_t *pmd;
>> +	pte_t *pte;
>>  
>>  	pgd = pgd_offset(mm, addr);
>>  	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
>> @@ -217,19 +218,29 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
>>  		return NULL;
>>  
>>  	pud = pud_offset(pgd, addr);
>> -	if (pud_none(*pud))
>> +	if (pud_none(*pud) && sz != PUD_SIZE)
>>  		return NULL;
>>  	/* swap or huge page */
>>  	if (!pud_present(*pud) || pud_huge(*pud))
>>  		return (pte_t *)pud;
>>  	/* table; check the next level */
>
> So if sz == PUD_SIZE and we have pud_none(*pud) == true, it returns the
> pud. Isn't this different from what you proposed for the generic
> huge_pte_offset()? [1]

I think I missed this case in the generic version.

As hugetlb_fault() deals with p*d_none() entries by calling
hugetlb_no_page(), the thinking was that returning the p*d saves us an
extra round trip by avoiding the call to huge_pte_alloc().

>
>>  
>> +	if (sz == CONT_PMD_SIZE)
>> +		addr &= CONT_PMD_MASK;
>> +
>>  	pmd = pmd_offset(pud, addr);
>> -	if (pmd_none(*pmd))
>> +	if (pmd_none(*pmd) &&
>> +	    !(sz == PMD_SIZE || sz == CONT_PMD_SIZE))
>>  		return NULL;
>
> Again, if sz == PMD_SIZE (or CONT_PMD_SIZE) and pmd_none(*pmd) is true,
> you no longer return NULL. The generic proposal in [1] looks like:
>
> 	if (pmd_none(*pmd))
> 		return NULL;
>
> and that's even when sz == PMD_SIZE.
>
> Anyway, I think we need to push for [1] again to be accepted before we
> go ahead with these changes.

[1] is already queued in Andrew's tree. I'll send an update - hopefully
it can be picked up for the next merge window.

>
> [1] http://lkml.kernel.org/r/20170725154114.24131-2-punit.agrawal@arm.com

diff mbox

Patch

diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index d3a6713048a2..09e79785c019 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -210,6 +210,7 @@  pte_t *huge_pte_offset(struct mm_struct *mm,
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
+	pte_t *pte;
 
 	pgd = pgd_offset(mm, addr);
 	pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd);
@@ -217,19 +218,29 @@  pte_t *huge_pte_offset(struct mm_struct *mm,
 		return NULL;
 
 	pud = pud_offset(pgd, addr);
-	if (pud_none(*pud))
+	if (pud_none(*pud) && sz != PUD_SIZE)
 		return NULL;
 	/* swap or huge page */
 	if (!pud_present(*pud) || pud_huge(*pud))
 		return (pte_t *)pud;
 	/* table; check the next level */
 
+	if (sz == CONT_PMD_SIZE)
+		addr &= CONT_PMD_MASK;
+
 	pmd = pmd_offset(pud, addr);
-	if (pmd_none(*pmd))
+	if (pmd_none(*pmd) &&
+	    !(sz == PMD_SIZE || sz == CONT_PMD_SIZE))
 		return NULL;
 	if (!pmd_present(*pmd) || pmd_huge(*pmd))
 		return (pte_t *)pmd;
 
+	if (sz == CONT_PTE_SIZE) {
+		pte = pte_offset_kernel(
+			pmd, (addr & CONT_PTE_MASK));
+		return pte;
+	}
+
 	return NULL;
 }