diff mbox series

[1/2] MIPS: clean up CONFIG_MIPS_PGD_C0_CONTEXT handling

Message ID 20210309080210.25561-2-huangpei@loongson.cn (mailing list archive)
State Superseded
Headers show
Series [1/2] MIPS: clean up CONFIG_MIPS_PGD_C0_CONTEXT handling | expand

Commit Message

Huang Pei March 9, 2021, 8:02 a.m. UTC
+. LOONGSON64 uses 0x98xx_xxxx_xxxx_xxxx as xphys cached

+. let CONFIG_MIPS_PGD_C0_CONTEXT depend on 64bit

+. cast CAC_BASE into u64 to silence warning on MIPS32

CP0 Context has enough room for wrapping pgd into its 41-bit PTEBase field.

+. For XPHYS, the trick is that pgd is 4kB aligned, and the PABITS <= 48,
only save 48 - 12 + 5(for bit[63:59]) = 41 bits, aka. :

   bit[63:59] | 0000 0000 000 |  bit[47:12] | 0000 0000 0000

+. for CKSEG0, only save 29 - 12 = 17 bits

Signed-off-by: Huang Pei <huangpei@loongson.cn>
---
 arch/mips/Kconfig    |  3 ++-
 arch/mips/mm/tlbex.c | 10 +++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

Comments

Thomas Bogendoerfer March 12, 2021, 10:24 a.m. UTC | #1
On Tue, Mar 09, 2021 at 04:02:09PM +0800, Huang Pei wrote:
> +. LOONGSON64 use 0x98xx_xxxx_xxxx_xxxx as xphys cached
> 
> +. let CONFIG_MIPS_PGD_C0_CONTEXT depend on 64bit
> 
> +. cast CAC_BASE into u64 to silence warning on MIPS32
> 
> CP0 Context has enough room for wraping pgd into its 41-bit PTEBase field.
> 
> +. For XPHYS, the trick is that pgd is 4kB aligned, and the PABITS <= 48,
> only save 48 - 12 + 5(for bit[63:59]) = 41 bits, aka. :
> 
>    bit[63:59] | 0000 0000 000 |  bit[47:12] | 0000 0000 0000
> 
> +. for CKSEG0, only save 29 - 12 = 17 bits

you are explaining what you are doing, but not why you are doing this.
So why are you doing this ?

>  #
>  # Set to y for ptrace access to watch registers.
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index a7521b8f7658..591cfa0fca02 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -848,8 +848,8 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
>  		/* Clear lower 23 bits of context. */
>  		uasm_i_dins(p, ptr, 0, 0, 23);
>  
> -		/* 1 0	1 0 1  << 6  xkphys cached */
> -		uasm_i_ori(p, ptr, ptr, 0x540);
> +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));

you want to use bits 63..59 but picking bits 63..53 with this.  While
bits 58..53 are probably 0, wouldn't it make also sense to mask them out ?

>  		uasm_i_drotr(p, ptr, ptr, 11);
>  #elif defined(CONFIG_SMP)
>  		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
> @@ -1164,8 +1164,9 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
>  
>  	if (pgd_reg == -1) {
>  		vmalloc_branch_delay_filled = 1;
> -		/* 1 0	1 0 1  << 6  xkphys cached */
> -		uasm_i_ori(p, ptr, ptr, 0x540);
> +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
> +
>  		uasm_i_drotr(p, ptr, ptr, 11);
>  	}
>  
> @@ -1292,7 +1293,6 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
>  
>  	return rv;
>  }
> -
>  /*

why are you removing this empty line ? I'd prefer that it stays there...

>   * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
>   * because EXL == 0.  If we wrap, we can also use the 32 instruction
> -- 
> 2.17.1
Huang Pei March 13, 2021, 12:41 a.m. UTC | #2
Hi, 
On Fri, Mar 12, 2021 at 11:24:10AM +0100, Thomas Bogendoerfer wrote:
> On Tue, Mar 09, 2021 at 04:02:09PM +0800, Huang Pei wrote:
> > +. LOONGSON64 use 0x98xx_xxxx_xxxx_xxxx as xphys cached
> > 
> > +. let CONFIG_MIPS_PGD_C0_CONTEXT depend on 64bit
> > 
> > +. cast CAC_BASE into u64 to silence warning on MIPS32
> > 
> > CP0 Context has enough room for wraping pgd into its 41-bit PTEBase field.
> > 
> > +. For XPHYS, the trick is that pgd is 4kB aligned, and the PABITS <= 48,
> > only save 48 - 12 + 5(for bit[63:59]) = 41 bits, aka. :
> > 
> >    bit[63:59] | 0000 0000 000 |  bit[47:12] | 0000 0000 0000
> > 
> > +. for CKSEG0, only save 29 - 12 = 17 bits
> 
> you are explaining what you are doing, but not why you are doing this.
> So why are you doing this ?
> 
LOONGSON64 uses 0x98xx_xxxx_xxxx_xxxx as xphys cached, instead of
0xa8xx_xxxx_xxxx_xxxx.
> >  #
> >  # Set to y for ptrace access to watch registers.
> > diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> > index a7521b8f7658..591cfa0fca02 100644
> > --- a/arch/mips/mm/tlbex.c
> > +++ b/arch/mips/mm/tlbex.c
> > @@ -848,8 +848,8 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
> >  		/* Clear lower 23 bits of context. */
> >  		uasm_i_dins(p, ptr, 0, 0, 23);
> >  
> > -		/* 1 0	1 0 1  << 6  xkphys cached */
> > -		uasm_i_ori(p, ptr, ptr, 0x540);
> > +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> > +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
> 
> you want to use bits 63..59 but picking bits 63..53 with this.  While
> bits 58..53 are probably 0, wouldn't it make also sense to mask them out ?

In CP0 Context, xphys is wrapped as:

bit[47:12] (36 bits) | bit[63:59] (5 bits) | badv2 (19 bits) | 0 (4bits) 

bit[58:53] is located in badv2, which is not used. Whether it is an xphys cached
or a CKSEG0 address that was wrapped into CP0 Context, it is extracted as xphys
cached by prefixing it with bit[63:59].

> 
> >  		uasm_i_drotr(p, ptr, ptr, 11);
> >  #elif defined(CONFIG_SMP)
> >  		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
> > @@ -1164,8 +1164,9 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
> >  
> >  	if (pgd_reg == -1) {
> >  		vmalloc_branch_delay_filled = 1;
> > -		/* 1 0	1 0 1  << 6  xkphys cached */
> > -		uasm_i_ori(p, ptr, ptr, 0x540);
> > +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> > +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
> > +
> >  		uasm_i_drotr(p, ptr, ptr, 11);
> >  	}
> >  
> > @@ -1292,7 +1293,6 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
> >  
> >  	return rv;
> >  }
> > -
> >  /*
> 
> why are you removing this empty line ? I'd prefer that it stays there...
> 
> >   * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
> >   * because EXL == 0.  If we wrap, we can also use the 32 instruction
OK, I will resend V5
> > -- 
> > 2.17.1
> 
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
Huang Pei March 13, 2021, 1:18 a.m. UTC | #3
Hi, my calculation was wrong, but the result is right.


Here is the new one:

CP0 Context has enough room for wrapping pgd into its 41-bit PTEBase field.

+. For XPHYS, the trick is that pgd is 4kB aligned and PABITS <= 53, so we
only need to save 53 - 12 = 41 bits:

   bit[63:59] | 0000 00 |  bit[52:12] | 0000 0000 0000

+. for CKSEG0, only save 29 - 12 = 17 bits

So, when switching pgd, only bit[52:12] or bit[28:12] is saved into CP0 Context's
bit[63:23]; see the following asm generated at runtime, where a0 holds pgd:

	.set	push
	.set	noreorder

tlbmiss_handler_setup_pgd:

	dsra	a2, a0, 29
	move	a3, a0
	dins	a0, zero, 29, 35
	daddiu	a2, a2, 4

	movn	a0, a3, a2
	dsll	a0, a0, 11
	jr	ra
	dmtc0	a0, CP0_CONTEXT

	.set	pop

When pgd is used during page walking:

	dmfc0	k0, CP0_CONTEXT
	dins	k0, k0, 0, 23	         //zero badv2 
	ori	k0, k0, (CAC_BASE >> 53) //*prefix* with bit[63:59]
	drotr	k0, k0, 11		 // kick it at right position


On Fri, Mar 12, 2021 at 11:24:10AM +0100, Thomas Bogendoerfer wrote:
> On Tue, Mar 09, 2021 at 04:02:09PM +0800, Huang Pei wrote:
> > +. LOONGSON64 use 0x98xx_xxxx_xxxx_xxxx as xphys cached
> > 
> > +. let CONFIG_MIPS_PGD_C0_CONTEXT depend on 64bit
> > 
> > +. cast CAC_BASE into u64 to silence warning on MIPS32
> > 
> > CP0 Context has enough room for wraping pgd into its 41-bit PTEBase field.
> > 
> > +. For XPHYS, the trick is that pgd is 4kB aligned, and the PABITS <= 48,
> > only save 48 - 12 + 5(for bit[63:59]) = 41 bits, aka. :
> > 
> >    bit[63:59] | 0000 0000 000 |  bit[47:12] | 0000 0000 0000
> > 
> > +. for CKSEG0, only save 29 - 12 = 17 bits
> 
> you are explaining what you are doing, but not why you are doing this.
> So why are you doing this ?
> 
> >  #
> >  # Set to y for ptrace access to watch registers.
> > diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> > index a7521b8f7658..591cfa0fca02 100644
> > --- a/arch/mips/mm/tlbex.c
> > +++ b/arch/mips/mm/tlbex.c
> > @@ -848,8 +848,8 @@ void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
> >  		/* Clear lower 23 bits of context. */
> >  		uasm_i_dins(p, ptr, 0, 0, 23);
> >  
> > -		/* 1 0	1 0 1  << 6  xkphys cached */
> > -		uasm_i_ori(p, ptr, ptr, 0x540);
> > +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> > +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
> 
> you want to use bits 63..59 but picking bits 63..53 with this.  While
> bits 58..53 are probably 0, wouldn't it make also sense to mask them out ?
> 
> >  		uasm_i_drotr(p, ptr, ptr, 11);
> >  #elif defined(CONFIG_SMP)
> >  		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
> > @@ -1164,8 +1164,9 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
> >  
> >  	if (pgd_reg == -1) {
> >  		vmalloc_branch_delay_filled = 1;
> > -		/* 1 0	1 0 1  << 6  xkphys cached */
> > -		uasm_i_ori(p, ptr, ptr, 0x540);
> > +		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
> > +		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
> > +
> >  		uasm_i_drotr(p, ptr, ptr, 11);
> >  	}
> >  
> > @@ -1292,7 +1293,6 @@ build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
> >  
> >  	return rv;
> >  }
> > -
> >  /*
> 
> why are you removing this empty line ? I'd prefer that it stays there...
> 
> >   * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
> >   * because EXL == 0.  If we wrap, we can also use the 32 instruction
> > -- 
> > 2.17.1
> 
> -- 
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea.                                                [ RFC1925, 2.3 ]
diff mbox series

Patch

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 2000bb2b0220..5741dae35b74 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2142,7 +2142,8 @@  config CPU_SUPPORTS_HUGEPAGES
 	depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA))
 config MIPS_PGD_C0_CONTEXT
 	bool
-	default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
+	depends on 64BIT
+	default y if (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP
 
 #
 # Set to y for ptrace access to watch registers.
diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index a7521b8f7658..591cfa0fca02 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -848,8 +848,8 @@  void build_get_pmde64(u32 **p, struct uasm_label **l, struct uasm_reloc **r,
 		/* Clear lower 23 bits of context. */
 		uasm_i_dins(p, ptr, 0, 0, 23);
 
-		/* 1 0	1 0 1  << 6  xkphys cached */
-		uasm_i_ori(p, ptr, ptr, 0x540);
+		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
+		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
 		uasm_i_drotr(p, ptr, ptr, 11);
 #elif defined(CONFIG_SMP)
 		UASM_i_CPUID_MFC0(p, ptr, SMP_CPUID_REG);
@@ -1164,8 +1164,9 @@  build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 
 	if (pgd_reg == -1) {
 		vmalloc_branch_delay_filled = 1;
-		/* 1 0	1 0 1  << 6  xkphys cached */
-		uasm_i_ori(p, ptr, ptr, 0x540);
+		/* insert bit[63:59] of CAC_BASE into bit[11:6] of ptr */
+		uasm_i_ori(p, ptr, ptr, ((u64)(CAC_BASE) >> 53));
+
 		uasm_i_drotr(p, ptr, ptr, 11);
 	}
 
@@ -1292,7 +1293,6 @@  build_fast_tlb_refill_handler (u32 **p, struct uasm_label **l,
 
 	return rv;
 }
-
 /*
  * For a 64-bit kernel, we are using the 64-bit XTLB refill exception
  * because EXL == 0.  If we wrap, we can also use the 32 instruction