Message ID | 20240216073315.3801833-1-linux@roeck-us.net (mailing list archive) |
---|---|
State | Accepted, archived |
Headers | show |
Series | parisc/unaligned: Rewrite 64-bit inline assembly of emulate_ldd() | expand |
On 2/16/24 08:33, Guenter Roeck wrote: > Convert to use real temp variables instead of clobbering processor > registers. Thanks for doing this. It was on my todo list since quite some time :-) > This aligns the 64-bit inline assembly code with the 32-bit > assembly code which was rewritten with commit 427c1073a2a1 > ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). > > While at it, fix comment in 32-bit rewrite code. Temporary variables are > now used for both 32-bit and 64-bit code, so move their declarations > to the function header. > > No functional change intended. > > Signed-off-by: Guenter Roeck <linux@roeck-us.net> > --- > Implemented while analyzing a bug. I am not really sure of it is worth > the effort, but I figured that I might as well submit it. > > arch/parisc/kernel/unaligned.c | 29 +++++++++++++---------------- > 1 file changed, 13 insertions(+), 16 deletions(-) > > diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c > index c520e551a165..622c7b549fb8 100644 > --- a/arch/parisc/kernel/unaligned.c > +++ b/arch/parisc/kernel/unaligned.c > @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) > static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > { > unsigned long saddr = regs->ior; > - __u64 val = 0; > + unsigned long shift; > + __u64 val = 0, temp1; temp1 is ok to be "long". 
> ASM_EXCEPTIONTABLE_VAR(ret); > > DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", > @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > > #ifdef CONFIG_64BIT > __asm__ __volatile__ ( > -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ > -" mtsp %4, %%sr1\n" > -" depd %%r0,63,3,%3\n" > -"1: ldd 0(%%sr1,%3),%0\n" > -"2: ldd 8(%%sr1,%3),%%r20\n" > -" subi 64,%%r19,%%r19\n" > -" mtsar %%r19\n" > -" shrpd %0,%%r20,%%sar,%0\n" > +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */ > +" mtsp %5, %%sr1\n" > +" depd %%r0,63,3,%4\n" > +"1: ldd 0(%%sr1,%4),%0\n" > +"2: ldd 8(%%sr1,%4),%3\n" > +" subi 64,%2,%2\n" > +" mtsar %2\n" > +" shrpd %0,%3,%%sar,%0\n" > "3: \n" > ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") > ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") > - : "=r" (val), "+r" (ret) > - : "0" (val), "r" (saddr), "r" (regs->isr) > - : "r19", "r20" ); > + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1) > + : "r" (saddr), "r" (regs->isr) ); saddr is actually being modified. That's why I moved it into the output registers and shuffled shift and temp1 one backwards, so that the registers are now in the same ordering as on the 32-bit path. I've pushed the modified patch here: https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914 Please double-check! Thanks! Helge > #else > - { > - unsigned long shift, temp1; > __asm__ __volatile__ ( > -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ > +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ > " mtsp %5, %%sr1\n" > " dep %%r0,31,2,%2\n" > "1: ldw 0(%%sr1,%2),%0\n" > @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") > : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) > : "r" (regs->isr) ); > - } > #endif > > DPRINTF("val = 0x%llx\n", val);
On 2/16/24 05:48, Helge Deller wrote: > On 2/16/24 08:33, Guenter Roeck wrote: >> Convert to use real temp variables instead of clobbering processor >> registers. > > Thanks for doing this. > It was on my todo list since quite some time :-) > >> This aligns the 64-bit inline assembly code with the 32-bit >> assembly code which was rewritten with commit 427c1073a2a1 >> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). >> >> While at it, fix comment in 32-bit rewrite code. Temporary variables are >> now used for both 32-bit and 64-bit code, so move their declarations >> to the function header. >> >> No functional change intended. >> >> Signed-off-by: Guenter Roeck <linux@roeck-us.net> >> --- >> Implemented while analyzing a bug. I am not really sure of it is worth >> the effort, but I figured that I might as well submit it. >> >> arch/parisc/kernel/unaligned.c | 29 +++++++++++++---------------- >> 1 file changed, 13 insertions(+), 16 deletions(-) >> >> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c >> index c520e551a165..622c7b549fb8 100644 >> --- a/arch/parisc/kernel/unaligned.c >> +++ b/arch/parisc/kernel/unaligned.c >> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) >> static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) >> { >> unsigned long saddr = regs->ior; >> - __u64 val = 0; >> + unsigned long shift; >> + __u64 val = 0, temp1; > > temp1 is ok to be "long". 
> >> ASM_EXCEPTIONTABLE_VAR(ret); >> >> DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", >> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) >> >> #ifdef CONFIG_64BIT >> __asm__ __volatile__ ( >> -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ >> -" mtsp %4, %%sr1\n" >> -" depd %%r0,63,3,%3\n" >> -"1: ldd 0(%%sr1,%3),%0\n" >> -"2: ldd 8(%%sr1,%3),%%r20\n" >> -" subi 64,%%r19,%%r19\n" >> -" mtsar %%r19\n" >> -" shrpd %0,%%r20,%%sar,%0\n" >> +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */ >> +" mtsp %5, %%sr1\n" >> +" depd %%r0,63,3,%4\n" >> +"1: ldd 0(%%sr1,%4),%0\n" >> +"2: ldd 8(%%sr1,%4),%3\n" >> +" subi 64,%2,%2\n" >> +" mtsar %2\n" >> +" shrpd %0,%3,%%sar,%0\n" >> "3: \n" >> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") >> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") >> - : "=r" (val), "+r" (ret) >> - : "0" (val), "r" (saddr), "r" (regs->isr) >> - : "r19", "r20" ); >> + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1) >> + : "r" (saddr), "r" (regs->isr) ); > > addr is actually being modified. > That's why I moved it into the output registers and > shuffled shift and temp1 one backwards, so that the registers > are now in the same ordering as on the 32-bit path. > > I've pushed the modified patch here: > https://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux.git/commit/?h=for-next&id=a6ea53ce77e9dd6e388d673bdd4d80741f97b914 > > Please double-check! > Confirmed working. Thanks, Guenter > Thanks! 
> Helge > > >> #else >> - { >> - unsigned long shift, temp1; >> __asm__ __volatile__ ( >> -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ >> +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ >> " mtsp %5, %%sr1\n" >> " dep %%r0,31,2,%2\n" >> "1: ldw 0(%%sr1,%2),%0\n" >> @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) >> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") >> : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) >> : "r" (regs->isr) ); >> - } >> #endif >> >> DPRINTF("val = 0x%llx\n", val); >
Hi Helge, On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote: > Convert to use real temp variables instead of clobbering processor > registers. This aligns the 64-bit inline assembly code with the 32-bit > assembly code which was rewritten with commit 427c1073a2a1 > ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). > > While at it, fix comment in 32-bit rewrite code. Temporary variables are > now used for both 32-bit and 64-bit code, so move their declarations > to the function header. > > No functional change intended. > > Signed-off-by: Guenter Roeck <linux@roeck-us.net> > --- > Implemented while analyzing a bug. I am not really sure of it is worth > the effort, but I figured that I might as well submit it. > > arch/parisc/kernel/unaligned.c | 29 +++++++++++++---------------- > 1 file changed, 13 insertions(+), 16 deletions(-) > > diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c > index c520e551a165..622c7b549fb8 100644 > --- a/arch/parisc/kernel/unaligned.c > +++ b/arch/parisc/kernel/unaligned.c > @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) > static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > { > unsigned long saddr = regs->ior; > - __u64 val = 0; > + unsigned long shift; > + __u64 val = 0, temp1; > ASM_EXCEPTIONTABLE_VAR(ret); > > DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", > @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > > #ifdef CONFIG_64BIT > __asm__ __volatile__ ( > -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ > -" mtsp %4, %%sr1\n" > -" depd %%r0,63,3,%3\n" > -"1: ldd 0(%%sr1,%3),%0\n" > -"2: ldd 8(%%sr1,%3),%%r20\n" > -" subi 64,%%r19,%%r19\n" > -" mtsar %%r19\n" > -" shrpd %0,%%r20,%%sar,%0\n" > +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */ > +" mtsp %5, %%sr1\n" > +" depd %%r0,63,3,%4\n" > +"1: ldd 0(%%sr1,%4),%0\n" > +"2: ldd 8(%%sr1,%4),%3\n" > +" subi 64,%2,%2\n" > +" mtsar 
%2\n" > +" shrpd %0,%3,%%sar,%0\n" > "3: \n" > ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") > ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") > - : "=r" (val), "+r" (ret) > - : "0" (val), "r" (saddr), "r" (regs->isr) > - : "r19", "r20" ); > + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1) > + : "r" (saddr), "r" (regs->isr) ); It looks like something went wrong when this patch was applied. It is now +" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */ ... + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); meaning saddr is now %2, but the depd,z instruction still assumes it is %4. Unfortunately this results in a crash when trying to boot linux-next on parisc64. The patch below on top of linux-next fixes the problem for me. Guenter --- diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 31974eddedc9..a8e75e5b884a 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) #ifdef CONFIG_64BIT __asm__ __volatile__ ( -" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */ +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */ " mtsp %5, %%sr1\n" " depd %%r0,63,3,%2\n" "1: ldd 0(%%sr1,%2),%0\n"
On 2/26/24 20:29, Guenter Roeck wrote: > Hi Helge, > > On Thu, Feb 15, 2024 at 11:33:15PM -0800, Guenter Roeck wrote: >> Convert to use real temp variables instead of clobbering processor >> registers. This aligns the 64-bit inline assembly code with the 32-bit >> assembly code which was rewritten with commit 427c1073a2a1 >> ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). >> >> While at it, fix comment in 32-bit rewrite code. Temporary variables are >> now used for both 32-bit and 64-bit code, so move their declarations >> to the function header. >> >> No functional change intended. >> >> Signed-off-by: Guenter Roeck <linux@roeck-us.net> >> --- >> Implemented while analyzing a bug. I am not really sure of it is worth >> the effort, but I figured that I might as well submit it. >> >> arch/parisc/kernel/unaligned.c | 29 +++++++++++++---------------- >> 1 file changed, 13 insertions(+), 16 deletions(-) >> >> diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c >> index c520e551a165..622c7b549fb8 100644 >> --- a/arch/parisc/kernel/unaligned.c >> +++ b/arch/parisc/kernel/unaligned.c >> @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) >> static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) >> { >> unsigned long saddr = regs->ior; >> - __u64 val = 0; >> + unsigned long shift; >> + __u64 val = 0, temp1; >> ASM_EXCEPTIONTABLE_VAR(ret); >> >> DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", >> @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) >> >> #ifdef CONFIG_64BIT >> __asm__ __volatile__ ( >> -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ >> -" mtsp %4, %%sr1\n" >> -" depd %%r0,63,3,%3\n" >> -"1: ldd 0(%%sr1,%3),%0\n" >> -"2: ldd 8(%%sr1,%3),%%r20\n" >> -" subi 64,%%r19,%%r19\n" >> -" mtsar %%r19\n" >> -" shrpd %0,%%r20,%%sar,%0\n" >> +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */ >> +" mtsp %5, %%sr1\n" >> +" depd %%r0,63,3,%4\n" >> 
+"1: ldd 0(%%sr1,%4),%0\n" >> +"2: ldd 8(%%sr1,%4),%3\n" >> +" subi 64,%2,%2\n" >> +" mtsar %2\n" >> +" shrpd %0,%3,%%sar,%0\n" >> "3: \n" >> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") >> ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") >> - : "=r" (val), "+r" (ret) >> - : "0" (val), "r" (saddr), "r" (regs->isr) >> - : "r19", "r20" ); >> + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1) >> + : "r" (saddr), "r" (regs->isr) ); > > It looks like something went wrong when this patch was applied. I think this was my fault when I tried to reshuffle the input vars :-( > It is now > > +" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */ > ... > + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) > + : "r" (regs->isr) ); > > meaning saddr is now %2, but the depd,z instruction > still assumes it is %4. Unfortunately this results in a crash > when trying to boot linux-next on parisc64. > > The patch below on top of linux-next fixes the problem for me. I fixed it up with your hunk below in the parisc for-next branch, so it should be fixed in linux-next soon. THANKS! Helge > Guenter > > --- > iff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c > index 31974eddedc9..a8e75e5b884a 100644 > --- a/arch/parisc/kernel/unaligned.c > +++ b/arch/parisc/kernel/unaligned.c > @@ -181,7 +181,7 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) > > #ifdef CONFIG_64BIT > __asm__ __volatile__ ( > -" depd,z %4,60,3,%3\n" /* shift=(ofs&7)*8 */ > +" depd,z %2,60,3,%3\n" /* shift=(ofs&7)*8 */ > " mtsp %5, %%sr1\n" > " depd %%r0,63,3,%2\n" > "1: ldd 0(%%sr1,%2),%0\n"
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index c520e551a165..622c7b549fb8 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -169,7 +169,8 @@ static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; - __u64 val = 0; + unsigned long shift; + __u64 val = 0, temp1; ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", @@ -180,25 +181,22 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) #ifdef CONFIG_64BIT __asm__ __volatile__ ( -" depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ -" mtsp %4, %%sr1\n" -" depd %%r0,63,3,%3\n" -"1: ldd 0(%%sr1,%3),%0\n" -"2: ldd 8(%%sr1,%3),%%r20\n" -" subi 64,%%r19,%%r19\n" -" mtsar %%r19\n" -" shrpd %0,%%r20,%%sar,%0\n" +" depd,z %4,60,3,%2\n" /* shift=(ofs&7)*8 */ +" mtsp %5, %%sr1\n" +" depd %%r0,63,3,%4\n" +"1: ldd 0(%%sr1,%4),%0\n" +"2: ldd 8(%%sr1,%4),%3\n" +" subi 64,%2,%2\n" +" mtsar %2\n" +" shrpd %0,%3,%%sar,%0\n" "3: \n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b, "%1") ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b, "%1") - : "=r" (val), "+r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20" ); + : "+r" (val), "+r" (ret), "=&r" (shift), "=&r" (temp1) + : "r" (saddr), "r" (regs->isr) ); #else - { - unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" zdep %2,29,2,%3\n" /* shift=(ofs&3)*8 */ " mtsp %5, %%sr1\n" " dep %%r0,31,2,%2\n" "1: ldw 0(%%sr1,%2),%0\n" @@ -214,7 +212,6 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b, "%1") : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) : "r" (regs->isr) ); - } #endif DPRINTF("val = 0x%llx\n", val);
Convert to use real temp variables instead of clobbering processor registers. This aligns the 64-bit inline assembly code with the 32-bit assembly code which was rewritten with commit 427c1073a2a1 ("parisc/unaligned: Rewrite 32-bit inline assembly of emulate_ldd()"). While at it, fix comment in 32-bit rewrite code. Temporary variables are now used for both 32-bit and 64-bit code, so move their declarations to the function header. No functional change intended. Signed-off-by: Guenter Roeck <linux@roeck-us.net> --- Implemented while analyzing a bug. I am not really sure if it is worth the effort, but I figured that I might as well submit it. arch/parisc/kernel/unaligned.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-)