diff mbox series

parisc: Use lpa instruction to load physical addresses in driver code

Message ID 5d53371f-d918-0333-08a7-ad0d04eb3b26@bell.net (mailing list archive)
State Accepted, archived
Headers show
Series parisc: Use lpa instruction to load physical addresses in driver code | expand

Commit Message

John David Anglin June 2, 2019, 11:12 p.m. UTC
Most I/O in the kernel is done using the kernel offset mapping.  However, there
is one API that uses aliased kernel address ranges:

> The final category of APIs is for I/O to deliberately aliased address
> ranges inside the kernel.  Such aliases are set up by use of the
> vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
> subsystem assumes that the user mapping and kernel offset mapping are
> the only aliases.  This isn't true for vmap aliases, so anything in
> the kernel trying to do I/O to vmap areas must manually manage
> coherency.  It must do this by flushing the vmap range before doing
> I/O and invalidating it after the I/O returns.

For this reason, we should use the hardware lpa instruction to load the physical address
of kernel virtual addresses in the driver code.

I believe we only use the vmap/vmalloc API with old PA 1.x processors which don't have
a sba, so we don't hit this problem.

Tested on c3750, c8000 and rp3440.

This patch includes the previous change to use implicit space register access in loading
the coherence index as the two changes conflict.

Signed-off-by: John David Anglin <dave.anglin@bell.net>
---

Comments

John David Anglin June 3, 2019, 11:44 a.m. UTC | #1
Hi Carlo,

No but this change is unlikely to fix your problem.  It's just something I noticed looking at driver
code.

The patch that may fix the problem is at the bottom of my last message on this subject,
"Re: [PATCH v3] parisc: Fix crash due alternative coding for NP iopdir_fdc bit".  Remove or go
with Helge's v1 alternative coding fix with 5.1.*.  Just apply change with 4.20.* or earlier.
This patch attempts to make I/O pages uncacheable.

The c3600 has a write-back cache.  One needs a flush/sync to make a line visible when page is
cacheable.  I think the problem is writes to the IO-PDIR are not getting to the controller until a
flush/sync is done because of the TLB setup for the I/O page.

Dave

On 2019-06-03 6:09 a.m., Carlo Pisani wrote:
> hi
> do I have to revert any previous patch, and apply this patch?
> can it be applied to 4.20? or only to 5.1.*?
>
>
> Il giorno lun 3 giu 2019 alle ore 01:12 John David Anglin
> <dave.anglin@bell.net> ha scritto:
>> Most I/O in the kernel is done using the kernel offset mapping.  However, there
>> is one API that uses aliased kernel address ranges:
>>
>>> The final category of APIs is for I/O to deliberately aliased address
>>> ranges inside the kernel.  Such aliases are set up by use of the
>>> vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
>>> subsystem assumes that the user mapping and kernel offset mapping are
>>> the only aliases.  This isn't true for vmap aliases, so anything in
>>> the kernel trying to do I/O to vmap areas must manually manage
>>> coherency.  It must do this by flushing the vmap range before doing
>>> I/O and invalidating it after the I/O returns.
>> For this reason, we should use the hardware lpa instruction to load the physical address
>> of kernel virtual addresses in the driver code.
>>
>> I believe we only use the vmap/vmalloc API with old PA 1.x processors which don't have
>> a sba, so we don't hit this problem.
>>
>> Tested on c3750, c8000 and rp3440.
>>
>> This patch includes the previous change to use implicit space register access in loading
>> the coherence index as the two changes conflict.
>>
>> Signed-off-by: John David Anglin <dave.anglin@bell.net>
>> ---
>> diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
>> index 3d4dd68e181b..a303ae9a77f4 100644
>> --- a/arch/parisc/include/asm/special_insns.h
>> +++ b/arch/parisc/include/asm/special_insns.h
>> @@ -2,6 +2,30 @@
>>  #ifndef __PARISC_SPECIAL_INSNS_H
>>  #define __PARISC_SPECIAL_INSNS_H
>>
>> +#define lpa(va)        ({                      \
>> +       unsigned long pa;               \
>> +       __asm__ __volatile__(           \
>> +               "copy %%r0,%0\n\t"      \
>> +               "lpa %%r0(%1),%0"       \
>> +               : "=r" (pa)             \
>> +               : "r" (va)              \
>> +               : "memory"              \
>> +       );                              \
>> +       pa;                             \
>> +})
>> +
>> +#define lpa_user(va)   ({              \
>> +       unsigned long pa;               \
>> +       __asm__ __volatile__(           \
>> +               "copy %%r0,%0\n\t"      \
>> +               "lpa %%r0(%%sr3,%1),%0" \
>> +               : "=r" (pa)             \
>> +               : "r" (va)              \
>> +               : "memory"              \
>> +       );                              \
>> +       pa;                             \
>> +})
>> +
>>  #define mfctl(reg)     ({              \
>>         unsigned long cr;               \
>>         __asm__ __volatile__(           \
>> diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
>> index 121f7603a595..217f15aafa4a 100644
>> --- a/drivers/parisc/ccio-dma.c
>> +++ b/drivers/parisc/ccio-dma.c
>> @@ -562,14 +562,12 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>>         /* We currently only support kernel addresses */
>>         BUG_ON(sid != KERNEL_SPACE);
>>
>> -       mtsp(sid,1);
>> -
>>         /*
>>         ** WORD 1 - low order word
>>         ** "hints" parm includes the VALID bit!
>>         ** "dep" clobbers the physical address offset bits as well.
>>         */
>> -       pa = virt_to_phys(vba);
>> +       pa = lpa(vba);
>>         asm volatile("depw  %1,31,12,%0" : "+r" (pa) : "r" (hints));
>>         ((u32 *)pdir_ptr)[1] = (u32) pa;
>>
>> @@ -594,7 +592,7 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>>         ** Grab virtual index [0:11]
>>         ** Deposit virt_idx bits into I/O PDIR word
>>         */
>> -       asm volatile ("lci %%r0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
>> +       asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
>>         asm volatile ("extru %1,19,12,%0" : "+r" (ci) : "r" (ci));
>>         asm volatile ("depw  %1,15,12,%0" : "+r" (pa) : "r" (ci));
>>
>> diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
>> index 8a9ea9bd050c..296668caf7e5 100644
>> --- a/drivers/parisc/sba_iommu.c
>> +++ b/drivers/parisc/sba_iommu.c
>> @@ -569,11 +569,10 @@ sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>>         u64 pa; /* physical address */
>>         register unsigned ci; /* coherent index */
>>
>> -       pa = virt_to_phys(vba);
>> +       pa = lpa(vba);
>>         pa &= IOVP_MASK;
>>
>> -       mtsp(sid,1);
>> -       asm("lci 0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
>> +       asm("lci 0(%1), %0" : "=r" (ci) : "r" (vba));
>>         pa |= (ci >> PAGE_SHIFT) & 0xff;  /* move CI (8 bits) into lowest byte */
>>
>>         pa |= SBA_PDIR_VALID_BIT;       /* set "valid" bit */
Helge Deller June 4, 2019, 7:36 p.m. UTC | #2
Hi Dave,

On 03.06.19 01:12, John David Anglin wrote:
> Most I/O in the kernel is done using the kernel offset mapping.  However, there
> is one API that uses aliased kernel address ranges:
>
>> The final category of APIs is for I/O to deliberately aliased address
>> ranges inside the kernel.  Such aliases are set up by use of the
>> vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
>> subsystem assumes that the user mapping and kernel offset mapping are
>> the only aliases.  This isn't true for vmap aliases, so anything in
>> the kernel trying to do I/O to vmap areas must manually manage
>> coherency.  It must do this by flushing the vmap range before doing
>> I/O and invalidating it after the I/O returns.
>
> For this reason, we should use the hardware lpa instruction to load the physical address
> of kernel virtual addresses in the driver code.
>
> I believe we only use the vmap/vmalloc API with old PA 1.x processors which don't have
> a sba, so we don't hit this problem.
>
> Tested on c3750, c8000 and rp3440.
>
> This patch includes the previous change to use implicit space register access in loading
> the coherence index as the two changes conflict.

Actually, I think it makes sense to push the drop-sr1/use-lci-without-sr1
change backward to the stable kernel series.
After that, in the second step, we could add the code to use lpa(), which
I don't think should go to stable series.
Would it be OK for you if we split it up into two patches?

Helge

>
> Signed-off-by: John David Anglin <dave.anglin@bell.net>
> ---
> diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
> index 3d4dd68e181b..a303ae9a77f4 100644
> --- a/arch/parisc/include/asm/special_insns.h
> +++ b/arch/parisc/include/asm/special_insns.h
> @@ -2,6 +2,30 @@
>  #ifndef __PARISC_SPECIAL_INSNS_H
>  #define __PARISC_SPECIAL_INSNS_H
>
> +#define lpa(va)	({			\
> +	unsigned long pa;		\
> +	__asm__ __volatile__(		\
> +		"copy %%r0,%0\n\t"	\
> +		"lpa %%r0(%1),%0"	\
> +		: "=r" (pa)		\
> +		: "r" (va)		\
> +		: "memory"		\
> +	);				\
> +	pa;				\
> +})
> +
> +#define lpa_user(va)	({		\
> +	unsigned long pa;		\
> +	__asm__ __volatile__(		\
> +		"copy %%r0,%0\n\t"	\
> +		"lpa %%r0(%%sr3,%1),%0"	\
> +		: "=r" (pa)		\
> +		: "r" (va)		\
> +		: "memory"		\
> +	);				\
> +	pa;				\
> +})
> +
>  #define mfctl(reg)	({		\
>  	unsigned long cr;		\
>  	__asm__ __volatile__(		\
> diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
> index 121f7603a595..217f15aafa4a 100644
> --- a/drivers/parisc/ccio-dma.c
> +++ b/drivers/parisc/ccio-dma.c
> @@ -562,14 +562,12 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>  	/* We currently only support kernel addresses */
>  	BUG_ON(sid != KERNEL_SPACE);
>
> -	mtsp(sid,1);
> -
>  	/*
>  	** WORD 1 - low order word
>  	** "hints" parm includes the VALID bit!
>  	** "dep" clobbers the physical address offset bits as well.
>  	*/
> -	pa = virt_to_phys(vba);
> +	pa = lpa(vba);
>  	asm volatile("depw  %1,31,12,%0" : "+r" (pa) : "r" (hints));
>  	((u32 *)pdir_ptr)[1] = (u32) pa;
>
> @@ -594,7 +592,7 @@ ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>  	** Grab virtual index [0:11]
>  	** Deposit virt_idx bits into I/O PDIR word
>  	*/
> -	asm volatile ("lci %%r0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
> +	asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
>  	asm volatile ("extru %1,19,12,%0" : "+r" (ci) : "r" (ci));
>  	asm volatile ("depw  %1,15,12,%0" : "+r" (pa) : "r" (ci));
>
> diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
> index 8a9ea9bd050c..296668caf7e5 100644
> --- a/drivers/parisc/sba_iommu.c
> +++ b/drivers/parisc/sba_iommu.c
> @@ -569,11 +569,10 @@ sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
>  	u64 pa; /* physical address */
>  	register unsigned ci; /* coherent index */
>
> -	pa = virt_to_phys(vba);
> +	pa = lpa(vba);
>  	pa &= IOVP_MASK;
>
> -	mtsp(sid,1);
> -	asm("lci 0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
> +	asm("lci 0(%1), %0" : "=r" (ci) : "r" (vba));
>  	pa |= (ci >> PAGE_SHIFT) & 0xff;  /* move CI (8 bits) into lowest byte */
>
>  	pa |= SBA_PDIR_VALID_BIT;	/* set "valid" bit */
>
John David Anglin June 4, 2019, 7:54 p.m. UTC | #3
On 2019-06-04 3:36 p.m., Helge Deller wrote:
> Actually, I think it makes sense to push the drop-sr1/use-lci-without-sr1
> change backward to the stable kernel series.
> After that, in the second step, we could add the code to use lpa(), which
> I don't think should go to stable series.
> Would it be OK for you if we split it up into two patches?

No problem.  I didn't have an easy way to separate changes.  There's a possible
race in using %sr1 so doing a stable backport makes sense.

Using lpa() is slightly slower than virt_to_phys() as it may generate a TLB miss.
On the other hand, it handles any mapping.

dave
Carlo Pisani June 5, 2019, 12:38 p.m. UTC | #4
hi guys
has Adaptec aha-3950u2 been tested on HPPA?
it looks a PCI64 card with a RISC chip on it for processing SCSI commands.

So it looks perfect for a stress test on the PCI bus

found a discount for qty=4 units, so I am considering it
Sven Schnelle June 5, 2019, 2:33 p.m. UTC | #5
Hi,

On Wed, Jun 05, 2019 at 02:38:31PM +0200, Carlo Pisani wrote:
> hi guys
> has Adaptec aha-3950u2 been tested on HPPA?
> it looks a PCI64 card with a RISC chip on it for processing SCSI commands.
> 
> So it looks perfect for a stress test on the PCI bus
> 
> found a discount for qty=4 units, so I am considering it

Can't say about 3950u2, but a 29320A works in my J5000.
Unlikely that this is well tested though, as NCR/LSI/Symbios Logic
was the default SCSI chipset vendor in the PA-RISC world.

Sven
diff mbox series

Patch

diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h
index 3d4dd68e181b..a303ae9a77f4 100644
--- a/arch/parisc/include/asm/special_insns.h
+++ b/arch/parisc/include/asm/special_insns.h
@@ -2,6 +2,30 @@ 
 #ifndef __PARISC_SPECIAL_INSNS_H
 #define __PARISC_SPECIAL_INSNS_H

+#define lpa(va)	({			\
+	unsigned long pa;		\
+	__asm__ __volatile__(		\
+		"copy %%r0,%0\n\t"	\
+		"lpa %%r0(%1),%0"	\
+		: "=r" (pa)		\
+		: "r" (va)		\
+		: "memory"		\
+	);				\
+	pa;				\
+})
+
+#define lpa_user(va)	({		\
+	unsigned long pa;		\
+	__asm__ __volatile__(		\
+		"copy %%r0,%0\n\t"	\
+		"lpa %%r0(%%sr3,%1),%0"	\
+		: "=r" (pa)		\
+		: "r" (va)		\
+		: "memory"		\
+	);				\
+	pa;				\
+})
+
 #define mfctl(reg)	({		\
 	unsigned long cr;		\
 	__asm__ __volatile__(		\
diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c
index 121f7603a595..217f15aafa4a 100644
--- a/drivers/parisc/ccio-dma.c
+++ b/drivers/parisc/ccio-dma.c
@@ -562,14 +562,12 @@  ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
 	/* We currently only support kernel addresses */
 	BUG_ON(sid != KERNEL_SPACE);

-	mtsp(sid,1);
-
 	/*
 	** WORD 1 - low order word
 	** "hints" parm includes the VALID bit!
 	** "dep" clobbers the physical address offset bits as well.
 	*/
-	pa = virt_to_phys(vba);
+	pa = lpa(vba);
 	asm volatile("depw  %1,31,12,%0" : "+r" (pa) : "r" (hints));
 	((u32 *)pdir_ptr)[1] = (u32) pa;

@@ -594,7 +592,7 @@  ccio_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
 	** Grab virtual index [0:11]
 	** Deposit virt_idx bits into I/O PDIR word
 	*/
-	asm volatile ("lci %%r0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
+	asm volatile ("lci %%r0(%1), %0" : "=r" (ci) : "r" (vba));
 	asm volatile ("extru %1,19,12,%0" : "+r" (ci) : "r" (ci));
 	asm volatile ("depw  %1,15,12,%0" : "+r" (pa) : "r" (ci));

diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index 8a9ea9bd050c..296668caf7e5 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -569,11 +569,10 @@  sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba,
 	u64 pa; /* physical address */
 	register unsigned ci; /* coherent index */

-	pa = virt_to_phys(vba);
+	pa = lpa(vba);
 	pa &= IOVP_MASK;

-	mtsp(sid,1);
-	asm("lci 0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba));
+	asm("lci 0(%1), %0" : "=r" (ci) : "r" (vba));
 	pa |= (ci >> PAGE_SHIFT) & 0xff;  /* move CI (8 bits) into lowest byte */

 	pa |= SBA_PDIR_VALID_BIT;	/* set "valid" bit */