diff mbox series

[v4,6/7] iommu/dma: Centralise iommu_setup_dma_ops()

Message ID bebea331c1d688b34d9862eefd5ede47503961b8.1713523152.git.robin.murphy@arm.com (mailing list archive)
State New, archived
Headers show
Series iommu, dma-mapping: Simplify arch_setup_dma_ops() | expand

Commit Message

Robin Murphy April 19, 2024, 4:54 p.m. UTC
It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
which means there should be no harm in achieving the same order of
operations by running it off the back of iommu_probe_device() itself.
This then puts it in line with the x86 and s390 .probe_finalize bodges,
letting us pull it all into the main flow properly. As a bonus this lets
us fold in and de-scope the PCI workaround setup as well.

At this point we can also then pull the call up inside the group mutex,
and avoid having to think about whether iommu_group_store_type() could
theoretically race and free the domain if iommu_setup_dma_ops() ran just
*before* iommu_device_use_default_domain() claims it... Furthermore we
replace one .probe_finalize call completely, since the only remaining
implementations are now one which only needs to run once for the initial
boot-time probe, and two which themselves render that path unreachable.

This leaves us a big step closer to realistically being able to unpick
the variety of different things that iommu_setup_dma_ops() has been
muddling together, and further streamline iommu-dma into core API flows
in future.

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
    is covered as well, with bonus side-effects as above.
v3: *Really* do that, remembering the other two probe_finalize sites too.
---
 arch/arm64/mm/dma-mapping.c  |  2 --
 drivers/iommu/amd/iommu.c    |  8 --------
 drivers/iommu/dma-iommu.c    | 18 ++++++------------
 drivers/iommu/dma-iommu.h    | 14 ++++++--------
 drivers/iommu/intel/iommu.c  |  7 -------
 drivers/iommu/iommu.c        | 20 +++++++-------------
 drivers/iommu/s390-iommu.c   |  6 ------
 drivers/iommu/virtio-iommu.c | 10 ----------
 include/linux/iommu.h        |  7 -------
 9 files changed, 19 insertions(+), 73 deletions(-)

Comments

Catalin Marinas April 23, 2024, 9:57 a.m. UTC | #1
On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> index 61886e43e3a1..313d8938a2f0 100644
> --- a/arch/arm64/mm/dma-mapping.c
> +++ b/arch/arm64/mm/dma-mapping.c
> @@ -58,8 +58,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
>  		   ARCH_DMA_MINALIGN, cls);
>  
>  	dev->dma_coherent = coherent;
> -	if (device_iommu_mapped(dev))
> -		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
>  
>  	xen_setup_dma_ops(dev);
>  }

In case you need an ack for the arm64 changes:

Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Dmitry Baryshkov April 29, 2024, 4:31 p.m. UTC | #2
On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
> which means there should be no harm in achieving the same order of
> operations by running it off the back of iommu_probe_device() itself.
> This then puts it in line with the x86 and s390 .probe_finalize bodges,
> letting us pull it all into the main flow properly. As a bonus this lets
> us fold in and de-scope the PCI workaround setup as well.
> 
> At this point we can also then pull the call up inside the group mutex,
> and avoid having to think about whether iommu_group_store_type() could
> theoretically race and free the domain if iommu_setup_dma_ops() ran just
> *before* iommu_device_use_default_domain() claims it... Furthermore we
> replace one .probe_finalize call completely, since the only remaining
> implementations are now one which only needs to run once for the initial
> boot-time probe, and two which themselves render that path unreachable.
> 
> This leaves us a big step closer to realistically being able to unpick
> the variety of different things that iommu_setup_dma_ops() has been
> muddling together, and further streamline iommu-dma into core API flows
> in future.
> 
> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Tested-by: Hanjun Guo <guohanjun@huawei.com>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
>     is covered as well, with bonus side-effects as above.
> v3: *Really* do that, remembering the other two probe_finalize sites too.
> ---
>  arch/arm64/mm/dma-mapping.c  |  2 --
>  drivers/iommu/amd/iommu.c    |  8 --------
>  drivers/iommu/dma-iommu.c    | 18 ++++++------------
>  drivers/iommu/dma-iommu.h    | 14 ++++++--------
>  drivers/iommu/intel/iommu.c  |  7 -------
>  drivers/iommu/iommu.c        | 20 +++++++-------------
>  drivers/iommu/s390-iommu.c   |  6 ------
>  drivers/iommu/virtio-iommu.c | 10 ----------
>  include/linux/iommu.h        |  7 -------
>  9 files changed, 19 insertions(+), 73 deletions(-)

This patch breaks UFS on Qualcomm SC8180X Primus platform:


[    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
[    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
[    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
[    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
[    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
[    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
[    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
[    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
[    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
[    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
[    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
[    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
[    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
[    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
[    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
Dmitry Baryshkov April 29, 2024, 9:26 p.m. UTC | #3
On Mon, 29 Apr 2024 at 19:31, Dmitry Baryshkov
<dmitry.baryshkov@linaro.org> wrote:
>
> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
> > It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
> > ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
> > which means there should be no harm in achieving the same order of
> > operations by running it off the back of iommu_probe_device() itself.
> > This then puts it in line with the x86 and s390 .probe_finalize bodges,
> > letting us pull it all into the main flow properly. As a bonus this lets
> > us fold in and de-scope the PCI workaround setup as well.
> >
> > At this point we can also then pull the call up inside the group mutex,
> > and avoid having to think about whether iommu_group_store_type() could
> > theoretically race and free the domain if iommu_setup_dma_ops() ran just
> > *before* iommu_device_use_default_domain() claims it... Furthermore we
> > replace one .probe_finalize call completely, since the only remaining
> > implementations are now one which only needs to run once for the initial
> > boot-time probe, and two which themselves render that path unreachable.
> >
> > This leaves us a big step closer to realistically being able to unpick
> > the variety of different things that iommu_setup_dma_ops() has been
> > muddling together, and further streamline iommu-dma into core API flows
> > in future.
> >
> > Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
> > Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> > Tested-by: Hanjun Guo <guohanjun@huawei.com>
> > Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> > ---
> > v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
> >     is covered as well, with bonus side-effects as above.
> > v3: *Really* do that, remembering the other two probe_finalize sites too.
> > ---
> >  arch/arm64/mm/dma-mapping.c  |  2 --
> >  drivers/iommu/amd/iommu.c    |  8 --------
> >  drivers/iommu/dma-iommu.c    | 18 ++++++------------
> >  drivers/iommu/dma-iommu.h    | 14 ++++++--------
> >  drivers/iommu/intel/iommu.c  |  7 -------
> >  drivers/iommu/iommu.c        | 20 +++++++-------------
> >  drivers/iommu/s390-iommu.c   |  6 ------
> >  drivers/iommu/virtio-iommu.c | 10 ----------
> >  include/linux/iommu.h        |  7 -------
> >  9 files changed, 19 insertions(+), 73 deletions(-)
>
> This patch breaks UFS on Qualcomm SC8180X Primus platform:
>
>
> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
> [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
> [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
> [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
> [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
> [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
> [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
> [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
> [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
> [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
> [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
> [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
> [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
> [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
> [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11

Just to confirm: reverting f091e93306e0 ("dma-mapping: Simplify
arch_setup_dma_ops()") and b67483b3c44e ("iommu/dma: Centralise
iommu_setup_dma_ops()" fixes the issue for me. Please ping me if you'd
like me to test a fix.
Robin Murphy April 29, 2024, 10:26 p.m. UTC | #4
On 2024-04-29 5:31 pm, Dmitry Baryshkov wrote:
> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
>> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
>> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
>> which means there should be no harm in achieving the same order of
>> operations by running it off the back of iommu_probe_device() itself.
>> This then puts it in line with the x86 and s390 .probe_finalize bodges,
>> letting us pull it all into the main flow properly. As a bonus this lets
>> us fold in and de-scope the PCI workaround setup as well.
>>
>> At this point we can also then pull the call up inside the group mutex,
>> and avoid having to think about whether iommu_group_store_type() could
>> theoretically race and free the domain if iommu_setup_dma_ops() ran just
>> *before* iommu_device_use_default_domain() claims it... Furthermore we
>> replace one .probe_finalize call completely, since the only remaining
>> implementations are now one which only needs to run once for the initial
>> boot-time probe, and two which themselves render that path unreachable.
>>
>> This leaves us a big step closer to realistically being able to unpick
>> the variety of different things that iommu_setup_dma_ops() has been
>> muddling together, and further streamline iommu-dma into core API flows
>> in future.
>>
>> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>> Tested-by: Hanjun Guo <guohanjun@huawei.com>
>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>> ---
>> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
>>      is covered as well, with bonus side-effects as above.
>> v3: *Really* do that, remembering the other two probe_finalize sites too.
>> ---
>>   arch/arm64/mm/dma-mapping.c  |  2 --
>>   drivers/iommu/amd/iommu.c    |  8 --------
>>   drivers/iommu/dma-iommu.c    | 18 ++++++------------
>>   drivers/iommu/dma-iommu.h    | 14 ++++++--------
>>   drivers/iommu/intel/iommu.c  |  7 -------
>>   drivers/iommu/iommu.c        | 20 +++++++-------------
>>   drivers/iommu/s390-iommu.c   |  6 ------
>>   drivers/iommu/virtio-iommu.c | 10 ----------
>>   include/linux/iommu.h        |  7 -------
>>   9 files changed, 19 insertions(+), 73 deletions(-)
> 
> This patch breaks UFS on Qualcomm SC8180X Primus platform:
> 
> 
> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4

Hmm, a context fault implies that the device did get attached to a DMA 
domain, thus has successfully been through __iommu_probe_device(), yet 
somehow still didn't get the right DMA ops (since that "IOVA" looks more 
like a PA to me). Do you see the "Adding to IOMMU group..." message for 
this device, and/or any other relevant messages or errors before this 
point? I'm guessing there's a fair chance probe deferral might be 
involved as well. I'd like to understand what path(s) this ends up 
taking through __iommu_probe_device() and of_dma_configure(), or at 
least the number and order of probe attempts between the UFS and SMMU 
drivers.

I'll stare at the code in the morning and see if I can spot any 
overlooked ways in which what I think might be happening could happen, 
but any more info to help narrow it down would be much appreciated.

Thanks,
Robin.

> [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
> [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
> [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
> [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
> [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
> [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
> [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
> [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
> [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
> [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
> [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
> [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
> [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
> [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
>
Dmitry Baryshkov April 30, 2024, 12:41 a.m. UTC | #5
On Tue, 30 Apr 2024 at 01:26, Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2024-04-29 5:31 pm, Dmitry Baryshkov wrote:
> > On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
> >> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
> >> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
> >> which means there should be no harm in achieving the same order of
> >> operations by running it off the back of iommu_probe_device() itself.
> >> This then puts it in line with the x86 and s390 .probe_finalize bodges,
> >> letting us pull it all into the main flow properly. As a bonus this lets
> >> us fold in and de-scope the PCI workaround setup as well.
> >>
> >> At this point we can also then pull the call up inside the group mutex,
> >> and avoid having to think about whether iommu_group_store_type() could
> >> theoretically race and free the domain if iommu_setup_dma_ops() ran just
> >> *before* iommu_device_use_default_domain() claims it... Furthermore we
> >> replace one .probe_finalize call completely, since the only remaining
> >> implementations are now one which only needs to run once for the initial
> >> boot-time probe, and two which themselves render that path unreachable.
> >>
> >> This leaves us a big step closer to realistically being able to unpick
> >> the variety of different things that iommu_setup_dma_ops() has been
> >> muddling together, and further streamline iommu-dma into core API flows
> >> in future.
> >>
> >> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
> >> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> >> Tested-by: Hanjun Guo <guohanjun@huawei.com>
> >> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> >> ---
> >> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
> >>      is covered as well, with bonus side-effects as above.
> >> v3: *Really* do that, remembering the other two probe_finalize sites too.
> >> ---
> >>   arch/arm64/mm/dma-mapping.c  |  2 --
> >>   drivers/iommu/amd/iommu.c    |  8 --------
> >>   drivers/iommu/dma-iommu.c    | 18 ++++++------------
> >>   drivers/iommu/dma-iommu.h    | 14 ++++++--------
> >>   drivers/iommu/intel/iommu.c  |  7 -------
> >>   drivers/iommu/iommu.c        | 20 +++++++-------------
> >>   drivers/iommu/s390-iommu.c   |  6 ------
> >>   drivers/iommu/virtio-iommu.c | 10 ----------
> >>   include/linux/iommu.h        |  7 -------
> >>   9 files changed, 19 insertions(+), 73 deletions(-)
> >
> > This patch breaks UFS on Qualcomm SC8180X Primus platform:
> >
> >
> > [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
>
> Hmm, a context fault implies that the device did get attached to a DMA
> domain, thus has successfully been through __iommu_probe_device(), yet
> somehow still didn't get the right DMA ops (since that "IOVA" looks more
> like a PA to me). Do you see the "Adding to IOMMU group..." message for
> this device, and/or any other relevant messages or errors before this
> point?

No, nothing relevant.

[    8.372395] ufshcd-qcom 1d84000.ufshc: Adding to iommu group 6

(please ignore the timestamp, it comes before ufshc being probed).

> I'm guessing there's a fair chance probe deferral might be
> involved as well. I'd like to understand what path(s) this ends up
> taking through __iommu_probe_device() and of_dma_configure(), or at
> least the number and order of probe attempts between the UFS and SMMU
> drivers.

__iommu_probe_device() gets called twice and returns early because ops is NULL.

Then finally of_dma_configure_id() is called. The following branches are taken:

np == dev->of_node
of_dma_get_range() returned 0
bus_dma_limit and dma_range_map are set
__iommu_probe_device() is called, using the `!group->default_domain &&
!group_lis` case, then group->default_domain() is not NULL,
In the end, iommu_setup_dma_ops() is called.

Then the ufshc probe defers (most likely the PHY is not present or
some other device is not there yet).

On the next (succeeding) try, of_dma_configure_id() is called again.
The call trace is more or less the same, except that
__iommu_probe_device() is not called

> I'll stare at the code in the morning and see if I can spot any
> overlooked ways in which what I think might be happening could happen,
> but any more info to help narrow it down would be much appreciated.
>
> Thanks,
> Robin.
>
> > [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
> > [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
> > [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
> > [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
> > [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
> > [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
> > [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
> > [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
> > [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
> > [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
> > [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
> > [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
> > [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
> > [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
> >
Robin Murphy April 30, 2024, 10:20 a.m. UTC | #6
On 2024-04-30 1:41 am, Dmitry Baryshkov wrote:
> On Tue, 30 Apr 2024 at 01:26, Robin Murphy <robin.murphy@arm.com> wrote:
>>
>> On 2024-04-29 5:31 pm, Dmitry Baryshkov wrote:
>>> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
>>>> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
>>>> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
>>>> which means there should be no harm in achieving the same order of
>>>> operations by running it off the back of iommu_probe_device() itself.
>>>> This then puts it in line with the x86 and s390 .probe_finalize bodges,
>>>> letting us pull it all into the main flow properly. As a bonus this lets
>>>> us fold in and de-scope the PCI workaround setup as well.
>>>>
>>>> At this point we can also then pull the call up inside the group mutex,
>>>> and avoid having to think about whether iommu_group_store_type() could
>>>> theoretically race and free the domain if iommu_setup_dma_ops() ran just
>>>> *before* iommu_device_use_default_domain() claims it... Furthermore we
>>>> replace one .probe_finalize call completely, since the only remaining
>>>> implementations are now one which only needs to run once for the initial
>>>> boot-time probe, and two which themselves render that path unreachable.
>>>>
>>>> This leaves us a big step closer to realistically being able to unpick
>>>> the variety of different things that iommu_setup_dma_ops() has been
>>>> muddling together, and further streamline iommu-dma into core API flows
>>>> in future.
>>>>
>>>> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
>>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>>> Tested-by: Hanjun Guo <guohanjun@huawei.com>
>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>>> ---
>>>> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
>>>>       is covered as well, with bonus side-effects as above.
>>>> v3: *Really* do that, remembering the other two probe_finalize sites too.
>>>> ---
>>>>    arch/arm64/mm/dma-mapping.c  |  2 --
>>>>    drivers/iommu/amd/iommu.c    |  8 --------
>>>>    drivers/iommu/dma-iommu.c    | 18 ++++++------------
>>>>    drivers/iommu/dma-iommu.h    | 14 ++++++--------
>>>>    drivers/iommu/intel/iommu.c  |  7 -------
>>>>    drivers/iommu/iommu.c        | 20 +++++++-------------
>>>>    drivers/iommu/s390-iommu.c   |  6 ------
>>>>    drivers/iommu/virtio-iommu.c | 10 ----------
>>>>    include/linux/iommu.h        |  7 -------
>>>>    9 files changed, 19 insertions(+), 73 deletions(-)
>>>
>>> This patch breaks UFS on Qualcomm SC8180X Primus platform:
>>>
>>>
>>> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
>>
>> Hmm, a context fault implies that the device did get attached to a DMA
>> domain, thus has successfully been through __iommu_probe_device(), yet
>> somehow still didn't get the right DMA ops (since that "IOVA" looks more
>> like a PA to me). Do you see the "Adding to IOMMU group..." message for
>> this device, and/or any other relevant messages or errors before this
>> point?
> 
> No, nothing relevant.
> 
> [    8.372395] ufshcd-qcom 1d84000.ufshc: Adding to iommu group 6
> 
> (please ignore the timestamp, it comes before ufshc being probed).
> 
>> I'm guessing there's a fair chance probe deferral might be
>> involved as well. I'd like to understand what path(s) this ends up
>> taking through __iommu_probe_device() and of_dma_configure(), or at
>> least the number and order of probe attempts between the UFS and SMMU
>> drivers.
> 
> __iommu_probe_device() gets called twice and returns early because ops is NULL.
> 
> Then finally of_dma_configure_id() is called. The following branches are taken:
> 
> np == dev->of_node
> of_dma_get_range() returned 0
> bus_dma_limit and dma_range_map are set
> __iommu_probe_device() is called, using the `!group->default_domain &&
> !group_lis` case, then group->default_domain() is not NULL,
> In the end, iommu_setup_dma_ops() is called.
> 
> Then the ufshc probe defers (most likely the PHY is not present or
> some other device is not there yet).

Ah good, probe deferral. And indeed the half-formed hunch from last 
night grew into a pretty definite idea by this morning... patch incoming.

Thanks,
Robin.

> On the next (succeeding) try, of_dma_configure_id() is called again.
> The call trace is more or less the same, except that
> __iommu_probe_device() is not called
> 
>> I'll stare at the code in the morning and see if I can spot any
>> overlooked ways in which what I think might be happening could happen,
>> but any more info to help narrow it down would be much appreciated.
>>
>> Thanks,
>> Robin.
>>
>>> [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
>>> [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
>>> [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
>>> [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
>>> [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
>>> [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
>>> [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
>>> [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
>>> [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
>>> [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
>>> [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
>>> [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
>>> [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
>>> [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
>>>
> 
> 
>
Dmitry Baryshkov April 30, 2024, 10:56 a.m. UTC | #7
On Tue, 30 Apr 2024 at 13:20, Robin Murphy <robin.murphy@arm.com> wrote:
>
> On 2024-04-30 1:41 am, Dmitry Baryshkov wrote:
> > On Tue, 30 Apr 2024 at 01:26, Robin Murphy <robin.murphy@arm.com> wrote:
> >>
> >> On 2024-04-29 5:31 pm, Dmitry Baryshkov wrote:
> >>> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
> >>>> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
> >>>> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
> >>>> which means there should be no harm in achieving the same order of
> >>>> operations by running it off the back of iommu_probe_device() itself.
> >>>> This then puts it in line with the x86 and s390 .probe_finalize bodges,
> >>>> letting us pull it all into the main flow properly. As a bonus this lets
> >>>> us fold in and de-scope the PCI workaround setup as well.
> >>>>
> >>>> At this point we can also then pull the call up inside the group mutex,
> >>>> and avoid having to think about whether iommu_group_store_type() could
> >>>> theoretically race and free the domain if iommu_setup_dma_ops() ran just
> >>>> *before* iommu_device_use_default_domain() claims it... Furthermore we
> >>>> replace one .probe_finalize call completely, since the only remaining
> >>>> implementations are now one which only needs to run once for the initial
> >>>> boot-time probe, and two which themselves render that path unreachable.
> >>>>
> >>>> This leaves us a big step closer to realistically being able to unpick
> >>>> the variety of different things that iommu_setup_dma_ops() has been
> >>>> muddling together, and further streamline iommu-dma into core API flows
> >>>> in future.
> >>>>
> >>>> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
> >>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> >>>> Tested-by: Hanjun Guo <guohanjun@huawei.com>
> >>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> >>>> ---
> >>>> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
> >>>>       is covered as well, with bonus side-effects as above.
> >>>> v3: *Really* do that, remembering the other two probe_finalize sites too.
> >>>> ---
> >>>>    arch/arm64/mm/dma-mapping.c  |  2 --
> >>>>    drivers/iommu/amd/iommu.c    |  8 --------
> >>>>    drivers/iommu/dma-iommu.c    | 18 ++++++------------
> >>>>    drivers/iommu/dma-iommu.h    | 14 ++++++--------
> >>>>    drivers/iommu/intel/iommu.c  |  7 -------
> >>>>    drivers/iommu/iommu.c        | 20 +++++++-------------
> >>>>    drivers/iommu/s390-iommu.c   |  6 ------
> >>>>    drivers/iommu/virtio-iommu.c | 10 ----------
> >>>>    include/linux/iommu.h        |  7 -------
> >>>>    9 files changed, 19 insertions(+), 73 deletions(-)
> >>>
> >>> This patch breaks UFS on Qualcomm SC8180X Primus platform:
> >>>
> >>>
> >>> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
> >>
> >> Hmm, a context fault implies that the device did get attached to a DMA
> >> domain, thus has successfully been through __iommu_probe_device(), yet
> >> somehow still didn't get the right DMA ops (since that "IOVA" looks more
> >> like a PA to me). Do you see the "Adding to IOMMU group..." message for
> >> this device, and/or any other relevant messages or errors before this
> >> point?
> >
> > No, nothing relevant.
> >
> > [    8.372395] ufshcd-qcom 1d84000.ufshc: Adding to iommu group 6
> >
> > (please ignore the timestamp, it comes before ufshc being probed).
> >
> >> I'm guessing there's a fair chance probe deferral might be
> >> involved as well. I'd like to understand what path(s) this ends up
> >> taking through __iommu_probe_device() and of_dma_configure(), or at
> >> least the number and order of probe attempts between the UFS and SMMU
> >> drivers.
> >
> > __iommu_probe_device() gets called twice and returns early because ops is NULL.
> >
> > Then finally of_dma_configure_id() is called. The following branches are taken:
> >
> > np == dev->of_node
> > of_dma_get_range() returned 0
> > bus_dma_limit and dma_range_map are set
> > __iommu_probe_device() is called, using the `!group->default_domain &&
> > !group_lis` case, then group->default_domain() is not NULL,
> > In the end, iommu_setup_dma_ops() is called.
> >
> > Then the ufshc probe defers (most likely the PHY is not present or
> > some other device is not there yet).
>
> Ah good, probe deferral. And indeed the half-formed hunch from last
> night grew into a pretty definite idea by this morning... patch incoming.

Thanks a lot for the quick fix!
Konrad Dybcio April 30, 2024, 12:23 p.m. UTC | #8
On 29.04.2024 11:26 PM, Dmitry Baryshkov wrote:
> On Mon, 29 Apr 2024 at 19:31, Dmitry Baryshkov
> <dmitry.baryshkov@linaro.org> wrote:
>>
>> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
>>> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
>>> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
>>> which means there should be no harm in achieving the same order of
>>> operations by running it off the back of iommu_probe_device() itself.
>>> This then puts it in line with the x86 and s390 .probe_finalize bodges,
>>> letting us pull it all into the main flow properly. As a bonus this lets
>>> us fold in and de-scope the PCI workaround setup as well.
>>>
>>> At this point we can also then pull the call up inside the group mutex,
>>> and avoid having to think about whether iommu_group_store_type() could
>>> theoretically race and free the domain if iommu_setup_dma_ops() ran just
>>> *before* iommu_device_use_default_domain() claims it... Furthermore we
>>> replace one .probe_finalize call completely, since the only remaining
>>> implementations are now one which only needs to run once for the initial
>>> boot-time probe, and two which themselves render that path unreachable.
>>>
>>> This leaves us a big step closer to realistically being able to unpick
>>> the variety of different things that iommu_setup_dma_ops() has been
>>> muddling together, and further streamline iommu-dma into core API flows
>>> in future.
>>>
>>> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>> Tested-by: Hanjun Guo <guohanjun@huawei.com>
>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>> ---
>>> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
>>>     is covered as well, with bonus side-effects as above.
>>> v3: *Really* do that, remembering the other two probe_finalize sites too.
>>> ---
>>>  arch/arm64/mm/dma-mapping.c  |  2 --
>>>  drivers/iommu/amd/iommu.c    |  8 --------
>>>  drivers/iommu/dma-iommu.c    | 18 ++++++------------
>>>  drivers/iommu/dma-iommu.h    | 14 ++++++--------
>>>  drivers/iommu/intel/iommu.c  |  7 -------
>>>  drivers/iommu/iommu.c        | 20 +++++++-------------
>>>  drivers/iommu/s390-iommu.c   |  6 ------
>>>  drivers/iommu/virtio-iommu.c | 10 ----------
>>>  include/linux/iommu.h        |  7 -------
>>>  9 files changed, 19 insertions(+), 73 deletions(-)
>>
>> This patch breaks UFS on Qualcomm SC8180X Primus platform:
>>
>>
>> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
>> [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
>> [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
>> [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
>> [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
>> [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
>> [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
>> [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
>> [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
>> [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
>> [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
>> [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
>> [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
>> [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
>> [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
> 
> Just to confirm: reverting f091e93306e0 ("dma-mapping: Simplify
> arch_setup_dma_ops()") and b67483b3c44e ("iommu/dma: Centralise
> iommu_setup_dma_ops()" fixes the issue for me. Please ping me if you'd
> like me to test a fix.

This also triggers a different issue (that also comes down to "ufs bad") on
another QC platform (SM8550):

[    4.282098] scsi host0: ufshcd
[    4.315970] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
[    4.330155] host_regs: 00000000: 3587031f 00000000 00000400 00000000
[    4.343955] host_regs: 00000010: 01000000 00010217 00000000 00000000
[    4.356027] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
[    4.370136] host_regs: 00000030: 0000000f 00000003 00000000 00000000
[    4.376662] host_regs: 00000040: 00000000 00000000 00000000 00000000
[    4.383192] host_regs: 00000050: 85109000 00000008 00000000 00000000
[    4.389719] host_regs: 00000060: 00000000 80000000 00000000 00000000
[    4.396245] host_regs: 00000070: 8510a000 00000008 00000000 00000000
[    4.402773] host_regs: 00000080: 00000000 00000000 00000000 00000000
[    4.409298] host_regs: 00000090: 00000016 00000000 00000000 0000000c
[    4.415900] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x8851093e0, fsynr=0x3b0001, cbfrsynra=0x60, cb=2
[    4.416135] ufshcd-qcom 1d84000.ufs: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
[    4.951750] ufshcd-qcom 1d84000.ufs: ufshcd_verify_dev_init: NOP OUT failed -11
[    4.960644] ufshcd-qcom 1d84000.ufs: ufshcd_async_scan failed: -11

Reverting the commits Dmitry mentioned also fixes this.

Konrad
Robin Murphy April 30, 2024, 12:33 p.m. UTC | #9
On 30/04/2024 1:23 pm, Konrad Dybcio wrote:
> On 29.04.2024 11:26 PM, Dmitry Baryshkov wrote:
>> On Mon, 29 Apr 2024 at 19:31, Dmitry Baryshkov
>> <dmitry.baryshkov@linaro.org> wrote:
>>>
>>> On Fri, Apr 19, 2024 at 05:54:45PM +0100, Robin Murphy wrote:
>>>> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
>>>> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
>>>> which means there should be no harm in achieving the same order of
>>>> operations by running it off the back of iommu_probe_device() itself.
>>>> This then puts it in line with the x86 and s390 .probe_finalize bodges,
>>>> letting us pull it all into the main flow properly. As a bonus this lets
>>>> us fold in and de-scope the PCI workaround setup as well.
>>>>
>>>> At this point we can also then pull the call up inside the group mutex,
>>>> and avoid having to think about whether iommu_group_store_type() could
>>>> theoretically race and free the domain if iommu_setup_dma_ops() ran just
>>>> *before* iommu_device_use_default_domain() claims it... Furthermore we
>>>> replace one .probe_finalize call completely, since the only remaining
>>>> implementations are now one which only needs to run once for the initial
>>>> boot-time probe, and two which themselves render that path unreachable.
>>>>
>>>> This leaves us a big step closer to realistically being able to unpick
>>>> the variety of different things that iommu_setup_dma_ops() has been
>>>> muddling together, and further streamline iommu-dma into core API flows
>>>> in future.
>>>>
>>>> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
>>>> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
>>>> Tested-by: Hanjun Guo <guohanjun@huawei.com>
>>>> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
>>>> ---
>>>> v2: Shuffle around to make sure the iommu_group_do_probe_finalize() case
>>>>      is covered as well, with bonus side-effects as above.
>>>> v3: *Really* do that, remembering the other two probe_finalize sites too.
>>>> ---
>>>>   arch/arm64/mm/dma-mapping.c  |  2 --
>>>>   drivers/iommu/amd/iommu.c    |  8 --------
>>>>   drivers/iommu/dma-iommu.c    | 18 ++++++------------
>>>>   drivers/iommu/dma-iommu.h    | 14 ++++++--------
>>>>   drivers/iommu/intel/iommu.c  |  7 -------
>>>>   drivers/iommu/iommu.c        | 20 +++++++-------------
>>>>   drivers/iommu/s390-iommu.c   |  6 ------
>>>>   drivers/iommu/virtio-iommu.c | 10 ----------
>>>>   include/linux/iommu.h        |  7 -------
>>>>   9 files changed, 19 insertions(+), 73 deletions(-)
>>>
>>> This patch breaks UFS on Qualcomm SC8180X Primus platform:
>>>
>>>
>>> [    3.846856] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x1032db3e0, fsynr=0x130000, cbfrsynra=0x300, cb=4
>>> [    3.846880] ufshcd-qcom 1d84000.ufshc: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
>>> [    3.846929] host_regs: 00000000: 1587031f 00000000 00000300 00000000
>>> [    3.846935] host_regs: 00000010: 01000000 00010217 00000000 00000000
>>> [    3.846941] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
>>> [    3.846946] host_regs: 00000030: 0000000f 00000001 00000000 00000000
>>> [    3.846951] host_regs: 00000040: 00000000 00000000 00000000 00000000
>>> [    3.846956] host_regs: 00000050: 032db000 00000001 00000000 00000000
>>> [    3.846962] host_regs: 00000060: 00000000 80000000 00000000 00000000
>>> [    3.846967] host_regs: 00000070: 032dd000 00000001 00000000 00000000
>>> [    3.846972] host_regs: 00000080: 00000000 00000000 00000000 00000000
>>> [    3.846977] host_regs: 00000090: 00000016 00000000 00000000 0000000c
>>> [    3.847074] ufshcd-qcom 1d84000.ufshc: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
>>> [    4.406550] ufshcd-qcom 1d84000.ufshc: ufshcd_verify_dev_init: NOP OUT failed -11
>>> [    4.417953] ufshcd-qcom 1d84000.ufshc: ufshcd_async_scan failed: -11
>>
>> Just to confirm: reverting f091e93306e0 ("dma-mapping: Simplify
>> arch_setup_dma_ops()") and b67483b3c44e ("iommu/dma: Centralise
>> iommu_setup_dma_ops()" fixes the issue for me. Please ping me if you'd
>> like me to test a fix.
> 
> This also triggers a different issue (that also comes down to "ufs bad") on
> another QC platform (SM8550):
> 
> [    4.282098] scsi host0: ufshcd
> [    4.315970] ufshcd-qcom 1d84000.ufs: ufshcd_check_errors: saved_err 0x20000 saved_uic_err 0x0
> [    4.330155] host_regs: 00000000: 3587031f 00000000 00000400 00000000
> [    4.343955] host_regs: 00000010: 01000000 00010217 00000000 00000000
> [    4.356027] host_regs: 00000020: 00000000 00070ef5 00000000 00000000
> [    4.370136] host_regs: 00000030: 0000000f 00000003 00000000 00000000
> [    4.376662] host_regs: 00000040: 00000000 00000000 00000000 00000000
> [    4.383192] host_regs: 00000050: 85109000 00000008 00000000 00000000
> [    4.389719] host_regs: 00000060: 00000000 80000000 00000000 00000000
> [    4.396245] host_regs: 00000070: 8510a000 00000008 00000000 00000000
> [    4.402773] host_regs: 00000080: 00000000 00000000 00000000 00000000
> [    4.409298] host_regs: 00000090: 00000016 00000000 00000000 0000000c
> [    4.415900] arm-smmu 15000000.iommu: Unhandled context fault: fsr=0x402, iova=0x8851093e0, fsynr=0x3b0001, cbfrsynra=0x60, cb=2
> [    4.416135] ufshcd-qcom 1d84000.ufs: ufshcd_err_handler started; HBA state eh_fatal; powered 1; shutting down 0; saved_err = 131072; saved_uic_err = 0; force_reset = 0
> [    4.951750] ufshcd-qcom 1d84000.ufs: ufshcd_verify_dev_init: NOP OUT failed -11
> [    4.960644] ufshcd-qcom 1d84000.ufs: ufshcd_async_scan failed: -11
> 
> Reverting the commits Dmitry mentioned also fixes this.

Yeah, It'll be the same thing - doesn't really matter exactly *how* the 
UFS goes wrong due to the SMMU blocking it, the issue is that the SMMU 
is erroneously blocking it in the first place due to a DMA ops mixup. 
Fix is now here:

https://lore.kernel.org/linux-iommu/d4cc20cbb0c45175e98dd76bf187e2ad6421296d.1714472573.git.robin.murphy@arm.com/

Thanks,
Robin.
Niklas Schnelle April 30, 2024, 2:39 p.m. UTC | #10
On Fri, 2024-04-19 at 17:54 +0100, Robin Murphy wrote:
> It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
> ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
> which means there should be no harm in achieving the same order of
> operations by running it off the back of iommu_probe_device() itself.
> This then puts it in line with the x86 and s390 .probe_finalize bodges,
> letting us pull it all into the main flow properly. As a bonus this lets
> us fold in and de-scope the PCI workaround setup as well.
> 
> At this point we can also then pull the call up inside the group mutex,
> and avoid having to think about whether iommu_group_store_type() could
> theoretically race and free the domain if iommu_setup_dma_ops() ran just
> *before* iommu_device_use_default_domain() claims it... Furthermore we
> replace one .probe_finalize call completely, since the only remaining
> implementations are now one which only needs to run once for the initial
> boot-time probe, and two which themselves render that path unreachable.
> 
> This leaves us a big step closer to realistically being able to unpick
> the variety of different things that iommu_setup_dma_ops() has been
> muddling together, and further streamline iommu-dma into core API flows
> in future.
> 
> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com> # For Intel IOMMU
> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
> Tested-by: Hanjun Guo <guohanjun@huawei.com>
> Signed-off-by: Robin Murphy <robin.murphy@arm.com>
> ---
---8<---
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 9a5196f523de..d8eaa7ea380b 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -695,11 +695,6 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
>  	return size;
>  }
>  
> -static void s390_iommu_probe_finalize(struct device *dev)
> -{
> -	iommu_setup_dma_ops(dev, 0, U64_MAX);
> -}
> -
>  struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
>  {
>  	if (!zdev || !zdev->s390_domain)
> @@ -785,7 +780,6 @@ static const struct iommu_ops s390_iommu_ops = {
>  	.capable = s390_iommu_capable,
>  	.domain_alloc_paging = s390_domain_alloc_paging,
>  	.probe_device = s390_iommu_probe_device,
> -	.probe_finalize = s390_iommu_probe_finalize,
>  	.release_device = s390_iommu_release_device,
>  	.device_group = generic_device_group,
>  	.pgsize_bitmap = SZ_4K,

I gave this whole series a test boot on s390 and also tried running a
KVM guest with vfio-pci pass-through. For the s390 part feel free to
add my.

Acked-by: Niklas Schnelle <schnelle@linux.ibm.com> # for s390
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com> # for s390

Thanks,
Niklas
diff mbox series

Patch

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 61886e43e3a1..313d8938a2f0 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -58,8 +58,6 @@  void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 		   ARCH_DMA_MINALIGN, cls);
 
 	dev->dma_coherent = coherent;
-	if (device_iommu_mapped(dev))
-		iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
 
 	xen_setup_dma_ops(dev);
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d35c1b8c8e65..085abf098fa9 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2175,13 +2175,6 @@  static struct iommu_device *amd_iommu_probe_device(struct device *dev)
 	return iommu_dev;
 }
 
-static void amd_iommu_probe_finalize(struct device *dev)
-{
-	/* Domains are initialized for this device - have a look what we ended up with */
-	set_dma_ops(dev, NULL);
-	iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
 static void amd_iommu_release_device(struct device *dev)
 {
 	struct amd_iommu *iommu;
@@ -2784,7 +2777,6 @@  const struct iommu_ops amd_iommu_ops = {
 	.domain_alloc_user = amd_iommu_domain_alloc_user,
 	.probe_device = amd_iommu_probe_device,
 	.release_device = amd_iommu_release_device,
-	.probe_finalize = amd_iommu_probe_finalize,
 	.device_group = amd_iommu_device_group,
 	.get_resv_regions = amd_iommu_get_resv_regions,
 	.is_attach_deferred = amd_iommu_is_attach_deferred,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index f542eabaefa4..89a53c2f2cf9 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1741,25 +1741,20 @@  static const struct dma_map_ops iommu_dma_ops = {
 	.max_mapping_size       = iommu_dma_max_mapping_size,
 };
 
-/*
- * The IOMMU core code allocates the default DMA domain, which the underlying
- * IOMMU driver needs to support via the dma-iommu layer.
- */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
+void iommu_setup_dma_ops(struct device *dev)
 {
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
 
-	if (!domain)
-		goto out_err;
+	if (dev_is_pci(dev))
+		dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
 
-	/*
-	 * The IOMMU core code allocates the default DMA domain, which the
-	 * underlying IOMMU driver needs to support via the dma-iommu layer.
-	 */
 	if (iommu_is_dma_domain(domain)) {
 		if (iommu_dma_init_domain(domain, dev))
 			goto out_err;
 		dev->dma_ops = &iommu_dma_ops;
+	} else if (dev->dma_ops == &iommu_dma_ops) {
+		/* Clean up if we've switched *from* a DMA domain */
+		dev->dma_ops = NULL;
 	}
 
 	return;
@@ -1767,7 +1762,6 @@  void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
 	 pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
 		 dev_name(dev));
 }
-EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
 
 static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
 		phys_addr_t msi_addr, struct iommu_domain *domain)
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index c829f1f82a99..c12d63457c76 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -9,6 +9,8 @@ 
 
 #ifdef CONFIG_IOMMU_DMA
 
+void iommu_setup_dma_ops(struct device *dev);
+
 int iommu_get_dma_cookie(struct iommu_domain *domain);
 void iommu_put_dma_cookie(struct iommu_domain *domain);
 
@@ -17,13 +19,13 @@  int iommu_dma_init_fq(struct iommu_domain *domain);
 void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
 
 extern bool iommu_dma_forcedac;
-static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
-{
-	dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
-}
 
 #else /* CONFIG_IOMMU_DMA */
 
+static inline void iommu_setup_dma_ops(struct device *dev)
+{
+}
+
 static inline int iommu_dma_init_fq(struct iommu_domain *domain)
 {
 	return -EINVAL;
@@ -42,9 +44,5 @@  static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he
 {
 }
 
-static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
-{
-}
-
 #endif	/* CONFIG_IOMMU_DMA */
 #endif	/* __DMA_IOMMU_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 50eb9aed47cc..b2f6d8564463 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4349,12 +4349,6 @@  static void intel_iommu_release_device(struct device *dev)
 	set_dma_ops(dev, NULL);
 }
 
-static void intel_iommu_probe_finalize(struct device *dev)
-{
-	set_dma_ops(dev, NULL);
-	iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
 static void intel_iommu_get_resv_regions(struct device *device,
 					 struct list_head *head)
 {
@@ -4839,7 +4833,6 @@  const struct iommu_ops intel_iommu_ops = {
 	.domain_alloc		= intel_iommu_domain_alloc,
 	.domain_alloc_user	= intel_iommu_domain_alloc_user,
 	.probe_device		= intel_iommu_probe_device,
-	.probe_finalize		= intel_iommu_probe_finalize,
 	.release_device		= intel_iommu_release_device,
 	.get_resv_regions	= intel_iommu_get_resv_regions,
 	.device_group		= intel_iommu_device_group,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a95a483def2d..f01133b906e2 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -581,10 +581,11 @@  static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 		if (list_empty(&group->entry))
 			list_add_tail(&group->entry, group_list);
 	}
-	mutex_unlock(&group->mutex);
 
-	if (dev_is_pci(dev))
-		iommu_dma_set_pci_32bit_workaround(dev);
+	if (group->default_domain)
+		iommu_setup_dma_ops(dev);
+
+	mutex_unlock(&group->mutex);
 
 	return 0;
 
@@ -1828,6 +1829,8 @@  int bus_iommu_probe(const struct bus_type *bus)
 			mutex_unlock(&group->mutex);
 			return ret;
 		}
+		for_each_group_device(group, gdev)
+			iommu_setup_dma_ops(gdev->dev);
 		mutex_unlock(&group->mutex);
 
 		/*
@@ -3066,18 +3069,9 @@  static ssize_t iommu_group_store_type(struct iommu_group *group,
 	if (ret)
 		goto out_unlock;
 
-	/*
-	 * Release the mutex here because ops->probe_finalize() call-back of
-	 * some vendor IOMMU drivers calls arm_iommu_attach_device() which
-	 * in-turn might call back into IOMMU core code, where it tries to take
-	 * group->mutex, resulting in a deadlock.
-	 */
-	mutex_unlock(&group->mutex);
-
 	/* Make sure dma_ops is appropriatley set */
 	for_each_group_device(group, gdev)
-		iommu_group_do_probe_finalize(gdev->dev);
-	return count;
+		iommu_setup_dma_ops(gdev->dev);
 
 out_unlock:
 	mutex_unlock(&group->mutex);
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9a5196f523de..d8eaa7ea380b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -695,11 +695,6 @@  static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
 	return size;
 }
 
-static void s390_iommu_probe_finalize(struct device *dev)
-{
-	iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
 struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
 {
 	if (!zdev || !zdev->s390_domain)
@@ -785,7 +780,6 @@  static const struct iommu_ops s390_iommu_ops = {
 	.capable = s390_iommu_capable,
 	.domain_alloc_paging = s390_domain_alloc_paging,
 	.probe_device = s390_iommu_probe_device,
-	.probe_finalize = s390_iommu_probe_finalize,
 	.release_device = s390_iommu_release_device,
 	.device_group = generic_device_group,
 	.pgsize_bitmap = SZ_4K,
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 04048f64a2c0..8e776f6c6e35 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -1025,15 +1025,6 @@  static struct iommu_device *viommu_probe_device(struct device *dev)
 	return ERR_PTR(ret);
 }
 
-static void viommu_probe_finalize(struct device *dev)
-{
-#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
-	/* First clear the DMA ops in case we're switching from a DMA domain */
-	set_dma_ops(dev, NULL);
-	iommu_setup_dma_ops(dev, 0, U64_MAX);
-#endif
-}
-
 static void viommu_release_device(struct device *dev)
 {
 	struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
@@ -1073,7 +1064,6 @@  static struct iommu_ops viommu_ops = {
 	.capable		= viommu_capable,
 	.domain_alloc		= viommu_domain_alloc,
 	.probe_device		= viommu_probe_device,
-	.probe_finalize		= viommu_probe_finalize,
 	.release_device		= viommu_release_device,
 	.device_group		= viommu_device_group,
 	.get_resv_regions	= viommu_get_resv_regions,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 190173906ec9..ae6e5adebbd1 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1445,9 +1445,6 @@  static inline void iommu_debugfs_setup(void) {}
 #ifdef CONFIG_IOMMU_DMA
 #include <linux/msi.h>
 
-/* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
-
 int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
 
 int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
@@ -1458,10 +1455,6 @@  void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);
 struct msi_desc;
 struct msi_msg;
 
-static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
-{
-}
-
 static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
 {
 	return -ENODEV;