diff mbox

[v3] iommu/mediatek: Move attach_device after iommu-group is ready for M4Uv1

Message ID 1516878844-23021-1-git-send-email-yong.wu@mediatek.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yong Wu (吴勇) Jan. 25, 2018, 11:14 a.m. UTC
Commit 05f80300dc8b made the iommu framework assume that all the iommu
drivers have their own iommu-group, and it got rid of the FIXME
workarounds for the case where the group is NULL. But the flow of the
Mediatek M4U gen1 driver is a bit tricky, so it will hang in this case:

Comments

Robin Murphy Jan. 25, 2018, 12:02 p.m. UTC | #1
On 25/01/18 11:14, Yong Wu wrote:
> In the commit 05f80300dc8b, the iommu framework has supposed all the
> iommu drivers have their owner iommu-group, it get rid of the FIXME
> workarounds while the group is NULL. But the flow of Mediatek M4U gen1
> looks a bit trick that it will hang at this case:
> 
> ==========================================
> Unable to handle kernel NULL pointer dereference at virtual address 00000030
> pgd = c0004000
> [00000030] *pgd=00000000
> PC is at mutex_lock+0x28/0x54
> LR is at iommu_attach_device+0xa4/0xd4
> pc : [<c07632e8>]    lr : [<c04736fc>]    psr: 60000013
> sp : df0edbb8  ip : df0edbc8  fp : df0edbc4
> r10: c114da14  r9 : df2a3e40  r8 : 00000003
> r7 : df27a210  r6 : df2a90c4  r5 : 00000030  r4 : 00000000
> r3 : df0f8000  r2 : fffff000  r1 : df29c610  r0 : 00000030
> Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> xxx
> (mutex_lock) from [<c04736fc>] (iommu_attach_device+0xa4/0xd4)
> (iommu_attach_device) from [<c011b9dc>] (__arm_iommu_attach_device+0x28/0x90)
> (__arm_iommu_attach_device) from [<c011ba60>] (arm_iommu_attach_device+0x1c/0x30)
> (arm_iommu_attach_device) from [<c04759ac>] (mtk_iommu_add_device+0xfc/0x214)
> (mtk_iommu_add_device) from [<c0472aa4>] (add_iommu_group+0x3c/0x68)
> (add_iommu_group) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
> (bus_for_each_dev) from [<c04734a4>] (bus_set_iommu+0xb0/0xec)
> (bus_set_iommu) from [<c0476310>] (mtk_iommu_probe+0x328/0x368)
> (mtk_iommu_probe) from [<c048189c>] (platform_drv_probe+0x5c/0xc0)
> (platform_drv_probe) from [<c047f510>] (driver_probe_device+0x2f4/0x4d8)
> (driver_probe_device) from [<c047f800>] (__driver_attach+0x10c/0x128)
> (__driver_attach) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
> (bus_for_each_dev) from [<c047ec78>] (driver_attach+0x2c/0x30)
> (driver_attach) from [<c047e640>] (bus_add_driver+0x1e0/0x278)
> (bus_add_driver) from [<c048052c>] (driver_register+0x88/0x108)
> (driver_register) from [<c04817ec>] (__platform_driver_register+0x50/0x58)
> (__platform_driver_register) from [<c0b31380>] (m4u_init+0x24/0x28)
> (m4u_init) from [<c0101c38>] (do_one_initcall+0xf0/0x17c)
> =========================
> 
> The root cause is that the device's iommu-group is NULL while
> arm_iommu_attach_device is called. This patch prepare a new iommu-group
> for the iommu consumer devices to fix this issue.
> 
> CC: Robin Murphy <robin.murphy@arm.com>
> CC: Honghui Zhang <honghui.zhang@mediatek.com>
> Fixes: 05f80300dc8b ('iommu: Finish making iommu_group support mandatory')
> Reported-by: Ryder Lee <ryder.lee@mediatek.com>
> Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> ---
> changes notes:
> v3: don't use the global variable and allocate a new iommu group before
>      arm_iommu_attach_device following Robin's suggestion.
> 
> v2: http://lists.infradead.org/pipermail/linux-mediatek/2018-January/011810.html
>     Add mtk_domain_v1=NULL in domain_free for symmetry.
> 
> v1: https://patchwork.kernel.org/patch/10176255/
> ---
>   drivers/iommu/mtk_iommu_v1.c | 49 ++++++++++++++++++++++----------------------
>   1 file changed, 25 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
> index 542930c..aca76d2 100644
> --- a/drivers/iommu/mtk_iommu_v1.c
> +++ b/drivers/iommu/mtk_iommu_v1.c
> @@ -418,20 +418,12 @@ static int mtk_iommu_create_mapping(struct device *dev,
>   		m4udev->archdata.iommu = mtk_mapping;
>   	}
>   
> -	ret = arm_iommu_attach_device(dev, mtk_mapping);
> -	if (ret)
> -		goto err_release_mapping;
> -
>   	return 0;
> -
> -err_release_mapping:
> -	arm_iommu_release_mapping(mtk_mapping);
> -	m4udev->archdata.iommu = NULL;
> -	return ret;
>   }
>   
>   static int mtk_iommu_add_device(struct device *dev)
>   {
> +	struct dma_iommu_mapping *mtk_mapping;
>   	struct of_phandle_args iommu_spec;
>   	struct of_phandle_iterator it;
>   	struct mtk_iommu_data *data;
> @@ -452,9 +444,30 @@ static int mtk_iommu_add_device(struct device *dev)
>   	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
>   		return -ENODEV; /* Not a iommu client device */
>   
> +	/*
> +	 * This is a short-term bodge because the ARM DMA code doesn't
> +	 * understand multi-device groups, but we have to call into it
> +	 * successfully (and not just rely on a normal IOMMU API attach
> +	 * here) in order to set the correct DMA API ops on @dev.
> +	 */
> +	group = iommu_group_alloc();
> +	if (IS_ERR(group))
> +		return PTR_ERR(group);
> +
> +	err = iommu_group_add_device(group, dev);
> +	iommu_group_put(group);
> +	if (err)
> +		return err;
> +
>   	data = dev->iommu_fwspec->iommu_priv;
> -	iommu_device_link(&data->iommu, dev);
> +	mtk_mapping = data->dev->archdata.iommu;
> +	err = arm_iommu_attach_device(dev, mtk_mapping);
> +	if (err) {
> +		iommu_group_remove_device(dev);
> +		return err;
> +	}
>   
> +	iommu_device_link(&data->iommu, dev);
>   	group = iommu_group_get_for_dev(dev);

This call now does nothing, so you may as well remove it (and the 
subsequent iommu_group_put)...

>   	if (IS_ERR(group))
>   		return PTR_ERR(group);
> @@ -479,20 +492,8 @@ static void mtk_iommu_remove_device(struct device *dev)
>   
>   static struct iommu_group *mtk_iommu_device_group(struct device *dev)
>   {
> -	struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
> -
> -	if (!data)
> -		return ERR_PTR(-ENODEV);
> -
> -	/* All the client devices are in the same m4u iommu-group */
> -	if (!data->m4u_group) {
> -		data->m4u_group = iommu_group_alloc();
> -		if (IS_ERR(data->m4u_group))
> -			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> -	} else {
> -		iommu_group_ref_get(data->m4u_group);
> -	}
> -	return data->m4u_group;
> +	/* The iommu-group has always been allocated in add_device. */
> +	return NULL;

... and get rid of this and the mtk_iommu_ops.device_group callback 
entirely.

If you're moving the arm_iommu_attach_device() call anyway though, 
another clean option would be to put it after iommu_group_get_for_dev() 
and switch .device_group to generic_device_group - I *think* that should 
be directly equivalent to the open-coded iommu_group_alloc/add_device.

I admit I was somewhat hoping we could confine the ARM-specific stuff to 
mtk_iommu_create_mapping() and let all the "correct" IOMMU API code stay 
in the right places even if it would strictly be unused. On reflection 
though, that's clearly a bit silly, since we can always easily bring it 
back via a revert in future.

Robin.

>   }
>   
>   static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
>
Yong Wu (吴勇) Jan. 26, 2018, 8:34 a.m. UTC | #2
On Thu, 2018-01-25 at 12:02 +0000, Robin Murphy wrote:
> On 25/01/18 11:14, Yong Wu wrote:
> > In the commit 05f80300dc8b, the iommu framework has supposed all the
> > iommu drivers have their owner iommu-group, it get rid of the FIXME
> > workarounds while the group is NULL. But the flow of Mediatek M4U gen1
> > looks a bit trick that it will hang at this case:
> > 
> > ==========================================
> > Unable to handle kernel NULL pointer dereference at virtual address 00000030
> > pgd = c0004000
> > [00000030] *pgd=00000000
> > PC is at mutex_lock+0x28/0x54
> > LR is at iommu_attach_device+0xa4/0xd4
> > pc : [<c07632e8>]    lr : [<c04736fc>]    psr: 60000013
> > sp : df0edbb8  ip : df0edbc8  fp : df0edbc4
> > r10: c114da14  r9 : df2a3e40  r8 : 00000003
> > r7 : df27a210  r6 : df2a90c4  r5 : 00000030  r4 : 00000000
> > r3 : df0f8000  r2 : fffff000  r1 : df29c610  r0 : 00000030
> > Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
> > xxx
> > (mutex_lock) from [<c04736fc>] (iommu_attach_device+0xa4/0xd4)
> > (iommu_attach_device) from [<c011b9dc>] (__arm_iommu_attach_device+0x28/0x90)
> > (__arm_iommu_attach_device) from [<c011ba60>] (arm_iommu_attach_device+0x1c/0x30)
> > (arm_iommu_attach_device) from [<c04759ac>] (mtk_iommu_add_device+0xfc/0x214)
> > (mtk_iommu_add_device) from [<c0472aa4>] (add_iommu_group+0x3c/0x68)
> > (add_iommu_group) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
> > (bus_for_each_dev) from [<c04734a4>] (bus_set_iommu+0xb0/0xec)
> > (bus_set_iommu) from [<c0476310>] (mtk_iommu_probe+0x328/0x368)
> > (mtk_iommu_probe) from [<c048189c>] (platform_drv_probe+0x5c/0xc0)
> > (platform_drv_probe) from [<c047f510>] (driver_probe_device+0x2f4/0x4d8)
> > (driver_probe_device) from [<c047f800>] (__driver_attach+0x10c/0x128)
> > (__driver_attach) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
> > (bus_for_each_dev) from [<c047ec78>] (driver_attach+0x2c/0x30)
> > (driver_attach) from [<c047e640>] (bus_add_driver+0x1e0/0x278)
> > (bus_add_driver) from [<c048052c>] (driver_register+0x88/0x108)
> > (driver_register) from [<c04817ec>] (__platform_driver_register+0x50/0x58)
> > (__platform_driver_register) from [<c0b31380>] (m4u_init+0x24/0x28)
> > (m4u_init) from [<c0101c38>] (do_one_initcall+0xf0/0x17c)
> > =========================
> > 
> > The root cause is that the device's iommu-group is NULL while
> > arm_iommu_attach_device is called. This patch prepare a new iommu-group
> > for the iommu consumer devices to fix this issue.
> > 
> > CC: Robin Murphy <robin.murphy@arm.com>
> > CC: Honghui Zhang <honghui.zhang@mediatek.com>
> > Fixes: 05f80300dc8b ('iommu: Finish making iommu_group support mandatory')
> > Reported-by: Ryder Lee <ryder.lee@mediatek.com>
> > Signed-off-by: Yong Wu <yong.wu@mediatek.com>
> > ---
> > changes notes:
> > v3: don't use the global variable and allocate a new iommu group before
> >      arm_iommu_attach_device following Robin's suggestion.
> > 
> > v2: http://lists.infradead.org/pipermail/linux-mediatek/2018-January/011810.html
> >     Add mtk_domain_v1=NULL in domain_free for symmetry.
> > 
> > v1: https://patchwork.kernel.org/patch/10176255/
> > ---
> >   drivers/iommu/mtk_iommu_v1.c | 49 ++++++++++++++++++++++----------------------
> >   1 file changed, 25 insertions(+), 24 deletions(-)
> > 
> > diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
> > index 542930c..aca76d2 100644
> > --- a/drivers/iommu/mtk_iommu_v1.c
> > +++ b/drivers/iommu/mtk_iommu_v1.c
> > @@ -418,20 +418,12 @@ static int mtk_iommu_create_mapping(struct device *dev,
> >   		m4udev->archdata.iommu = mtk_mapping;
> >   	}
> >   
> > -	ret = arm_iommu_attach_device(dev, mtk_mapping);
> > -	if (ret)
> > -		goto err_release_mapping;
> > -
> >   	return 0;
> > -
> > -err_release_mapping:
> > -	arm_iommu_release_mapping(mtk_mapping);
> > -	m4udev->archdata.iommu = NULL;
> > -	return ret;
> >   }
> >   
> >   static int mtk_iommu_add_device(struct device *dev)
> >   {
> > +	struct dma_iommu_mapping *mtk_mapping;
> >   	struct of_phandle_args iommu_spec;
> >   	struct of_phandle_iterator it;
> >   	struct mtk_iommu_data *data;
> > @@ -452,9 +444,30 @@ static int mtk_iommu_add_device(struct device *dev)
> >   	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
> >   		return -ENODEV; /* Not a iommu client device */
> >   
> > +	/*
> > +	 * This is a short-term bodge because the ARM DMA code doesn't
> > +	 * understand multi-device groups, but we have to call into it
> > +	 * successfully (and not just rely on a normal IOMMU API attach
> > +	 * here) in order to set the correct DMA API ops on @dev.
> > +	 */
> > +	group = iommu_group_alloc();
> > +	if (IS_ERR(group))
> > +		return PTR_ERR(group);
> > +
> > +	err = iommu_group_add_device(group, dev);
> > +	iommu_group_put(group);
> > +	if (err)
> > +		return err;
> > +
> >   	data = dev->iommu_fwspec->iommu_priv;
> > -	iommu_device_link(&data->iommu, dev);
> > +	mtk_mapping = data->dev->archdata.iommu;
> > +	err = arm_iommu_attach_device(dev, mtk_mapping);
> > +	if (err) {
> > +		iommu_group_remove_device(dev);
> > +		return err;
> > +	}
> >   
> > +	iommu_device_link(&data->iommu, dev);
> >   	group = iommu_group_get_for_dev(dev);
> 
> This call now does nothing, so you may as well remove it (and the 
> subsequent iommu_group_put)...
> 
> >   	if (IS_ERR(group))
> >   		return PTR_ERR(group);
> > @@ -479,20 +492,8 @@ static void mtk_iommu_remove_device(struct device *dev)
> >   
> >   static struct iommu_group *mtk_iommu_device_group(struct device *dev)
> >   {
> > -	struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
> > -
> > -	if (!data)
> > -		return ERR_PTR(-ENODEV);
> > -
> > -	/* All the client devices are in the same m4u iommu-group */
> > -	if (!data->m4u_group) {
> > -		data->m4u_group = iommu_group_alloc();
> > -		if (IS_ERR(data->m4u_group))
> > -			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
> > -	} else {
> > -		iommu_group_ref_get(data->m4u_group);
> > -	}
> > -	return data->m4u_group;
> > +	/* The iommu-group has always been allocated in add_device. */
> > +	return NULL;
> 
> ... and get rid of this and the mtk_iommu_ops.device_group callback 
> entirely.

OK. Thanks.

> 
> If you're moving the arm_iommu_attach_device() call anyway though, 
> another clean option would be put it after iommu_group_get_for_dev() and 
> switch .device_group to generic_device_group - I *think* that should be 
> directly equivalent to the open-coded iommu_group_alloc/add_device.

In v1 and v2, I followed this method. Then I ran into a problem at [1],
which always creates a new domain; that is not what we expect. Thus, I
had to use a global variable to restore our special M4U domain.

> 
> I admit I was somewhat hoping we could confine the ARM-specific stuff to 
> mtk_iommu_create_mapping() and let all the "correct" IOMMU API code stay 
> in the right places even if it would strictly be unused. On reflection 
> though, that's clearly a bit silly, since we can always easily bring it 
> back via a revert in future.
> 

I remember a patch [2] that looks like it tries to synchronize the flow
of ARM/DMA and ARM64/DMA. Maybe this will help?

[1]
http://elixir.free-electrons.com/linux/latest/source/drivers/iommu/iommu.c#L1027
[2] https://patchwork.kernel.org/patch/8357801/

> Robin.
> 
> >   }
> >   
> >   static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)
> >
diff mbox

Patch

==========================================
Unable to handle kernel NULL pointer dereference at virtual address 00000030
pgd = c0004000
[00000030] *pgd=00000000
PC is at mutex_lock+0x28/0x54
LR is at iommu_attach_device+0xa4/0xd4
pc : [<c07632e8>]    lr : [<c04736fc>]    psr: 60000013
sp : df0edbb8  ip : df0edbc8  fp : df0edbc4
r10: c114da14  r9 : df2a3e40  r8 : 00000003
r7 : df27a210  r6 : df2a90c4  r5 : 00000030  r4 : 00000000
r3 : df0f8000  r2 : fffff000  r1 : df29c610  r0 : 00000030
Flags: nZCv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
xxx
(mutex_lock) from [<c04736fc>] (iommu_attach_device+0xa4/0xd4)
(iommu_attach_device) from [<c011b9dc>] (__arm_iommu_attach_device+0x28/0x90)
(__arm_iommu_attach_device) from [<c011ba60>] (arm_iommu_attach_device+0x1c/0x30)
(arm_iommu_attach_device) from [<c04759ac>] (mtk_iommu_add_device+0xfc/0x214)
(mtk_iommu_add_device) from [<c0472aa4>] (add_iommu_group+0x3c/0x68)
(add_iommu_group) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
(bus_for_each_dev) from [<c04734a4>] (bus_set_iommu+0xb0/0xec)
(bus_set_iommu) from [<c0476310>] (mtk_iommu_probe+0x328/0x368)
(mtk_iommu_probe) from [<c048189c>] (platform_drv_probe+0x5c/0xc0)
(platform_drv_probe) from [<c047f510>] (driver_probe_device+0x2f4/0x4d8)
(driver_probe_device) from [<c047f800>] (__driver_attach+0x10c/0x128)
(__driver_attach) from [<c047d044>] (bus_for_each_dev+0x78/0xac)
(bus_for_each_dev) from [<c047ec78>] (driver_attach+0x2c/0x30)
(driver_attach) from [<c047e640>] (bus_add_driver+0x1e0/0x278)
(bus_add_driver) from [<c048052c>] (driver_register+0x88/0x108)
(driver_register) from [<c04817ec>] (__platform_driver_register+0x50/0x58)
(__platform_driver_register) from [<c0b31380>] (m4u_init+0x24/0x28)
(m4u_init) from [<c0101c38>] (do_one_initcall+0xf0/0x17c)
=========================

The root cause is that the device's iommu-group is NULL when
arm_iommu_attach_device is called. This patch prepares a new iommu-group
for the iommu consumer devices to fix this issue.

CC: Robin Murphy <robin.murphy@arm.com>
CC: Honghui Zhang <honghui.zhang@mediatek.com>
Fixes: 05f80300dc8b ('iommu: Finish making iommu_group support mandatory')
Reported-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
---
change notes:
v3: don't use the global variable and allocate a new iommu group before
    arm_iommu_attach_device following Robin's suggestion.

v2: http://lists.infradead.org/pipermail/linux-mediatek/2018-January/011810.html
   Add mtk_domain_v1=NULL in domain_free for symmetry.

v1: https://patchwork.kernel.org/patch/10176255/
---
 drivers/iommu/mtk_iommu_v1.c | 49 ++++++++++++++++++++++----------------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 542930c..aca76d2 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -418,20 +418,12 @@  static int mtk_iommu_create_mapping(struct device *dev,
 		m4udev->archdata.iommu = mtk_mapping;
 	}
 
-	ret = arm_iommu_attach_device(dev, mtk_mapping);
-	if (ret)
-		goto err_release_mapping;
-
 	return 0;
-
-err_release_mapping:
-	arm_iommu_release_mapping(mtk_mapping);
-	m4udev->archdata.iommu = NULL;
-	return ret;
 }
 
 static int mtk_iommu_add_device(struct device *dev)
 {
+	struct dma_iommu_mapping *mtk_mapping;
 	struct of_phandle_args iommu_spec;
 	struct of_phandle_iterator it;
 	struct mtk_iommu_data *data;
@@ -452,9 +444,30 @@  static int mtk_iommu_add_device(struct device *dev)
 	if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops)
 		return -ENODEV; /* Not a iommu client device */
 
+	/*
+	 * This is a short-term bodge because the ARM DMA code doesn't
+	 * understand multi-device groups, but we have to call into it
+	 * successfully (and not just rely on a normal IOMMU API attach
+	 * here) in order to set the correct DMA API ops on @dev.
+	 */
+	group = iommu_group_alloc();
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	err = iommu_group_add_device(group, dev);
+	iommu_group_put(group);
+	if (err)
+		return err;
+
 	data = dev->iommu_fwspec->iommu_priv;
-	iommu_device_link(&data->iommu, dev);
+	mtk_mapping = data->dev->archdata.iommu;
+	err = arm_iommu_attach_device(dev, mtk_mapping);
+	if (err) {
+		iommu_group_remove_device(dev);
+		return err;
+	}
 
+	iommu_device_link(&data->iommu, dev);
 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group))
 		return PTR_ERR(group);
@@ -479,20 +492,8 @@  static void mtk_iommu_remove_device(struct device *dev)
 
 static struct iommu_group *mtk_iommu_device_group(struct device *dev)
 {
-	struct mtk_iommu_data *data = dev->iommu_fwspec->iommu_priv;
-
-	if (!data)
-		return ERR_PTR(-ENODEV);
-
-	/* All the client devices are in the same m4u iommu-group */
-	if (!data->m4u_group) {
-		data->m4u_group = iommu_group_alloc();
-		if (IS_ERR(data->m4u_group))
-			dev_err(dev, "Failed to allocate M4U IOMMU group\n");
-	} else {
-		iommu_group_ref_get(data->m4u_group);
-	}
-	return data->m4u_group;
+	/* The iommu-group has always been allocated in add_device. */
+	return NULL;
 }
 
 static int mtk_iommu_hw_init(const struct mtk_iommu_data *data)