diff mbox series

drm/msm: Initialize MDSS irq domain at probe time

Message ID 20211125150947.354076-1-angelogioacchino.delregno@collabora.com (mailing list archive)
State New, archived
Headers show
Series drm/msm: Initialize MDSS irq domain at probe time | expand

Commit Message

AngeloGioacchino Del Regno Nov. 25, 2021, 3:09 p.m. UTC
Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
DSI host gets initialized earlier, but this caused unability to probe
the entire stack of components because they all depend on interrupts
coming from the main `mdss` node (mdp5, or dpu1).

To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
them at msm_pdev_probe() time: this will make sure that we add the
required interrupt controller mapping before dsi and/or other components
try to initialize, finally satisfying the dependency.

While at it, also change the allocation of msm_drm_private to use the
devm variant of kzalloc().

Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---
 drivers/gpu/drm/msm/msm_drv.c | 81 ++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 43 deletions(-)

Comments

Dmitry Baryshkov Nov. 25, 2021, 3:23 p.m. UTC | #1
On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
> DSI host gets initialized earlier, but this caused unability to probe
> the entire stack of components because they all depend on interrupts
> coming from the main `mdss` node (mdp5, or dpu1).
> 
> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
> them at msm_pdev_probe() time: this will make sure that we add the
> required interrupt controller mapping before dsi and/or other components
> try to initialize, finally satisfying the dependency.
> 
> While at it, also change the allocation of msm_drm_private to use the
> devm variant of kzalloc().
> 
> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>

> ---
>   drivers/gpu/drm/msm/msm_drv.c | 81 ++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7936e8d498dd..790acf4993c0 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -512,45 +512,12 @@ static int msm_init_vram(struct drm_device *dev)
>   static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   {
>   	struct platform_device *pdev = to_platform_device(dev);
> -	struct drm_device *ddev;
> -	struct msm_drm_private *priv;
> -	struct msm_kms *kms;
> -	struct msm_mdss *mdss;
> +	struct drm_device *ddev = platform_get_drvdata(pdev);
> +	struct msm_drm_private *priv = ddev->dev_private;
> +	struct msm_kms *kms = priv->kms;
> +	struct msm_mdss *mdss = priv->mdss;
>   	int ret, i;
>   
> -	ddev = drm_dev_alloc(drv, dev);
> -	if (IS_ERR(ddev)) {
> -		DRM_DEV_ERROR(dev, "failed to allocate drm_device\n");
> -		return PTR_ERR(ddev);
> -	}
> -
> -	platform_set_drvdata(pdev, ddev);
> -
> -	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> -	if (!priv) {
> -		ret = -ENOMEM;
> -		goto err_put_drm_dev;
> -	}
> -
> -	ddev->dev_private = priv;
> -	priv->dev = ddev;
> -
> -	switch (get_mdp_ver(pdev)) {
> -	case KMS_MDP5:
> -		ret = mdp5_mdss_init(ddev);
> -		break;
> -	case KMS_DPU:
> -		ret = dpu_mdss_init(ddev);
> -		break;
> -	default:
> -		ret = 0;
> -		break;
> -	}
> -	if (ret)
> -		goto err_free_priv;
> -
> -	mdss = priv->mdss;
> -
>   	priv->wq = alloc_ordered_workqueue("msm", 0);
>   	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
>   
> @@ -685,11 +652,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   err_destroy_mdss:
>   	if (mdss && mdss->funcs)
>   		mdss->funcs->destroy(ddev);
> -err_free_priv:
> -	kfree(priv);
> -err_put_drm_dev:
> -	drm_dev_put(ddev);
> -	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
> @@ -1382,12 +1344,42 @@ static const struct component_master_ops msm_drm_ops = {
>   static int msm_pdev_probe(struct platform_device *pdev)
>   {
>   	struct component_match *match = NULL;
> +	struct msm_drm_private *priv;
> +	struct drm_device *ddev;
>   	int ret;
>   
> +	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	ddev = drm_dev_alloc(&msm_driver, &pdev->dev);
> +	if (IS_ERR(ddev)) {
> +		DRM_DEV_ERROR(&pdev->dev, "failed to allocate drm_device\n");
> +		return PTR_ERR(ddev);
> +	}
> +
> +	platform_set_drvdata(pdev, ddev);
> +	ddev->dev_private = priv;
> +	priv->dev = ddev;
> +
> +	switch (get_mdp_ver(pdev)) {
> +	case KMS_MDP5:
> +		ret = mdp5_mdss_init(ddev);
> +		break;
> +	case KMS_DPU:
> +		ret = dpu_mdss_init(ddev);
> +		break;
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +	if (ret)
> +		goto err_put_drm_dev;
> +
>   	if (get_mdp_ver(pdev)) {
>   		ret = add_display_components(pdev, &match);
>   		if (ret)
> -			return ret;
> +			goto fail;
>   	}
>   
>   	ret = add_gpu_components(&pdev->dev, &match);
> @@ -1409,6 +1401,9 @@ static int msm_pdev_probe(struct platform_device *pdev)
>   
>   fail:
>   	of_platform_depopulate(&pdev->dev);
> +err_put_drm_dev:
> +	drm_dev_put(ddev);
> +	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
>
Dmitry Baryshkov Nov. 25, 2021, 10:39 p.m. UTC | #2
On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
> DSI host gets initialized earlier, but this caused unability to probe
> the entire stack of components because they all depend on interrupts
> coming from the main `mdss` node (mdp5, or dpu1).
> 
> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
> them at msm_pdev_probe() time: this will make sure that we add the
> required interrupt controller mapping before dsi and/or other components
> try to initialize, finally satisfying the dependency.
> 
> While at it, also change the allocation of msm_drm_private to use the
> devm variant of kzalloc().
> 
> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

If device is not fully bound (e.g. DSI host could not bind the panel), 
this patch causes the following oops on reboot:

[   75.011942] Unable to handle kernel NULL pointer dereference at 
virtual address 0000000000000030
[   75.020974] Mem abort info:
[   75.023859]   ESR = 0x96000006
[   75.027013]   EC = 0x25: DABT (current EL), IL = 32 bits
[   75.032480]   SET = 0, FnV = 0
[   75.035627]   EA = 0, S1PTW = 0
[   75.038861]   FSC = 0x06: level 2 translation fault
[   75.043876] Data abort info:
[   75.046847]   ISV = 0, ISS = 0x00000006
[   75.050796]   CM = 0, WnR = 0
[   75.053857] user pgtable: 4k pages, 48-bit VAs, pgdp=00000001102b3000
[   75.060478] [0000000000000030] pgd=080000011035d003, 
p4d=080000011035d003, pud=080000011035f003, pmd=0000000000000000
[   75.071380] Internal error: Oops: 96000006 [#1] SMP
[   75.076388] Modules linked in:
[   75.079530] CPU: 0 PID: 1442 Comm: reboot Not tainted 
5.16.0-rc1-00046-g2207fd610cf4-dirty #185
[   75.088460] Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
[   75.095345] pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS 
BTYPE=--)
[   75.102496] pc : drm_atomic_state_alloc+0x14/0x74
[   75.107335] lr : drm_atomic_helper_disable_all+0x20/0x210
[   75.112885] sp : ffff80001480bb70
[   75.116298] x29: ffff80001480bb70 x28: ffff0c8753505400 x27: 
0000000000000000
[   75.123626] x26: ffff0c874097d890 x25: ffffaa357b610e00 x24: 
0000000000000000
[   75.130954] x23: ffffaa357bdaa030 x22: ffffaa357bdfd2d8 x21: 
ffff80001480bbf8
[   75.138282] x20: ffff0c87469bd800 x19: ffff0c87469bd800 x18: 
ffffffffffffffff
[   75.145608] x17: 000000000000000e x16: 0000000000000001 x15: 
ffff80009480ba3d
[   75.152934] x14: 0000000000000004 x13: 0000000000000000 x12: 
ffff0c87452c1288
[   75.160261] x11: 0000000000000003 x10: ffff0c87452c1240 x9 : 
0000000000000001
[   75.167588] x8 : ffff80001480bc38 x7 : 0000000000000000 x6 : 
ffff0c874f63d300
[   75.174914] x5 : 0000000000000000 x4 : ffffaa357b582d30 x3 : 
0000000000000000
[   75.182240] x2 : ffff80001480bc20 x1 : 0000000000000000 x0 : 
ffff0c87469bd800
[   75.189568] Call trace:
[   75.192092]  drm_atomic_state_alloc+0x14/0x74
[   75.196571]  drm_atomic_helper_disable_all+0x20/0x210
[   75.201765]  drm_atomic_helper_shutdown+0x80/0x130
[   75.206683]  msm_pdev_shutdown+0x2c/0x40
[   75.210717]  platform_shutdown+0x28/0x40
[   75.214751]  device_shutdown+0x15c/0x450
[   75.218785]  __do_sys_reboot+0x218/0x2a0
[   75.222819]  __arm64_sys_reboot+0x28/0x34
[   75.226937]  invoke_syscall+0x48/0x114
[   75.230794]  el0_svc_common.constprop.0+0xd4/0xfc
[   75.235626]  do_el0_svc+0x28/0x90
[   75.239030]  el0_svc+0x28/0x80
[   75.242174]  el0t_64_sync_handler+0xa4/0x130
[   75.246567]  el0t_64_sync+0x1a0/0x1a4
[   75.250338] Code: a9be7bfd 910003fd a90153f3 f9418c01 (f9401821)
[   75.256599] ---[ end trace d90b41486de58d22 ]---


The following patch fixes it:

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 41c6a9f9dd34..5a92417d21d0 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -1435,7 +1435,7 @@ static void msm_pdev_shutdown(struct
         struct drm_device *drm = platform_get_drvdata(pdev);
         struct msm_drm_private *priv = drm ? drm->dev_private : NULL;

-       if (!priv || !priv->kms)
+       if (!priv || !priv->kms || !drm->mode_config.funcs)
                 return;

         drm_atomic_helper_shutdown(drm);


> ---
>   drivers/gpu/drm/msm/msm_drv.c | 81 ++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7936e8d498dd..790acf4993c0 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -512,45 +512,12 @@ static int msm_init_vram(struct drm_device *dev)
>   static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   {
>   	struct platform_device *pdev = to_platform_device(dev);
> -	struct drm_device *ddev;
> -	struct msm_drm_private *priv;
> -	struct msm_kms *kms;
> -	struct msm_mdss *mdss;
> +	struct drm_device *ddev = platform_get_drvdata(pdev);
> +	struct msm_drm_private *priv = ddev->dev_private;
> +	struct msm_kms *kms = priv->kms;
> +	struct msm_mdss *mdss = priv->mdss;
>   	int ret, i;
>   
> -	ddev = drm_dev_alloc(drv, dev);
> -	if (IS_ERR(ddev)) {
> -		DRM_DEV_ERROR(dev, "failed to allocate drm_device\n");
> -		return PTR_ERR(ddev);
> -	}
> -
> -	platform_set_drvdata(pdev, ddev);
> -
> -	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> -	if (!priv) {
> -		ret = -ENOMEM;
> -		goto err_put_drm_dev;
> -	}
> -
> -	ddev->dev_private = priv;
> -	priv->dev = ddev;
> -
> -	switch (get_mdp_ver(pdev)) {
> -	case KMS_MDP5:
> -		ret = mdp5_mdss_init(ddev);
> -		break;
> -	case KMS_DPU:
> -		ret = dpu_mdss_init(ddev);
> -		break;
> -	default:
> -		ret = 0;
> -		break;
> -	}
> -	if (ret)
> -		goto err_free_priv;
> -
> -	mdss = priv->mdss;
> -
>   	priv->wq = alloc_ordered_workqueue("msm", 0);
>   	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
>   
> @@ -685,11 +652,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   err_destroy_mdss:
>   	if (mdss && mdss->funcs)
>   		mdss->funcs->destroy(ddev);
> -err_free_priv:
> -	kfree(priv);
> -err_put_drm_dev:
> -	drm_dev_put(ddev);
> -	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
> @@ -1382,12 +1344,42 @@ static const struct component_master_ops msm_drm_ops = {
>   static int msm_pdev_probe(struct platform_device *pdev)
>   {
>   	struct component_match *match = NULL;
> +	struct msm_drm_private *priv;
> +	struct drm_device *ddev;
>   	int ret;
>   
> +	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	ddev = drm_dev_alloc(&msm_driver, &pdev->dev);
> +	if (IS_ERR(ddev)) {
> +		DRM_DEV_ERROR(&pdev->dev, "failed to allocate drm_device\n");
> +		return PTR_ERR(ddev);
> +	}
> +
> +	platform_set_drvdata(pdev, ddev);
> +	ddev->dev_private = priv;
> +	priv->dev = ddev;
> +
> +	switch (get_mdp_ver(pdev)) {
> +	case KMS_MDP5:
> +		ret = mdp5_mdss_init(ddev);
> +		break;
> +	case KMS_DPU:
> +		ret = dpu_mdss_init(ddev);
> +		break;
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +	if (ret)
> +		goto err_put_drm_dev;
> +
>   	if (get_mdp_ver(pdev)) {
>   		ret = add_display_components(pdev, &match);
>   		if (ret)
> -			return ret;
> +			goto fail;
>   	}
>   
>   	ret = add_gpu_components(&pdev->dev, &match);
> @@ -1409,6 +1401,9 @@ static int msm_pdev_probe(struct platform_device *pdev)
>   
>   fail:
>   	of_platform_depopulate(&pdev->dev);
> +err_put_drm_dev:
> +	drm_dev_put(ddev);
> +	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
>
Dmitry Baryshkov Nov. 26, 2021, 12:06 a.m. UTC | #3
On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
> DSI host gets initialized earlier, but this caused unability to probe
> the entire stack of components because they all depend on interrupts
> coming from the main `mdss` node (mdp5, or dpu1).
> 
> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
> them at msm_pdev_probe() time: this will make sure that we add the
> required interrupt controller mapping before dsi and/or other components
> try to initialize, finally satisfying the dependency.
> 
> While at it, also change the allocation of msm_drm_private to use the
> devm variant of kzalloc().
> 
> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

Another issue (or a pack of issues):
Now the msm_drm_init() is unbalanced with msm_drm_uninit(). Bits of code 
(putting the drm dev, removing the IRQ domain, etc) have to be called 
now from the msm_pdev_remove() function rather than from the unbind path.

The following changes fix the observed issues here, however additional 
care should be taken.

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 5a92417d21d0..0abb16256b61 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -342,7 +342,6 @@ static int msm_drm_uninit(struct device *dev)
         struct drm_device *ddev = platform_get_drvdata(pdev);
         struct msm_drm_private *priv = ddev->dev_private;
         struct msm_kms *kms = priv->kms;
-       struct msm_mdss *mdss = priv->mdss;
         int i;

         /*
@@ -402,14 +401,7 @@ static int msm_drm_uninit(struct device *dev)

         component_unbind_all(dev, ddev);

-       if (mdss && mdss->funcs)
-               mdss->funcs->destroy(ddev);
-
-       ddev->dev_private = NULL;
-       drm_dev_put(ddev);
-
         destroy_workqueue(priv->wq);
-       kfree(priv);

         return 0;
  }
@@ -515,7 +507,6 @@ static int msm_drm_init(struct device *dev, const
         struct drm_device *ddev = platform_get_drvdata(pdev);
         struct msm_drm_private *priv = ddev->dev_private;
         struct msm_kms *kms = priv->kms;
-       struct msm_mdss *mdss = priv->mdss;
         int ret, i;

         priv->wq = alloc_ordered_workqueue("msm", 0);
@@ -538,12 +529,12 @@ static int msm_drm_init(struct device *dev, const

         ret = msm_init_vram(ddev);
         if (ret)
-               goto err_destroy_mdss;
+               return ret;

         /* Bind all our sub-components: */
         ret = component_bind_all(dev, ddev);
         if (ret)
-               goto err_destroy_mdss;
+               return ret;

         dma_set_max_seg_size(dev, UINT_MAX);

@@ -649,10 +640,6 @@ static int msm_drm_init(struct device *dev, const
  err_msm_uninit:
         msm_drm_uninit(dev);
         return ret;
-err_destroy_mdss:
-       if (mdss && mdss->funcs)
-               mdss->funcs->destroy(ddev);
-       return ret;
  }

  /*
@@ -1424,9 +1411,20 @@ static int msm_pdev_probe(struct platform_device

  static int msm_pdev_remove(struct platform_device *pdev)
  {
+       struct drm_device *ddev = platform_get_drvdata(pdev);
+       struct msm_drm_private *priv = ddev->dev_private;
+       struct msm_mdss *mdss = priv->mdss;
+
         component_master_del(&pdev->dev, &msm_drm_ops);
+
         of_platform_depopulate(&pdev->dev);

+       if (mdss && mdss->funcs)
+               mdss->funcs->destroy(ddev);
+
+       ddev->dev_private = NULL;
+       drm_dev_put(ddev);
+
         return 0;
  }



> ---
>   drivers/gpu/drm/msm/msm_drv.c | 81 ++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7936e8d498dd..790acf4993c0 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -512,45 +512,12 @@ static int msm_init_vram(struct drm_device *dev)
>   static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   {
>   	struct platform_device *pdev = to_platform_device(dev);
> -	struct drm_device *ddev;
> -	struct msm_drm_private *priv;
> -	struct msm_kms *kms;
> -	struct msm_mdss *mdss;
> +	struct drm_device *ddev = platform_get_drvdata(pdev);
> +	struct msm_drm_private *priv = ddev->dev_private;
> +	struct msm_kms *kms = priv->kms;
> +	struct msm_mdss *mdss = priv->mdss;
>   	int ret, i;
>   
> -	ddev = drm_dev_alloc(drv, dev);
> -	if (IS_ERR(ddev)) {
> -		DRM_DEV_ERROR(dev, "failed to allocate drm_device\n");
> -		return PTR_ERR(ddev);
> -	}
> -
> -	platform_set_drvdata(pdev, ddev);
> -
> -	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> -	if (!priv) {
> -		ret = -ENOMEM;
> -		goto err_put_drm_dev;
> -	}
> -
> -	ddev->dev_private = priv;
> -	priv->dev = ddev;
> -
> -	switch (get_mdp_ver(pdev)) {
> -	case KMS_MDP5:
> -		ret = mdp5_mdss_init(ddev);
> -		break;
> -	case KMS_DPU:
> -		ret = dpu_mdss_init(ddev);
> -		break;
> -	default:
> -		ret = 0;
> -		break;
> -	}
> -	if (ret)
> -		goto err_free_priv;
> -
> -	mdss = priv->mdss;
> -
>   	priv->wq = alloc_ordered_workqueue("msm", 0);
>   	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
>   
> @@ -685,11 +652,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   err_destroy_mdss:
>   	if (mdss && mdss->funcs)
>   		mdss->funcs->destroy(ddev);
> -err_free_priv:
> -	kfree(priv);
> -err_put_drm_dev:
> -	drm_dev_put(ddev);
> -	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
> @@ -1382,12 +1344,42 @@ static const struct component_master_ops msm_drm_ops = {
>   static int msm_pdev_probe(struct platform_device *pdev)
>   {
>   	struct component_match *match = NULL;
> +	struct msm_drm_private *priv;
> +	struct drm_device *ddev;
>   	int ret;
>   
> +	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	ddev = drm_dev_alloc(&msm_driver, &pdev->dev);
> +	if (IS_ERR(ddev)) {
> +		DRM_DEV_ERROR(&pdev->dev, "failed to allocate drm_device\n");
> +		return PTR_ERR(ddev);
> +	}
> +
> +	platform_set_drvdata(pdev, ddev);
> +	ddev->dev_private = priv;
> +	priv->dev = ddev;
> +
> +	switch (get_mdp_ver(pdev)) {
> +	case KMS_MDP5:
> +		ret = mdp5_mdss_init(ddev);
> +		break;
> +	case KMS_DPU:
> +		ret = dpu_mdss_init(ddev);
> +		break;
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +	if (ret)
> +		goto err_put_drm_dev;
> +
>   	if (get_mdp_ver(pdev)) {
>   		ret = add_display_components(pdev, &match);
>   		if (ret)
> -			return ret;
> +			goto fail;
>   	}
>   
>   	ret = add_gpu_components(&pdev->dev, &match);
> @@ -1409,6 +1401,9 @@ static int msm_pdev_probe(struct platform_device *pdev)
>   
>   fail:
>   	of_platform_depopulate(&pdev->dev);
> +err_put_drm_dev:
> +	drm_dev_put(ddev);
> +	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
>
AngeloGioacchino Del Regno Nov. 26, 2021, 9:26 a.m. UTC | #4
Il 26/11/21 01:06, Dmitry Baryshkov ha scritto:
> On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
>> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
>> DSI host gets initialized earlier, but this caused unability to probe
>> the entire stack of components because they all depend on interrupts
>> coming from the main `mdss` node (mdp5, or dpu1).
>>
>> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
>> them at msm_pdev_probe() time: this will make sure that we add the
>> required interrupt controller mapping before dsi and/or other components
>> try to initialize, finally satisfying the dependency.
>>
>> While at it, also change the allocation of msm_drm_private to use the
>> devm variant of kzalloc().
>>
>> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> 
> Another issue (or a pack of issues):
> Now the msm_drm_init() is unbalanced with msm_drm_uninit(). Bits of code (putting 
> the drm dev, removing the IRQ domain, etc) have to be called now from the 
> msm_pdev_remove() function rather than from the unbind path.
> 
> The following changes fix the observed issues here, however additional care should 
> be taken.
> 


Hello Dmitry,

thanks for the thorough review (and solutions!).
Are you going to push your changes on top, or should I send a V2?

Cheers,
- Angelo
AngeloGioacchino Del Regno Nov. 26, 2021, 4:08 p.m. UTC | #5
Il 26/11/21 01:06, Dmitry Baryshkov ha scritto:
> On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
>> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
>> DSI host gets initialized earlier, but this caused unability to probe
>> the entire stack of components because they all depend on interrupts
>> coming from the main `mdss` node (mdp5, or dpu1).
>>
>> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
>> them at msm_pdev_probe() time: this will make sure that we add the
>> required interrupt controller mapping before dsi and/or other components
>> try to initialize, finally satisfying the dependency.
>>
>> While at it, also change the allocation of msm_drm_private to use the
>> devm variant of kzalloc().
>>
>> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> 
> Another issue (or a pack of issues):
> Now the msm_drm_init() is unbalanced with msm_drm_uninit(). Bits of code (putting 
> the drm dev, removing the IRQ domain, etc) have to be called now from the 
> msm_pdev_remove() function rather than from the unbind path.
> 
> The following changes fix the observed issues here, however additional care should 
> be taken.
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 5a92417d21d0..0abb16256b61 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -342,7 +342,6 @@ static int msm_drm_uninit(struct device *dev)
>          struct drm_device *ddev = platform_get_drvdata(pdev);
>          struct msm_drm_private *priv = ddev->dev_private;
>          struct msm_kms *kms = priv->kms;
> -       struct msm_mdss *mdss = priv->mdss;
>          int i;
> 
>          /*
> @@ -402,14 +401,7 @@ static int msm_drm_uninit(struct device *dev)
> 
>          component_unbind_all(dev, ddev);
> 
> -       if (mdss && mdss->funcs)
> -               mdss->funcs->destroy(ddev);
> -
> -       ddev->dev_private = NULL;
> -       drm_dev_put(ddev);
> -
>          destroy_workqueue(priv->wq);
> -       kfree(priv);
> 
>          return 0;
>   }
> @@ -515,7 +507,6 @@ static int msm_drm_init(struct device *dev, const
>          struct drm_device *ddev = platform_get_drvdata(pdev);
>          struct msm_drm_private *priv = ddev->dev_private;
>          struct msm_kms *kms = priv->kms;
> -       struct msm_mdss *mdss = priv->mdss;
>          int ret, i;
> 
>          priv->wq = alloc_ordered_workqueue("msm", 0);
> @@ -538,12 +529,12 @@ static int msm_drm_init(struct device *dev, const
> 
>          ret = msm_init_vram(ddev);
>          if (ret)
> -               goto err_destroy_mdss;
> +               return ret;
> 
>          /* Bind all our sub-components: */
>          ret = component_bind_all(dev, ddev);
>          if (ret)
> -               goto err_destroy_mdss;
> +               return ret;
> 
>          dma_set_max_seg_size(dev, UINT_MAX);
> 
> @@ -649,10 +640,6 @@ static int msm_drm_init(struct device *dev, const
>   err_msm_uninit:
>          msm_drm_uninit(dev);
>          return ret;
> -err_destroy_mdss:
> -       if (mdss && mdss->funcs)
> -               mdss->funcs->destroy(ddev);
> -       return ret;
>   }
> 
>   /*
> @@ -1424,9 +1411,20 @@ static int msm_pdev_probe(struct platform_device
> 
>   static int msm_pdev_remove(struct platform_device *pdev)
>   {
> +       struct drm_device *ddev = platform_get_drvdata(pdev);
> +       struct msm_drm_private *priv = ddev->dev_private;
> +       struct msm_mdss *mdss = priv->mdss;
> +
>          component_master_del(&pdev->dev, &msm_drm_ops);
> +
>          of_platform_depopulate(&pdev->dev);
> 
> +       if (mdss && mdss->funcs)
> +               mdss->funcs->destroy(ddev);
> +
> +       ddev->dev_private = NULL;
> +       drm_dev_put(ddev);
> +
>          return 0;
>   }
> 
> 
> 

Hello,
I had a chance to get back to this patch... and there's a bit more to do...

Applying your suggestion makes the kernel crash when removing the DSI panel:

[   92.084668] Unable to handle kernel paging request at virtual address 
ffffdd7f137945d8

[   92.092848] Mem abort info:

[   92.095758]   ESR = 0x96000007

[   92.098918]   EC = 0x25: DABT (current EL), IL = 32 bits

[   92.104395]   SET = 0, FnV = 0

[   92.107545]   EA = 0, S1PTW = 0

[   92.110785]   FSC = 0x07: level 3 translation fault

[   92.115802] Data abort info:

[   92.118767]   ISV = 0, ISS = 0x00000007

[   92.122720]   CM = 0, WnR = 0

[   92.125770] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000082466000

[   92.132668] [ffffdd7f137945d8] pgd=100000017ffff003, p4d=100000017ffff003, 
pud=10000001034bb003, pmd=1000000108ec2003, pte=0000000000000000

[   92.145530] Internal error: Oops: 96000007 [#1] SMP

[   92.150557] Modules linked in: af_alg ipv6 uvcvideo videobuf2_vmalloc 
snd_soc_hdmi_codec venus_enc venus_dec videobuf2_dma_contig videobuf2_memops 
cdc_ether usbnet hci_uart ath10k_snoc msm venus_core r8152 ath10k_core btqca btbcm 
v4l2_mem2mem ti_sn65dsi86(-) ath gpu_sched videobuf2_v4l2 sx9310 cros_ec_typec 
drm_dp_aux_bus bluetooth mac80211 snd_soc_rt5682_i2c qrtr typec drm_kms_helper 
sbs_battery snd_soc_rt5682 videobuf2_common cros_usbpd_charger cros_usbpd_logger 
cros_ec_chardev elan_i2c pwm_cros_ec industrialio_triggered_buffer qcom_q6v5_mss 
videodev snd_soc_rl6231 kfifo_buf qcom_spmi_adc5 drm snd_soc_sc7180 
qcom_vadc_common qcom_pil_info qcom_spmi_temp_alarm ecdh_generic crct10dif_ce 
libarc4 mc snd_soc_qcom_common ecc qcom_stats qcom_q6v5 ipa cfg80211 
snd_soc_lpass_sc7180 i2c_qcom_geni reset_qcom_pdc qcom_sysmon dispcc_sc7180 
spi_geni_qcom snd_soc_lpass_hdmi videocc_sc7180 qcom_common qcom_glink_smem 
snd_soc_lpass_cpu spi_qcom_qspi lpasscorecc_sc7180 qmi_helpers gpucc_sc7180

[   92.150710]  snd_soc_lpass_platform icc_osm_l3 mdt_loader qcom_wdt 
snd_soc_max98357a socinfo rmtfs_mem pwm_bl rfkill uinput btrfs blake2b_generic 
libcrc32c xor xor_neon raid6_pq zstd_compress cuse fuse [last unloaded: panel_edp]

[   92.239499] CPU: 2 PID: 1627 Comm: rmmod Not tainted 5.16.0-rc2-next-20211125+ #29

[   92.239508] Hardware name: Google Lazor Limozeen without Touchscreen (rev5 - 
rev8) (DT)

[   92.239512] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)

[   92.239518] pc : drm_panel_disable+0x80/0xe0 [drm]

[   92.268628] rfkill: input handler enabled

[   92.276336] lr : drm_panel_disable+0x6c/0xe0 [drm]

[   92.297469] sp : ffff80000c7c38a0

[   92.297474] x29: ffff80000c7c38a0 x28: ffffdd7f136e6e68 x27: ffff280ed1801400

[   92.308207] x26: ffff280ec46c5080 x25: ffff280ec46b3880 x24: ffffdd7f1341e6f8

[   92.315534] x23: 0000000000000038 x22: ffff280ec46c50d8 x21: ffff280ec0a28e00

[   92.322858] x20: 0000000000000000 x19: ffff280ec3de7c80 x18: 0000000000000020

[   92.330191] x17: 0000000000000000 x16: ffffdd7f4b7acec0 x15: 0000000000000000

[   92.337531] x14: 0000000000000000 x13: 0000000000000000 x12: 0000000000000000

[   92.344859] x11: 0000000000000000 x10: 0000000000000000 x9 : ffffdd7f13335000

[   92.352182] x8 : ffff280f03fed280 x7 : 0000000000000001 x6 : ffff280edf1c2600

[   92.359504] x5 : 0000000000000002 x4 : ffff280f03fed280 x3 : ffff280ec4689820

[   92.366835] x2 : 0000000000000000 x1 : ffffdd7f137945c8 x0 : 0000000000000000

[   92.374159] Call trace:

[   92.376682]  drm_panel_disable+0x80/0xe0 [drm]

[   92.381367]  panel_bridge_disable+0x18/0x2c [drm_kms_helper]

[   92.387281]  drm_atomic_bridge_chain_disable+0x98/0xd0 [drm]

[   92.393194]  disable_outputs+0xfc/0x31c [drm_kms_helper]

[   92.398738]  drm_atomic_helper_commit_modeset_disables+0x20/0x50 [drm_kms_helper]

[   92.406482]  msm_atomic_commit_tail+0x188/0x500 [msm]

[   92.411772]  commit_tail+0xa4/0x184 [drm_kms_helper]

[   92.416954]  drm_atomic_helper_commit+0x164/0x3fc [drm_kms_helper]

[   92.423373]  drm_atomic_commit+0x50/0x60 [drm]

[   92.428064]  drm_atomic_helper_disable_all+0x1f8/0x20c [drm_kms_helper]

[   92.434989]  drm_atomic_helper_shutdown+0x80/0x130 [drm_kms_helper]

[   92.441497]  msm_drm_uninit.isra.0+0x14c/0x174 [msm]

[   92.446729]  msm_drm_unbind+0x14/0x20 [msm]

[   92.451125]  component_del+0xa8/0x160

[   92.454898]  dsi_dev_detach+0x24/0x30 [msm]

[   92.459294]  dsi_host_detach+0x20/0x64 [msm]

[   92.463764]  devm_mipi_dsi_detach+0x2c/0x40

[   92.468069]  devm_action_release+0x18/0x24

[   92.472278]  devres_release_group+0x100/0x1b0

[   92.476755]  i2c_device_remove+0x48/0xf0

[   92.480790]  __device_release_driver+0x188/0x23c

[   92.485534]  driver_detach+0xfc/0x1e0

[   92.489303]  bus_remove_driver+0x5c/0xd0

[   92.493333]  driver_unregister+0x34/0x64

[   92.497363]  i2c_del_driver+0x58/0x70

[   92.501134]  ti_sn65dsi86_exit+0x44/0x98c [ti_sn65dsi86]

[   92.506611]  __arm64_sys_delete_module+0x198/0x22c

[   92.511535]  invoke_syscall+0x48/0x114

[   92.515389]  el0_svc_common.constprop.0+0x44/0xec

[   92.520223]  do_el0_svc+0x28/0x90

[   92.523629]  el0_svc+0x20/0x60

[   92.526780]  el0t_64_sync_handler+0xec/0xf0

[   92.531085]  el0t_64_sync+0x1a0/0x1a4

[   92.534879] Code: f94013f5 52800000 f9400a61 b40000a1 (f9400821)

[   92.541134] ---[ end trace 1bc553757c40a199 ]---



I'll look into this.
In the meanwhile, if anyone has any suggestion before I solve this issue,
as to speed up getting this fix done (as it's pretty much critical), you're
welcome.
Dmitry Baryshkov Nov. 26, 2021, 9:12 p.m. UTC | #6
On 26/11/2021 12:26, AngeloGioacchino Del Regno wrote:
> Il 26/11/21 01:06, Dmitry Baryshkov ha scritto:
>> On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
>>> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
>>> DSI host gets initialized earlier, but this caused unability to probe
>>> the entire stack of components because they all depend on interrupts
>>> coming from the main `mdss` node (mdp5, or dpu1).
>>>
>>> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
>>> them at msm_pdev_probe() time: this will make sure that we add the
>>> required interrupt controller mapping before dsi and/or other components
>>> try to initialize, finally satisfying the dependency.
>>>
>>> While at it, also change the allocation of msm_drm_private to use the
>>> devm variant of kzalloc().
>>>
>>> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
>>> Signed-off-by: AngeloGioacchino Del Regno 
>>> <angelogioacchino.delregno@collabora.com>
>>
>> Another issue (or a pack of issues):
>> Now the msm_drm_init() is unbalanced with msm_drm_uninit(). Bits of 
>> code (putting the drm dev, removing the IRQ domain, etc) have to be 
>> called now from the msm_pdev_remove() function rather than from the 
>> unbind path.
>>
>> The following changes fix the observed issues here, however additional 
>> care should be taken.
>>
> 
> 
> Hello Dmitry,
> 
> thanks for the thorough review (and solutions!).
> Are you going to push your changes on top, or should I send a V2?

Please send a v2. As you see, my suggestions have to be validated too 
(and they were based on crashes/issues observed locally).
Dmitry Baryshkov Nov. 29, 2021, 2:20 a.m. UTC | #7
Hi,

On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
> DSI host gets initialized earlier, but this caused unability to probe
> the entire stack of components because they all depend on interrupts
> coming from the main `mdss` node (mdp5, or dpu1).
> 
> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
> them at msm_pdev_probe() time: this will make sure that we add the
> required interrupt controller mapping before dsi and/or other components
> try to initialize, finally satisfying the dependency.
> 
> While at it, also change the allocation of msm_drm_private to use the
> devm variant of kzalloc().
> 
> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>

I have been thinking about this. I do not feel that this is the correct 
approach. Currently DRM device exists only when all components are 
bound. If any of the subdevices is removed, corresponding component is 
delteted (and thus all components are unbound), the DRM device is taken 
down. This results in the state cleanup, userspace notifications, etc.

With your changes, DRM device will continue to exist even after one of 
subdevices is removed. This is not an expected behaviour, since 
subdrivers do not perform full cleanup, delegating that to DRM device 
takedown.

I suppose that proper solution would be to split msm_drv.c into into:
- generic components & drm code to be called from mdp4/mdp5/dpu driver 
(making mdp4, mdp5 or dpu1 the components master)

- bare mdss driver, taking care only about IRQs, OF devices population - 
calling proper mdss_init/mdss_destroy functions. Most probably we can 
drop this part altogether and just make md5_mdss.c/dpu_mdss.c proper 
platform drivers.

> ---
>   drivers/gpu/drm/msm/msm_drv.c | 81 ++++++++++++++++-------------------
>   1 file changed, 38 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
> index 7936e8d498dd..790acf4993c0 100644
> --- a/drivers/gpu/drm/msm/msm_drv.c
> +++ b/drivers/gpu/drm/msm/msm_drv.c
> @@ -512,45 +512,12 @@ static int msm_init_vram(struct drm_device *dev)
>   static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   {
>   	struct platform_device *pdev = to_platform_device(dev);
> -	struct drm_device *ddev;
> -	struct msm_drm_private *priv;
> -	struct msm_kms *kms;
> -	struct msm_mdss *mdss;
> +	struct drm_device *ddev = platform_get_drvdata(pdev);
> +	struct msm_drm_private *priv = ddev->dev_private;
> +	struct msm_kms *kms = priv->kms;
> +	struct msm_mdss *mdss = priv->mdss;
>   	int ret, i;
>   
> -	ddev = drm_dev_alloc(drv, dev);
> -	if (IS_ERR(ddev)) {
> -		DRM_DEV_ERROR(dev, "failed to allocate drm_device\n");
> -		return PTR_ERR(ddev);
> -	}
> -
> -	platform_set_drvdata(pdev, ddev);
> -
> -	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> -	if (!priv) {
> -		ret = -ENOMEM;
> -		goto err_put_drm_dev;
> -	}
> -
> -	ddev->dev_private = priv;
> -	priv->dev = ddev;
> -
> -	switch (get_mdp_ver(pdev)) {
> -	case KMS_MDP5:
> -		ret = mdp5_mdss_init(ddev);
> -		break;
> -	case KMS_DPU:
> -		ret = dpu_mdss_init(ddev);
> -		break;
> -	default:
> -		ret = 0;
> -		break;
> -	}
> -	if (ret)
> -		goto err_free_priv;
> -
> -	mdss = priv->mdss;
> -
>   	priv->wq = alloc_ordered_workqueue("msm", 0);
>   	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
>   
> @@ -685,11 +652,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
>   err_destroy_mdss:
>   	if (mdss && mdss->funcs)
>   		mdss->funcs->destroy(ddev);
> -err_free_priv:
> -	kfree(priv);
> -err_put_drm_dev:
> -	drm_dev_put(ddev);
> -	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
> @@ -1382,12 +1344,42 @@ static const struct component_master_ops msm_drm_ops = {
>   static int msm_pdev_probe(struct platform_device *pdev)
>   {
>   	struct component_match *match = NULL;
> +	struct msm_drm_private *priv;
> +	struct drm_device *ddev;
>   	int ret;
>   
> +	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
> +	if (!priv)
> +		return -ENOMEM;
> +
> +	ddev = drm_dev_alloc(&msm_driver, &pdev->dev);
> +	if (IS_ERR(ddev)) {
> +		DRM_DEV_ERROR(&pdev->dev, "failed to allocate drm_device\n");
> +		return PTR_ERR(ddev);
> +	}
> +
> +	platform_set_drvdata(pdev, ddev);
> +	ddev->dev_private = priv;
> +	priv->dev = ddev;
> +
> +	switch (get_mdp_ver(pdev)) {
> +	case KMS_MDP5:
> +		ret = mdp5_mdss_init(ddev);
> +		break;
> +	case KMS_DPU:
> +		ret = dpu_mdss_init(ddev);
> +		break;
> +	default:
> +		ret = 0;
> +		break;
> +	}
> +	if (ret)
> +		goto err_put_drm_dev;
> +
>   	if (get_mdp_ver(pdev)) {
>   		ret = add_display_components(pdev, &match);
>   		if (ret)
> -			return ret;
> +			goto fail;
>   	}
>   
>   	ret = add_gpu_components(&pdev->dev, &match);
> @@ -1409,6 +1401,9 @@ static int msm_pdev_probe(struct platform_device *pdev)
>   
>   fail:
>   	of_platform_depopulate(&pdev->dev);
> +err_put_drm_dev:
> +	drm_dev_put(ddev);
> +	platform_set_drvdata(pdev, NULL);
>   	return ret;
>   }
>   
>
AngeloGioacchino Del Regno Nov. 29, 2021, 2:14 p.m. UTC | #8
Il 29/11/21 03:20, Dmitry Baryshkov ha scritto:
> Hi,
> 
> On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
>> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
>> DSI host gets initialized earlier, but this caused unability to probe
>> the entire stack of components because they all depend on interrupts
>> coming from the main `mdss` node (mdp5, or dpu1).
>>
>> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
>> them at msm_pdev_probe() time: this will make sure that we add the
>> required interrupt controller mapping before dsi and/or other components
>> try to initialize, finally satisfying the dependency.
>>
>> While at it, also change the allocation of msm_drm_private to use the
>> devm variant of kzalloc().
>>
>> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> 
> I have been thinking about this. I do not feel that this is the correct approach. 
> Currently DRM device exists only when all components are bound. If any of the 
> subdevices is removed, corresponding component is delteted (and thus all components 
> are unbound), the DRM device is taken down. This results in the state cleanup, 
> userspace notifications, etc.
> 
> With your changes, DRM device will continue to exist even after one of subdevices 
> is removed. This is not an expected behaviour, since subdrivers do not perform full 
> cleanup, delegating that to DRM device takedown.
> 
> I suppose that proper solution would be to split msm_drv.c into into:
> - generic components & drm code to be called from mdp4/mdp5/dpu driver (making 
> mdp4, mdp5 or dpu1 the components master)
> 
> - bare mdss driver, taking care only about IRQs, OF devices population - calling 
> proper mdss_init/mdss_destroy functions. Most probably we can drop this part 
> altogether and just make md5_mdss.c/dpu_mdss.c proper platform drivers.
> 


Hmm... getting a better look on how things are structured... yes, I mostly agree
with you, though I'm not sure about making MDP{4,5}/DPU1 the component master; that
would result in a major change in drm-msm, which may be "a bit too much".

Don't misunderstand me here, please, major changes are fine - but I feel urgency
to get this bug solved ASAP (since drm-msm is currently broken at least for drm 
bridges) and, if we do anything major, that would require a very careful slow
review process that will leave this driver broken for a lot of time.

I actually tried something else that "simplifies" the former approach, so here's
my proposal:
* we introduce {mdp5,dpu}_mdss_early_init(struct device, struct msm_drm_private)
* allocate only msm_drm_private in msm_pdev_probe, leaving the drm_dev_alloc call
   into msm_drm_init(), so that the drm_dev_put() stays in msm_drm_uninit()
* pass msm_drm_private as drvdata instead of drm_device
* change all the drvdata users to get drm_device from priv->dev, instead of getting
   msm_drm_private from drm_device->dev_private (like many other drm drivers are
   currently doing)

This way, we keep the current flow of creating the DRM device at msm_drm_init time
and tearing it down at msm_drm_unbind time, solving the issue that you are
describing.

If you're okay with this kind of approach, I have two patches here that are 95%
ready, can finish them off and send briefly.

Though, something else must be noted here... in the last mail where I'm pasting
a crash that happens when running 'rmmod panel_edp ti_sn65dsi86', I have implied
that this is happening due to the patch that I've sent: after some more research,
I'm not convinced anymore that this is a consequence of that. That crash may not
be related to my fix at all, but to something else (perhaps also related to commit
8f59ee9a570c, the one that we're fixing here).

Of course, that crash still happens even with the approach that I've just proposed.


Looking forward for your opinion!

Cheers,
- Angelo
Dmitry Baryshkov Nov. 29, 2021, 2:53 p.m. UTC | #9
Hi,

On Mon, 29 Nov 2021 at 17:15, AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com> wrote:
>
> Il 29/11/21 03:20, Dmitry Baryshkov ha scritto:
> > Hi,
> >
> > On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
> >> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
> >> DSI host gets initialized earlier, but this caused unability to probe
> >> the entire stack of components because they all depend on interrupts
> >> coming from the main `mdss` node (mdp5, or dpu1).
> >>
> >> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
> >> them at msm_pdev_probe() time: this will make sure that we add the
> >> required interrupt controller mapping before dsi and/or other components
> >> try to initialize, finally satisfying the dependency.
> >>
> >> While at it, also change the allocation of msm_drm_private to use the
> >> devm variant of kzalloc().
> >>
> >> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
> >> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> >
> > I have been thinking about this. I do not feel that this is the correct approach.
> > Currently DRM device exists only when all components are bound. If any of the
> > subdevices is removed, corresponding component is delteted (and thus all components
> > are unbound), the DRM device is taken down. This results in the state cleanup,
> > userspace notifications, etc.
> >
> > With your changes, DRM device will continue to exist even after one of subdevices
> > is removed. This is not an expected behaviour, since subdrivers do not perform full
> > cleanup, delegating that to DRM device takedown.
> >
> > I suppose that proper solution would be to split msm_drv.c into into:
> > - generic components & drm code to be called from mdp4/mdp5/dpu driver (making
> > mdp4, mdp5 or dpu1 the components master)
> >
> > - bare mdss driver, taking care only about IRQs, OF devices population - calling
> > proper mdss_init/mdss_destroy functions. Most probably we can drop this part
> > altogether and just make md5_mdss.c/dpu_mdss.c proper platform drivers.
> >
>
>
> Hmm... getting a better look on how things are structured... yes, I mostly agree
> with you, though I'm not sure about making MDP{4,5}/DPU1 the component master; that
> would result in a major change in drm-msm, which may be "a bit too much".
>
> Don't misunderstand me here, please, major changes are fine - but I feel urgency
> to get this bug solved ASAP (since drm-msm is currently broken at least for drm
> bridges) and, if we do anything major, that would require a very careful slow
> review process that will leave this driver broken for a lot of time.

Yep. I wanted to hear your and Rob's opinion, that's why I did not
rush into implementing it.
I still think this is the way to go in the long term. What I really
liked in your patchset was untying the knot between component binds
returning EPROBE_DEFER and mdss subdevices being in place. This solved
the "dsi host registration" infinite loop for me.

>
> I actually tried something else that "simplifies" the former approach, so here's
> my proposal:
> * we introduce {mdp5,dpu}_mdss_early_init(struct device, struct msm_drm_private)
> * allocate only msm_drm_private in msm_pdev_probe, leaving the drm_dev_alloc call
>    into msm_drm_init(), so that the drm_dev_put() stays in msm_drm_uninit()
> * pass msm_drm_private as drvdata instead of drm_device
> * change all the drvdata users to get drm_device from priv->dev, instead of getting
>    msm_drm_private from drm_device->dev_private (like many other drm drivers are
>    currently doing)

This sounds in a way that it should work. I'm looking forward to
seeing (and testing) your patches.

>
> This way, we keep the current flow of creating the DRM device at msm_drm_init time
> and tearing it down at msm_drm_unbind time, solving the issue that you are
> describing.
>
> If you're okay with this kind of approach, I have two patches here that are 95%
> ready, can finish them off and send briefly.
>
> Though, something else must be noted here... in the last mail where I'm pasting
> a crash that happens when running 'rmmod panel_edp ti_sn65dsi86', I have implied
> that this is happening due to the patch that I've sent: after some more research,
> I'm not convinced anymore that this is a consequence of that. That crash may not
> be related to my fix at all, but to something else (perhaps also related to commit
> 8f59ee9a570c, the one that we're fixing here).
>
> Of course, that crash still happens even with the approach that I've just proposed.
>
>
> Looking forward for your opinion!
>
> Cheers,
> - Angelo
AngeloGioacchino Del Regno Nov. 29, 2021, 2:56 p.m. UTC | #10
Il 29/11/21 15:53, Dmitry Baryshkov ha scritto:
> Hi,
> 
> On Mon, 29 Nov 2021 at 17:15, AngeloGioacchino Del Regno
> <angelogioacchino.delregno@collabora.com> wrote:
>>
>> Il 29/11/21 03:20, Dmitry Baryshkov ha scritto:
>>> Hi,
>>>
>>> On 25/11/2021 18:09, AngeloGioacchino Del Regno wrote:
>>>> Since commit 8f59ee9a570c ("drm/msm/dsi: Adjust probe order"), the
>>>> DSI host gets initialized earlier, but this caused unability to probe
>>>> the entire stack of components because they all depend on interrupts
>>>> coming from the main `mdss` node (mdp5, or dpu1).
>>>>
>>>> To fix this issue, also anticipate probing mdp5 or dpu1 by initializing
>>>> them at msm_pdev_probe() time: this will make sure that we add the
>>>> required interrupt controller mapping before dsi and/or other components
>>>> try to initialize, finally satisfying the dependency.
>>>>
>>>> While at it, also change the allocation of msm_drm_private to use the
>>>> devm variant of kzalloc().
>>>>
>>>> Fixes: 8f59ee9a570c ("drm/msm/dsi: Adjust probe order")
>>>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>>>
>>> I have been thinking about this. I do not feel that this is the correct approach.
>>> Currently DRM device exists only when all components are bound. If any of the
>>> subdevices is removed, corresponding component is delteted (and thus all components
>>> are unbound), the DRM device is taken down. This results in the state cleanup,
>>> userspace notifications, etc.
>>>
>>> With your changes, DRM device will continue to exist even after one of subdevices
>>> is removed. This is not an expected behaviour, since subdrivers do not perform full
>>> cleanup, delegating that to DRM device takedown.
>>>
>>> I suppose that proper solution would be to split msm_drv.c into into:
>>> - generic components & drm code to be called from mdp4/mdp5/dpu driver (making
>>> mdp4, mdp5 or dpu1 the components master)
>>>
>>> - bare mdss driver, taking care only about IRQs, OF devices population - calling
>>> proper mdss_init/mdss_destroy functions. Most probably we can drop this part
>>> altogether and just make md5_mdss.c/dpu_mdss.c proper platform drivers.
>>>
>>
>>
>> Hmm... getting a better look on how things are structured... yes, I mostly agree
>> with you, though I'm not sure about making MDP{4,5}/DPU1 the component master; that
>> would result in a major change in drm-msm, which may be "a bit too much".
>>
>> Don't misunderstand me here, please, major changes are fine - but I feel urgency
>> to get this bug solved ASAP (since drm-msm is currently broken at least for drm
>> bridges) and, if we do anything major, that would require a very careful slow
>> review process that will leave this driver broken for a lot of time.
> 
> Yep. I wanted to hear your and Rob's opinion, that's why I did not
> rush into implementing it.
> I still think this is the way to go in the long term. What I really
> liked in your patchset was untying the knot between component binds
> returning EPROBE_DEFER and mdss subdevices being in place. This solved
> the "dsi host registration" infinite loop for me.
> 

Thanks. I'm also curious about Rob's opinion on this, as that'd be very valuable.

>>
>> I actually tried something else that "simplifies" the former approach, so here's
>> my proposal:
>> * we introduce {mdp5,dpu}_mdss_early_init(struct device, struct msm_drm_private)
>> * allocate only msm_drm_private in msm_pdev_probe, leaving the drm_dev_alloc call
>>     into msm_drm_init(), so that the drm_dev_put() stays in msm_drm_uninit()
>> * pass msm_drm_private as drvdata instead of drm_device
>> * change all the drvdata users to get drm_device from priv->dev, instead of getting
>>     msm_drm_private from drm_device->dev_private (like many other drm drivers are
>>     currently doing)
> 
> This sounds in a way that it should work. I'm looking forward to
> seeing (and testing) your patches.
> 

Alright then, I'm running some more tests and cleaning up the patches.
Expect a v2 between today and tomorrow at max! :))

>>
>> This way, we keep the current flow of creating the DRM device at msm_drm_init time
>> and tearing it down at msm_drm_unbind time, solving the issue that you are
>> describing.
>>
>> If you're okay with this kind of approach, I have two patches here that are 95%
>> ready, can finish them off and send briefly.
>>
>> Though, something else must be noted here... in the last mail where I'm pasting
>> a crash that happens when running 'rmmod panel_edp ti_sn65dsi86', I have implied
>> that this is happening due to the patch that I've sent: after some more research,
>> I'm not convinced anymore that this is a consequence of that. That crash may not
>> be related to my fix at all, but to something else (perhaps also related to commit
>> 8f59ee9a570c, the one that we're fixing here).
>>
>> Of course, that crash still happens even with the approach that I've just proposed.
>>
>>
>> Looking forward for your opinion!
>>
>> Cheers,
>> - Angelo
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 7936e8d498dd..790acf4993c0 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -512,45 +512,12 @@  static int msm_init_vram(struct drm_device *dev)
 static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
 {
 	struct platform_device *pdev = to_platform_device(dev);
-	struct drm_device *ddev;
-	struct msm_drm_private *priv;
-	struct msm_kms *kms;
-	struct msm_mdss *mdss;
+	struct drm_device *ddev = platform_get_drvdata(pdev);
+	struct msm_drm_private *priv = ddev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	struct msm_mdss *mdss = priv->mdss;
 	int ret, i;
 
-	ddev = drm_dev_alloc(drv, dev);
-	if (IS_ERR(ddev)) {
-		DRM_DEV_ERROR(dev, "failed to allocate drm_device\n");
-		return PTR_ERR(ddev);
-	}
-
-	platform_set_drvdata(pdev, ddev);
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto err_put_drm_dev;
-	}
-
-	ddev->dev_private = priv;
-	priv->dev = ddev;
-
-	switch (get_mdp_ver(pdev)) {
-	case KMS_MDP5:
-		ret = mdp5_mdss_init(ddev);
-		break;
-	case KMS_DPU:
-		ret = dpu_mdss_init(ddev);
-		break;
-	default:
-		ret = 0;
-		break;
-	}
-	if (ret)
-		goto err_free_priv;
-
-	mdss = priv->mdss;
-
 	priv->wq = alloc_ordered_workqueue("msm", 0);
 	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
 
@@ -685,11 +652,6 @@  static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
 err_destroy_mdss:
 	if (mdss && mdss->funcs)
 		mdss->funcs->destroy(ddev);
-err_free_priv:
-	kfree(priv);
-err_put_drm_dev:
-	drm_dev_put(ddev);
-	platform_set_drvdata(pdev, NULL);
 	return ret;
 }
 
@@ -1382,12 +1344,42 @@  static const struct component_master_ops msm_drm_ops = {
 static int msm_pdev_probe(struct platform_device *pdev)
 {
 	struct component_match *match = NULL;
+	struct msm_drm_private *priv;
+	struct drm_device *ddev;
 	int ret;
 
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	ddev = drm_dev_alloc(&msm_driver, &pdev->dev);
+	if (IS_ERR(ddev)) {
+		DRM_DEV_ERROR(&pdev->dev, "failed to allocate drm_device\n");
+		return PTR_ERR(ddev);
+	}
+
+	platform_set_drvdata(pdev, ddev);
+	ddev->dev_private = priv;
+	priv->dev = ddev;
+
+	switch (get_mdp_ver(pdev)) {
+	case KMS_MDP5:
+		ret = mdp5_mdss_init(ddev);
+		break;
+	case KMS_DPU:
+		ret = dpu_mdss_init(ddev);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+	if (ret)
+		goto err_put_drm_dev;
+
 	if (get_mdp_ver(pdev)) {
 		ret = add_display_components(pdev, &match);
 		if (ret)
-			return ret;
+			goto fail;
 	}
 
 	ret = add_gpu_components(&pdev->dev, &match);
@@ -1409,6 +1401,9 @@  static int msm_pdev_probe(struct platform_device *pdev)
 
 fail:
 	of_platform_depopulate(&pdev->dev);
+err_put_drm_dev:
+	drm_dev_put(ddev);
+	platform_set_drvdata(pdev, NULL);
 	return ret;
 }