diff mbox series

[v2,16/17] ASoC: Intel: bdw_rt286: Refactor suspend/resume

Message ID 20220613091546.1565167-17-cezary.rojewski@intel.com (mailing list archive)
State Superseded
Headers show
Series ASoC: Intel: haswell and broadwell boards update | expand

Commit Message

Cezary Rojewski June 13, 2022, 9:15 a.m. UTC
Make use of card->remove() rather than pdev->remove() to unassign jack
during card unbind procedure.

To reduce code size, define unified jack setter in form of
bdw_rt286_set_jack() and invoke it during remove(), suspend_pre() and
resume_port().

Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
---
 sound/soc/intel/boards/bdw_rt286.c | 50 +++++++++---------------------
 1 file changed, 14 insertions(+), 36 deletions(-)

Comments

Pierre-Louis Bossart June 15, 2022, 1:27 a.m. UTC | #1
On 6/13/22 04:15, Cezary Rojewski wrote:
> Make use of card->remove() rather than pdev->remove() to unassign jack
> during card unbind procedure.
> 
> To reduce code size, define unified jack setter in form of
> bdw_rt286_set_jack() and invoke it during remove(), suspend_pre() and
> resume_port().
> 
> Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
> Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>

This seems to have rather negative side effects in our modprobe/modprobe
-r tests?

The pattern of disabling the jack in the platform device .remove is
fairly common, I don't recall having seen a machine driver doing this in
the card .remove step. Are you sure this is equivalent?

Reverting this patch removes the kernel oops.

I don't have time to debug further - but this adds to my point of
minimizing risk on legacy code, doesn't it? suspend-resume is difficult
to get right, and easy to break. I have done the latter more often that
the former.

if you want to reproduce the issue, see
https://github.com/thesofproject/linux/pull/3696

and use sof-test:
/root/sof-test/test-case/check-kmod-load-unload.sh -l 1



[  246.570618] BUG: kernel NULL pointer dereference, address:
0000000000000058

[  246.570636] #PF: supervisor read access in kernel mode

[  246.570644] #PF: error_code(0x0000) - not-present page

[  246.570653] PGD 0 P4D 0

[  246.570662] Oops: 0000 [#1] PREEMPT SMP PTI

[  246.570672] CPU: 2 PID: 5409 Comm: rmmod Tainted: G          I
5.19.0-rc1-test-05237-gf04711968c69 #224

[  246.570687] Hardware name: Intel Corporation Broadwell Client
platform/Wilson Beach SDS, BIOS BDW-E2R1.86C.0095.R09.1410300006 10/30/2014

[  246.570700] RIP: 0010:card_set_jack+0x62/0x80 [snd_soc_sst_broadwell]

[  246.570717] Code: 49 39 c4 74 39 8b 93 14 06 00 00 48 8b 83 08 06 00
00 48 c7 c6 8d 1c 4e c0 48 8b 2c d0 48 8b 7d 00 e8 22 7b e5 c6 85 c0 75
c8 <48> 8b 7d 58 5b 4c 89 ee 31 d2 5d 41 5c 41 5d e9 fa d2 ee ff 31 ed

[  246.570736] RSP: 0018:ffffa6b9810d3c90 EFLAGS: 00010246

[  246.570746] RAX: ffffffffc04e33e8 RBX: ffffffffc04e30e0 RCX:
0000000000000000

[  246.570756] RDX: 0000000000000001 RSI: 0000000000000000 RDI:
ffffffffc04e30e0

[  246.570766] RBP: 0000000000000000 R08: 0000003968c0fba0 R09:
0000000000000000

[  246.570775] R10: ffffffffc03c1aaa R11: 0000000000000000 R12:
ffffffffc04e33e8

[  246.570784] R13: 0000000000000000 R14: ffffffffc04e3420 R15:
dead000000000100

[  246.570794] FS:  00007f17697a0740(0000) GS:ffff8e9685a00000(0000)
knlGS:0000000000000000

[  246.570805] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033

[  246.570814] CR2: 0000000000000058 CR3: 0000000132d7c006 CR4:
00000000003706e0

[  246.570823] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000

[  246.570832] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7:
0000000000000400

[  246.570842] Call Trace:

[  246.570849]  <TASK>

[  246.570858]  snd_soc_card_remove+0x27/0x60 [snd_soc_core]

[  246.570902]  soc_cleanup_card_resources+0x22d/0x260 [snd_soc_core]

[  246.570938]  snd_soc_unregister_card+0xd9/0xe0 [snd_soc_core]

[  246.570970]  devres_release_all+0xb8/0x100

[  246.570988]  device_unbind_cleanup+0xe/0x70

[  246.571001]  device_release_driver_internal+0x1dd/0x230

[  246.571012]  bus_remove_device+0xef/0x160

[  246.571025]  device_del+0x18c/0x3f0

[  246.571037]  platform_device_del.part.0+0x13/0x70

[  246.571048]  platform_device_unregister+0x1c/0x30

[  246.571059]  snd_sof_device_remove+0x57/0xe0 [snd_sof]

[  246.571087]  sof_acpi_remove+0x34/0x40 [snd_sof_acpi]

[  246.571099]  platform_remove+0x1f/0x40

[  246.571109]  device_release_driver_internal+0x1b0/0x230

[  246.571120]  driver_detach+0x47/0x90

[  246.571129]  bus_remove_driver+0x58/0xd0

[  246.571141]  __do_sys_delete_module+0x19f/0x270

[  246.571156]  ? fpregs_assert_state_consistent+0x1e/0x40

[  246.571168]  ? exit_to_user_mode_prepare+0x37/0x120

[  246.571181]  do_syscall_64+0x3b/0x90

[  246.571193]  entry_SYSCALL_64_after_hwframe+0x46/0xb0


> ---
>  sound/soc/intel/boards/bdw_rt286.c | 50 +++++++++---------------------
>  1 file changed, 14 insertions(+), 36 deletions(-)
> 
> diff --git a/sound/soc/intel/boards/bdw_rt286.c b/sound/soc/intel/boards/bdw_rt286.c
> index 92fddf6061e8..106a06398858 100644
> --- a/sound/soc/intel/boards/bdw_rt286.c
> +++ b/sound/soc/intel/boards/bdw_rt286.c
> @@ -199,43 +199,33 @@ static struct snd_soc_dai_link card_dai_links[] = {
>  	},
>  };
>  
> -static void broadwell_disable_jack(struct snd_soc_card *card)
> +static int card_set_jack(struct snd_soc_card *card, struct snd_soc_jack *jack)
>  {
> -	struct snd_soc_component *component;
> -
> -	for_each_card_components(card, component) {
> -		if (!strcmp(component->name, "i2c-INT343A:00")) {
> -			dev_dbg(component->dev, "disabling jack detect before going to suspend.\n");
> -			snd_soc_component_set_jack(component, NULL, NULL);
> -			break;
> -		}
> -	}
> +	struct snd_soc_dai *codec_dai = snd_soc_card_get_codec_dai(card, "rt286-aif1");
> +
> +	return snd_soc_component_set_jack(codec_dai->component, jack, NULL);
>  }
>  
> -static int card_suspend_pre(struct snd_soc_card *card)
> +static int card_remove(struct snd_soc_card *card)
>  {
> -	broadwell_disable_jack(card);
> +	return card_set_jack(card, NULL);
> +}
>  
> -	return 0;
> +static int card_suspend_pre(struct snd_soc_card *card)
> +{
> +	return card_set_jack(card, NULL);
>  }
>  
>  static int card_resume_post(struct snd_soc_card *card)
>  {
> -	struct snd_soc_component *component;
> -
> -	for_each_card_components(card, component) {
> -		if (!strcmp(component->name, "i2c-INT343A:00")) {
> -			dev_dbg(component->dev, "enabling jack detect for resume.\n");
> -			snd_soc_component_set_jack(component, &card_headset, NULL);
> -			break;
> -		}
> -	}
> -
> -	return 0;
> +	return card_set_jack(card, &card_headset);
>  }
>  
>  static struct snd_soc_card bdw_rt286_card = {
>  	.owner = THIS_MODULE,
> +	.remove = card_remove,
> +	.suspend_pre = card_suspend_pre,
> +	.resume_post = card_resume_post,
>  	.dai_link = card_dai_links,
>  	.num_links = ARRAY_SIZE(card_dai_links),
>  	.controls = card_controls,
> @@ -245,8 +235,6 @@ static struct snd_soc_card bdw_rt286_card = {
>  	.dapm_routes = card_routes,
>  	.num_dapm_routes = ARRAY_SIZE(card_routes),
>  	.fully_routed = true,
> -	.suspend_pre = card_suspend_pre,
> -	.resume_post = card_resume_post,
>  };
>  
>  /* Use space before codec name to simplify card ID, and simplify driver name. */
> @@ -278,18 +266,8 @@ static int bdw_rt286_probe(struct platform_device *pdev)
>  	return devm_snd_soc_register_card(dev, &bdw_rt286_card);
>  }
>  
> -static int bdw_rt286_remove(struct platform_device *pdev)
> -{
> -	struct snd_soc_card *card = platform_get_drvdata(pdev);
> -
> -	broadwell_disable_jack(card);
> -
> -	return 0;
> -}
> -
>  static struct platform_driver bdw_rt286_driver = {
>  	.probe = bdw_rt286_probe,
> -	.remove = bdw_rt286_remove,
>  	.driver = {
>  		.name = "bdw_rt286",
>  		.pm = &snd_soc_pm_ops
Cezary Rojewski June 15, 2022, 12:57 p.m. UTC | #2
On 2022-06-15 3:27 AM, Pierre-Louis Bossart wrote:

> On 6/13/22 04:15, Cezary Rojewski wrote:
>> Make use of card->remove() rather than pdev->remove() to unassign jack
>> during card unbind procedure.
>>
>> To reduce code size, define unified jack setter in form of
>> bdw_rt286_set_jack() and invoke it during remove(), suspend_pre() and
>> resume_port().
>>
>> Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
>> Reviewed-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
> 
> This seems to have rather negative side effects in our modprobe/modprobe
> -r tests?
> 
> The pattern of disabling the jack in the platform device .remove is
> fairly common, I don't recall having seen a machine driver doing this in
> the card .remove step. Are you sure this is equivalent?
> 
> Reverting this patch removes the kernel oops.
> 
> I don't have time to debug further - but this adds to my point of
> minimizing risk on legacy code, doesn't it? suspend-resume is difficult
> to get right, and easy to break. I have done the latter more often that
> the former.
> 
> if you want to reproduce the issue, see
> https://github.com/thesofproject/linux/pull/3696
> 
> and use sof-test:
> /root/sof-test/test-case/check-kmod-load-unload.sh -l 1


Thanks for the report. Indeed, the latest "optimization" broke the 
card->remove() path.

Jacks are often initialized during dai_link initialization which is 
completely out of platform_device area. This report made me think 
further - if we assign jack in dai_link->init(), we should be able to 
drop it in dai_link->exit().

Not exactly! ->init() is done once card components are already accounted 
for (available for use) but snd_soc_link_exit() is called during 
snd_soc_remove_pcm_runtime() when card components are available no 
longer - soc_remove_link_components().

TLDR: teardown path is not symmetric with its counterpart, perhaps a 
problem yet to be addressed. I'll see if moving the jack-NULLing to 
codec's DAI ->remove() won't be a better temporary (?) solution than 
reverting to platform_device->remove() usage.


Mark,

Is it fine to leave v2 series as is, just ignoring this single 16/17 
patch? Or should I resend entire series as v3 without this very patch? 
I'd like to address the problem via a separate change.


Regards,
Czarek
Pierre-Louis Bossart June 15, 2022, 4:25 p.m. UTC | #3
> Jacks are often initialized during dai_link initialization which is
> completely out of platform_device area. This report made me think
> further - if we assign jack in dai_link->init(), we should be able to
> drop it in dai_link->exit().
> 
> Not exactly! ->init() is done once card components are already accounted
> for (available for use) but snd_soc_link_exit() is called during
> snd_soc_remove_pcm_runtime() when card components are available no
> longer - soc_remove_link_components().
> 
> TLDR: teardown path is not symmetric with its counterpart, perhaps a
> problem yet to be addressed. I'll see if moving the jack-NULLing to
> codec's DAI ->remove() won't be a better temporary (?) solution than
> reverting to platform_device->remove() usage.

It's a problem that impacted other platforms, see e.g.

static void kabylake_rt5660_codec_exit(struct snd_soc_pcm_runtime *rtd)
{
	struct kbl_codec_private *ctx = snd_soc_card_get_drvdata(rtd->card);

	/*
	 * The .exit() can be reached without going through the .init()
	 * so explicitly test if the gpiod is valid
	 */
	if (!IS_ERR_OR_NULL(ctx->gpio_lo_mute))
		gpiod_put(ctx->gpio_lo_mute);
}

I vaguely recall hitting this myself when working with codec properties.
It's worthy of a comment in the ASoC header to make sure this is better
known/shared.

I see in other drivers that the use of component_set_jack() is
symmetrical between .init and .exit, so far we haven't seen any issues
with sof_rt5682.c and others.
Cezary Rojewski June 19, 2022, 1:04 p.m. UTC | #4
On 2022-06-15 6:25 PM, Pierre-Louis Bossart wrote:
> 
>> Jacks are often initialized during dai_link initialization which is
>> completely out of platform_device area. This report made me think
>> further - if we assign jack in dai_link->init(), we should be able to
>> drop it in dai_link->exit().
>>
>> Not exactly! ->init() is done once card components are already accounted
>> for (available for use) but snd_soc_link_exit() is called during
>> snd_soc_remove_pcm_runtime() when card components are available no
>> longer - soc_remove_link_components().
>>
>> TLDR: teardown path is not symmetric with its counterpart, perhaps a
>> problem yet to be addressed. I'll see if moving the jack-NULLing to
>> codec's DAI ->remove() won't be a better temporary (?) solution than
>> reverting to platform_device->remove() usage.
> 
> It's a problem that impacted other platforms, see e.g.
> 
> static void kabylake_rt5660_codec_exit(struct snd_soc_pcm_runtime *rtd)
> {
> 	struct kbl_codec_private *ctx = snd_soc_card_get_drvdata(rtd->card);
> 
> 	/*
> 	 * The .exit() can be reached without going through the .init()
> 	 * so explicitly test if the gpiod is valid
> 	 */
> 	if (!IS_ERR_OR_NULL(ctx->gpio_lo_mute))
> 		gpiod_put(ctx->gpio_lo_mute);
> }
> 
> I vaguely recall hitting this myself when working with codec properties.
> It's worthy of a comment in the ASoC header to make sure this is better
> known/shared.
> 
> I see in other drivers that the use of component_set_jack() is
> symmetrical between .init and .exit, so far we haven't seen any issues
> with sof_rt5682.c and others.


I'll send a separate mail where we can discuss the teardown path. Don't 
believe the problem can be ignored. Even for the bdw_rt286.c usage of 
link->exit() generates:

   rt286 i2c-INT343A:00: ASoC: DAPM unknown pin LDO1

the following is the cause:

soc_remove_component() calls both component->remove() and 
snd_soc_dapm_free() for the component (in that order) so when 
link->exit() finally gets executes DAPM widgets are no longer there.


Regards,
Czarek
Cezary Rojewski June 19, 2022, 1:58 p.m. UTC | #5
On 2022-06-15 2:57 PM, Cezary Rojewski wrote:

> Mark,
> 
> Is it fine to leave v2 series as is, just ignoring this single 16/17 
> patch? Or should I resend entire series as v3 without this very patch? 
> I'd like to address the problem via a separate change.


Please ignore this question. I've opted to use link->exit() just like 
many other boards do and module reloads just fine. A separate thread 
will be opened for the card teardown flow discussion as mentioned by me 
earlier. link->exit() has its own problems too.

I'll send v3 tomorrow morning.


Regards,
Czarek
diff mbox series

Patch

diff --git a/sound/soc/intel/boards/bdw_rt286.c b/sound/soc/intel/boards/bdw_rt286.c
index 92fddf6061e8..106a06398858 100644
--- a/sound/soc/intel/boards/bdw_rt286.c
+++ b/sound/soc/intel/boards/bdw_rt286.c
@@ -199,43 +199,33 @@  static struct snd_soc_dai_link card_dai_links[] = {
 	},
 };
 
-static void broadwell_disable_jack(struct snd_soc_card *card)
+static int card_set_jack(struct snd_soc_card *card, struct snd_soc_jack *jack)
 {
-	struct snd_soc_component *component;
-
-	for_each_card_components(card, component) {
-		if (!strcmp(component->name, "i2c-INT343A:00")) {
-			dev_dbg(component->dev, "disabling jack detect before going to suspend.\n");
-			snd_soc_component_set_jack(component, NULL, NULL);
-			break;
-		}
-	}
+	struct snd_soc_dai *codec_dai = snd_soc_card_get_codec_dai(card, "rt286-aif1");
+
+	return snd_soc_component_set_jack(codec_dai->component, jack, NULL);
 }
 
-static int card_suspend_pre(struct snd_soc_card *card)
+static int card_remove(struct snd_soc_card *card)
 {
-	broadwell_disable_jack(card);
+	return card_set_jack(card, NULL);
+}
 
-	return 0;
+static int card_suspend_pre(struct snd_soc_card *card)
+{
+	return card_set_jack(card, NULL);
 }
 
 static int card_resume_post(struct snd_soc_card *card)
 {
-	struct snd_soc_component *component;
-
-	for_each_card_components(card, component) {
-		if (!strcmp(component->name, "i2c-INT343A:00")) {
-			dev_dbg(component->dev, "enabling jack detect for resume.\n");
-			snd_soc_component_set_jack(component, &card_headset, NULL);
-			break;
-		}
-	}
-
-	return 0;
+	return card_set_jack(card, &card_headset);
 }
 
 static struct snd_soc_card bdw_rt286_card = {
 	.owner = THIS_MODULE,
+	.remove = card_remove,
+	.suspend_pre = card_suspend_pre,
+	.resume_post = card_resume_post,
 	.dai_link = card_dai_links,
 	.num_links = ARRAY_SIZE(card_dai_links),
 	.controls = card_controls,
@@ -245,8 +235,6 @@  static struct snd_soc_card bdw_rt286_card = {
 	.dapm_routes = card_routes,
 	.num_dapm_routes = ARRAY_SIZE(card_routes),
 	.fully_routed = true,
-	.suspend_pre = card_suspend_pre,
-	.resume_post = card_resume_post,
 };
 
 /* Use space before codec name to simplify card ID, and simplify driver name. */
@@ -278,18 +266,8 @@  static int bdw_rt286_probe(struct platform_device *pdev)
 	return devm_snd_soc_register_card(dev, &bdw_rt286_card);
 }
 
-static int bdw_rt286_remove(struct platform_device *pdev)
-{
-	struct snd_soc_card *card = platform_get_drvdata(pdev);
-
-	broadwell_disable_jack(card);
-
-	return 0;
-}
-
 static struct platform_driver bdw_rt286_driver = {
 	.probe = bdw_rt286_probe,
-	.remove = bdw_rt286_remove,
 	.driver = {
 		.name = "bdw_rt286",
 		.pm = &snd_soc_pm_ops