diff mbox

hwmon: xgene: Fix crash when alarm occurs before driver probe

Message ID 1473220019-2519-1-git-send-email-hotran@apm.com (mailing list archive)
State Superseded
Headers show

Commit Message

hotran Sept. 7, 2016, 3:46 a.m. UTC
The system crashes during probing xgene-hwmon driver when temperature
alarm interrupt occurs before.
It's because
 - xgene_hwmon_probe() requests PCC mailbox channel which also enables
the mailbox interrupt.
 - As temperature alarm interrupt is pending, ISR runs and crashes when accesses
into invalid resource as unmapped PCC shared memory.

This patch fixes this issue by saving this alarm message and scheduling a
bottom handler after xgene_hwmon_probe() finish.

Signed-off-by: Hoan Tran <hotran@apm.com>
Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
---
 drivers/hwmon/xgene-hwmon.c | 75 +++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 19 deletions(-)

Comments

Guenter Roeck Sept. 7, 2016, 4:35 a.m. UTC | #1
On 09/06/2016 08:46 PM, Hoan Tran wrote:
> The system crashes during probing xgene-hwmon driver when temperature
> alarm interrupt occurs before.
> It's because
>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
> the mailbox interrupt.
>  - As temperature alarm interrupt is pending, ISR runs and crashes when accesses
> into invalid resource as unmapped PCC shared memory.
>
> This patch fixes this issue by saving this alarm message and scheduling a
> bottom handler after xgene_hwmon_probe() finish.
>

I am not completely happy with this fix. Main problem I have is that the
processing associated with resp_pending doesn't happen until init_flag is set.
Since the hwmon functions can be called right after hwmon_device_register_with_groups(),
there is now a new race condition between that call and setting init_flag.

I am also a bit concerned with init_flag and rx_pending not being atomic and protected.
What happens if a second interrupt occurs right after init_flag is set but before
(or while) rx_pending is evaluated ?

On top of that, init_flag and thus the added complexity is (unless I am missing
something) only needed if acpi is enabled. Penaltizing non-acpi code doesn't seem
to be optimal.

How do other drivers handle this situation ? This must be a common problem
with all mbox users.

Thanks,
Guenter

> Signed-off-by: Hoan Tran <hotran@apm.com>
> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
> ---
>  drivers/hwmon/xgene-hwmon.c | 75 +++++++++++++++++++++++++++++++++------------
>  1 file changed, 56 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
> index bc78a5d..e3b4e84 100644
> --- a/drivers/hwmon/xgene-hwmon.c
> +++ b/drivers/hwmon/xgene-hwmon.c
> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>  	struct completion	rd_complete;
>  	int			resp_pending;
>  	struct slimpro_resp_msg sync_msg;
> +	bool			init_flag;
> +	bool			rx_pending;
>
>  	struct work_struct	workq;
>  	struct kfifo_rec_ptr_1	async_msg_fifo;
> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct work_struct *work)
>  	}
>  }
>
> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
> +{
> +	if (!ctx->init_flag) {
> +		ctx->rx_pending = true;
> +		/* Enqueue to the FIFO */
> +		kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
> +				    sizeof(struct slimpro_resp_msg),
> +				    &ctx->kfifo_lock);
> +		return -EBUSY;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * This function is called when the SLIMpro Mailbox received a message
>   */
>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>  {
>  	struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
> -	struct slimpro_resp_msg amsg;
> +
> +	/*
> +	 * While the driver registers with the mailbox framework, an interrupt
> +	 * can be pending before the probe function completes its
> +	 * initialization. If such condition occurs, just queue up the message
> +	 * as the driver is not ready for servicing the callback.
> +	 */
> +	if (xgene_hwmon_rx_ready(ctx, msg) < 0)
> +		return;
>
>  	/*
>  	 * Response message format:
> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>  		return;
>  	}
>
> -	amsg.msg   = ((u32 *)msg)[0];
> -	amsg.param1 = ((u32 *)msg)[1];
> -	amsg.param2 = ((u32 *)msg)[2];
> -
>  	/* Enqueue to the FIFO */
> -	kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
> +	kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>  			    sizeof(struct slimpro_resp_msg), &ctx->kfifo_lock);
>  	/* Schedule the bottom handler */
>  	schedule_work(&ctx->workq);
> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct mbox_client *cl, void *msg)
>  	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
>  	struct slimpro_resp_msg amsg;
>
> +	/*
> +	 * While the driver registers with the mailbox framework, an interrupt
> +	 * can be pending before the probe function completes its
> +	 * initialization. If such condition occurs, just queue up the message
> +	 * as the driver is not ready for servicing the callback.
> +	 */
> +	if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
> +		return;
> +
>  	msg = generic_comm_base + 1;
>  	/* Check if platform sends interrupt */
>  	if (!xgene_word_tst_and_clr(&generic_comm_base->status,
> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct platform_device *pdev)
>  	platform_set_drvdata(pdev, ctx);
>  	cl = &ctx->mbox_client;
>
> +	spin_lock_init(&ctx->kfifo_lock);
> +	mutex_init(&ctx->rd_mutex);
> +
> +	rc = kfifo_alloc(&ctx->async_msg_fifo,
> +			 sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE,
> +			 GFP_KERNEL);
> +	if (rc)
> +		goto out_mbox_free;
> +
> +	INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
> +
>  	/* Request mailbox channel */
>  	cl->dev = &pdev->dev;
>  	cl->tx_done = xgene_hwmon_tx_done;
> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct platform_device *pdev)
>  		ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>  	}
>
> -	spin_lock_init(&ctx->kfifo_lock);
> -	mutex_init(&ctx->rd_mutex);
> -
> -	rc = kfifo_alloc(&ctx->async_msg_fifo,
> -			 sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE,
> -			 GFP_KERNEL);
> -	if (rc)
> -		goto out_mbox_free;
> -
> -	INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
> -
>  	ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>  							   "apm_xgene",
>  							   ctx,
> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct platform_device *pdev)
>  		goto out;
>  	}
>
> +	ctx->init_flag = true;
> +	if (ctx->rx_pending) {
> +		/*
> +		 * If there is a pending message, schedule the bottom handler
> +		 */
> +		schedule_work(&ctx->workq);
> +	}
> +
>  	dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver registered\n");
>
>  	return 0;
>
>  out:
> -	kfifo_free(&ctx->async_msg_fifo);
> -out_mbox_free:
>  	if (acpi_disabled)
>  		mbox_free_channel(ctx->mbox_chan);
>  	else
>  		pcc_mbox_free_channel(ctx->mbox_chan);
> +out_mbox_free:
> +	kfifo_free(&ctx->async_msg_fifo);
>
>  	return rc;
>  }
>

--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
hotran Sept. 7, 2016, 5:21 a.m. UTC | #2
Hi Guenter,

Thank for your quick review !

On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>>
>> The system crashes during probing xgene-hwmon driver when temperature
>> alarm interrupt occurs before.
>> It's because
>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>> the mailbox interrupt.
>>  - As temperature alarm interrupt is pending, ISR runs and crashes when
>> accesses
>> into invalid resource as unmapped PCC shared memory.
>>
>> This patch fixes this issue by saving this alarm message and scheduling a
>> bottom handler after xgene_hwmon_probe() finish.
>>
>
> I am not completely happy with this fix. Main problem I have is that the
> processing associated with resp_pending doesn't happen until init_flag is
> set.
> Since the hwmon functions can be called right after
> hwmon_device_register_with_groups(),
> there is now a new race condition between that call and setting init_flag.

I think it's still good if hwmon functions are called right after
hwmon_device_register_with_groups().
The response message will be queued into FIFO and be processed later.

>
> I am also a bit concerned with init_flag and rx_pending not being atomic and
> protected.
> What happens if a second interrupt occurs right after init_flag is set but
> before
> (or while) rx_pending is evaluated ?

Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
this atomic protection.

>
> On top of that, init_flag and thus the added complexity is (unless I am
> missing
> something) only needed if acpi is enabled. Penaltizing non-acpi code doesn't
> seem
> to be optimal.

I think, with DT, we still need this flag. In a case of temperature
alarm, the driver needs to set "temp1_critical_alarm" sysfs.
This "temp1_critical_alarm" should be created before "init_flag" = true.

Thanks
Hoan

>
> How do other drivers handle this situation ? This must be a common problem
> with all mbox users.
>
> Thanks,
> Guenter
>
>
>> Signed-off-by: Hoan Tran <hotran@apm.com>
>> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
>> ---
>>  drivers/hwmon/xgene-hwmon.c | 75
>> +++++++++++++++++++++++++++++++++------------
>>  1 file changed, 56 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
>> index bc78a5d..e3b4e84 100644
>> --- a/drivers/hwmon/xgene-hwmon.c
>> +++ b/drivers/hwmon/xgene-hwmon.c
>> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>>         struct completion       rd_complete;
>>         int                     resp_pending;
>>         struct slimpro_resp_msg sync_msg;
>> +       bool                    init_flag;
>> +       bool                    rx_pending;
>>
>>         struct work_struct      workq;
>>         struct kfifo_rec_ptr_1  async_msg_fifo;
>> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct work_struct
>> *work)
>>         }
>>  }
>>
>> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
>> +{
>> +       if (!ctx->init_flag) {
>> +               ctx->rx_pending = true;
>> +               /* Enqueue to the FIFO */
>> +               kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>> +                                   sizeof(struct slimpro_resp_msg),
>> +                                   &ctx->kfifo_lock);
>> +               return -EBUSY;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>>  /*
>>   * This function is called when the SLIMpro Mailbox received a message
>>   */
>>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>>  {
>>         struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
>> -       struct slimpro_resp_msg amsg;
>> +
>> +       /*
>> +        * While the driver registers with the mailbox framework, an
>> interrupt
>> +        * can be pending before the probe function completes its
>> +        * initialization. If such condition occurs, just queue up the
>> message
>> +        * as the driver is not ready for servicing the callback.
>> +        */
>> +       if (xgene_hwmon_rx_ready(ctx, msg) < 0)
>> +               return;
>>
>>         /*
>>          * Response message format:
>> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client *cl,
>> void *msg)
>>                 return;
>>         }
>>
>> -       amsg.msg   = ((u32 *)msg)[0];
>> -       amsg.param1 = ((u32 *)msg)[1];
>> -       amsg.param2 = ((u32 *)msg)[2];
>> -
>>         /* Enqueue to the FIFO */
>> -       kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
>> +       kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>                             sizeof(struct slimpro_resp_msg),
>> &ctx->kfifo_lock);
>>         /* Schedule the bottom handler */
>>         schedule_work(&ctx->workq);
>> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct mbox_client
>> *cl, void *msg)
>>         struct acpi_pcct_shared_memory *generic_comm_base =
>> ctx->pcc_comm_addr;
>>         struct slimpro_resp_msg amsg;
>>
>> +       /*
>> +        * While the driver registers with the mailbox framework, an
>> interrupt
>> +        * can be pending before the probe function completes its
>> +        * initialization. If such condition occurs, just queue up the
>> message
>> +        * as the driver is not ready for servicing the callback.
>> +        */
>> +       if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
>> +               return;
>> +
>>         msg = generic_comm_base + 1;
>>         /* Check if platform sends interrupt */
>>         if (!xgene_word_tst_and_clr(&generic_comm_base->status,
>> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct platform_device
>> *pdev)
>>         platform_set_drvdata(pdev, ctx);
>>         cl = &ctx->mbox_client;
>>
>> +       spin_lock_init(&ctx->kfifo_lock);
>> +       mutex_init(&ctx->rd_mutex);
>> +
>> +       rc = kfifo_alloc(&ctx->async_msg_fifo,
>> +                        sizeof(struct slimpro_resp_msg) *
>> ASYNC_MSG_FIFO_SIZE,
>> +                        GFP_KERNEL);
>> +       if (rc)
>> +               goto out_mbox_free;
>> +
>> +       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>> +
>>         /* Request mailbox channel */
>>         cl->dev = &pdev->dev;
>>         cl->tx_done = xgene_hwmon_tx_done;
>> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct platform_device
>> *pdev)
>>                 ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>>         }
>>
>> -       spin_lock_init(&ctx->kfifo_lock);
>> -       mutex_init(&ctx->rd_mutex);
>> -
>> -       rc = kfifo_alloc(&ctx->async_msg_fifo,
>> -                        sizeof(struct slimpro_resp_msg) *
>> ASYNC_MSG_FIFO_SIZE,
>> -                        GFP_KERNEL);
>> -       if (rc)
>> -               goto out_mbox_free;
>> -
>> -       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>> -
>>         ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>                                                            "apm_xgene",
>>                                                            ctx,
>> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct platform_device
>> *pdev)
>>                 goto out;
>>         }
>>
>> +       ctx->init_flag = true;
>> +       if (ctx->rx_pending) {
>> +               /*
>> +                * If there is a pending message, schedule the bottom
>> handler
>> +                */
>> +               schedule_work(&ctx->workq);
>> +       }
>> +
>>         dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver
>> registered\n");
>>
>>         return 0;
>>
>>  out:
>> -       kfifo_free(&ctx->async_msg_fifo);
>> -out_mbox_free:
>>         if (acpi_disabled)
>>                 mbox_free_channel(ctx->mbox_chan);
>>         else
>>                 pcc_mbox_free_channel(ctx->mbox_chan);
>> +out_mbox_free:
>> +       kfifo_free(&ctx->async_msg_fifo);
>>
>>         return rc;
>>  }
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Sept. 7, 2016, 5:50 a.m. UTC | #3
On 09/06/2016 10:21 PM, Hoan Tran wrote:
> Hi Guenter,
>
> Thank for your quick review !
>
> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>>>
>>> The system crashes during probing xgene-hwmon driver when temperature
>>> alarm interrupt occurs before.
>>> It's because
>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>>> the mailbox interrupt.
>>>  - As temperature alarm interrupt is pending, ISR runs and crashes when
>>> accesses
>>> into invalid resource as unmapped PCC shared memory.
>>>
>>> This patch fixes this issue by saving this alarm message and scheduling a
>>> bottom handler after xgene_hwmon_probe() finish.
>>>
>>
>> I am not completely happy with this fix. Main problem I have is that the
>> processing associated with resp_pending doesn't happen until init_flag is
>> set.
>> Since the hwmon functions can be called right after
>> hwmon_device_register_with_groups(),
>> there is now a new race condition between that call and setting init_flag.
>
> I think it's still good if hwmon functions are called right after
> hwmon_device_register_with_groups().
> The response message will be queued into FIFO and be processed later.
>
Yes, but the call to complete() won't happen in this case, or am I missing
something ?

Guenter

>>
>> I am also a bit concerned with init_flag and rx_pending not being atomic and
>> protected.
>> What happens if a second interrupt occurs right after init_flag is set but
>> before
>> (or while) rx_pending is evaluated ?
>
> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
> this atomic protection.
>
>>
>> On top of that, init_flag and thus the added complexity is (unless I am
>> missing
>> something) only needed if acpi is enabled. Penaltizing non-acpi code doesn't
>> seem
>> to be optimal.
>
> I think, with DT, we still need this flag. In a case of temperature
> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
> This "temp1_critical_alarm" should be created before "init_flag" = true.
>
> Thanks
> Hoan
>
>>
>> How do other drivers handle this situation ? This must be a common problem
>> with all mbox users.
>>
>> Thanks,
>> Guenter
>>
>>
>>> Signed-off-by: Hoan Tran <hotran@apm.com>
>>> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
>>> ---
>>>  drivers/hwmon/xgene-hwmon.c | 75
>>> +++++++++++++++++++++++++++++++++------------
>>>  1 file changed, 56 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
>>> index bc78a5d..e3b4e84 100644
>>> --- a/drivers/hwmon/xgene-hwmon.c
>>> +++ b/drivers/hwmon/xgene-hwmon.c
>>> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>>>         struct completion       rd_complete;
>>>         int                     resp_pending;
>>>         struct slimpro_resp_msg sync_msg;
>>> +       bool                    init_flag;
>>> +       bool                    rx_pending;
>>>
>>>         struct work_struct      workq;
>>>         struct kfifo_rec_ptr_1  async_msg_fifo;
>>> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct work_struct
>>> *work)
>>>         }
>>>  }
>>>
>>> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
>>> +{
>>> +       if (!ctx->init_flag) {
>>> +               ctx->rx_pending = true;
>>> +               /* Enqueue to the FIFO */
>>> +               kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>> +                                   sizeof(struct slimpro_resp_msg),
>>> +                                   &ctx->kfifo_lock);
>>> +               return -EBUSY;
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>>  /*
>>>   * This function is called when the SLIMpro Mailbox received a message
>>>   */
>>>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>>>  {
>>>         struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
>>> -       struct slimpro_resp_msg amsg;
>>> +
>>> +       /*
>>> +        * While the driver registers with the mailbox framework, an
>>> interrupt
>>> +        * can be pending before the probe function completes its
>>> +        * initialization. If such condition occurs, just queue up the
>>> message
>>> +        * as the driver is not ready for servicing the callback.
>>> +        */
>>> +       if (xgene_hwmon_rx_ready(ctx, msg) < 0)
>>> +               return;
>>>
>>>         /*
>>>          * Response message format:
>>> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client *cl,
>>> void *msg)
>>>                 return;
>>>         }
>>>
>>> -       amsg.msg   = ((u32 *)msg)[0];
>>> -       amsg.param1 = ((u32 *)msg)[1];
>>> -       amsg.param2 = ((u32 *)msg)[2];
>>> -
>>>         /* Enqueue to the FIFO */
>>> -       kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
>>> +       kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>                             sizeof(struct slimpro_resp_msg),
>>> &ctx->kfifo_lock);
>>>         /* Schedule the bottom handler */
>>>         schedule_work(&ctx->workq);
>>> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct mbox_client
>>> *cl, void *msg)
>>>         struct acpi_pcct_shared_memory *generic_comm_base =
>>> ctx->pcc_comm_addr;
>>>         struct slimpro_resp_msg amsg;
>>>
>>> +       /*
>>> +        * While the driver registers with the mailbox framework, an
>>> interrupt
>>> +        * can be pending before the probe function completes its
>>> +        * initialization. If such condition occurs, just queue up the
>>> message
>>> +        * as the driver is not ready for servicing the callback.
>>> +        */
>>> +       if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
>>> +               return;
>>> +
>>>         msg = generic_comm_base + 1;
>>>         /* Check if platform sends interrupt */
>>>         if (!xgene_word_tst_and_clr(&generic_comm_base->status,
>>> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct platform_device
>>> *pdev)
>>>         platform_set_drvdata(pdev, ctx);
>>>         cl = &ctx->mbox_client;
>>>
>>> +       spin_lock_init(&ctx->kfifo_lock);
>>> +       mutex_init(&ctx->rd_mutex);
>>> +
>>> +       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>> +                        sizeof(struct slimpro_resp_msg) *
>>> ASYNC_MSG_FIFO_SIZE,
>>> +                        GFP_KERNEL);
>>> +       if (rc)
>>> +               goto out_mbox_free;
>>> +
>>> +       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>> +
>>>         /* Request mailbox channel */
>>>         cl->dev = &pdev->dev;
>>>         cl->tx_done = xgene_hwmon_tx_done;
>>> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct platform_device
>>> *pdev)
>>>                 ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>>>         }
>>>
>>> -       spin_lock_init(&ctx->kfifo_lock);
>>> -       mutex_init(&ctx->rd_mutex);
>>> -
>>> -       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>> -                        sizeof(struct slimpro_resp_msg) *
>>> ASYNC_MSG_FIFO_SIZE,
>>> -                        GFP_KERNEL);
>>> -       if (rc)
>>> -               goto out_mbox_free;
>>> -
>>> -       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>> -
>>>         ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>>                                                            "apm_xgene",
>>>                                                            ctx,
>>> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct platform_device
>>> *pdev)
>>>                 goto out;
>>>         }
>>>
>>> +       ctx->init_flag = true;
>>> +       if (ctx->rx_pending) {
>>> +               /*
>>> +                * If there is a pending message, schedule the bottom
>>> handler
>>> +                */
>>> +               schedule_work(&ctx->workq);
>>> +       }
>>> +
>>>         dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver
>>> registered\n");
>>>
>>>         return 0;
>>>
>>>  out:
>>> -       kfifo_free(&ctx->async_msg_fifo);
>>> -out_mbox_free:
>>>         if (acpi_disabled)
>>>                 mbox_free_channel(ctx->mbox_chan);
>>>         else
>>>                 pcc_mbox_free_channel(ctx->mbox_chan);
>>> +out_mbox_free:
>>> +       kfifo_free(&ctx->async_msg_fifo);
>>>
>>>         return rc;
>>>  }
>>>
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
hotran Sept. 7, 2016, 6:07 a.m. UTC | #4
Hi Guenter,

On Tue, Sep 6, 2016 at 10:50 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> On 09/06/2016 10:21 PM, Hoan Tran wrote:
>>
>> Hi Guenter,
>>
>> Thank for your quick review !
>>
>> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>>>
>>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>>>>
>>>>
>>>> The system crashes during probing xgene-hwmon driver when temperature
>>>> alarm interrupt occurs before.
>>>> It's because
>>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>>>> the mailbox interrupt.
>>>>  - As temperature alarm interrupt is pending, ISR runs and crashes when
>>>> accesses
>>>> into invalid resource as unmapped PCC shared memory.
>>>>
>>>> This patch fixes this issue by saving this alarm message and scheduling
>>>> a
>>>> bottom handler after xgene_hwmon_probe() finish.
>>>>
>>>
>>> I am not completely happy with this fix. Main problem I have is that the
>>> processing associated with resp_pending doesn't happen until init_flag is
>>> set.
>>> Since the hwmon functions can be called right after
>>> hwmon_device_register_with_groups(),
>>> there is now a new race condition between that call and setting
>>> init_flag.
>>
>>
>> I think it's still good if hwmon functions are called right after
>> hwmon_device_register_with_groups().
>> The response message will be queued into FIFO and be processed later.
>>
> Yes, but the call to complete() won't happen in this case, or am I missing
> something ?

Yes, I think xgene_hwmon_rd() and xgene_hwmon_pcc_rd() functions have
to check "init_flag == true" before issue the read command.

Thanks
Hoan

>
> Guenter
>
>
>>>
>>> I am also a bit concerned with init_flag and rx_pending not being atomic
>>> and
>>> protected.
>>> What happens if a second interrupt occurs right after init_flag is set
>>> but
>>> before
>>> (or while) rx_pending is evaluated ?
>>
>>
>> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
>> this atomic protection.
>>
>>>
>>> On top of that, init_flag and thus the added complexity is (unless I am
>>> missing
>>> something) only needed if acpi is enabled. Penaltizing non-acpi code
>>> doesn't
>>> seem
>>> to be optimal.
>>
>>
>> I think, with DT, we still need this flag. In a case of temperature
>> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
>> This "temp1_critical_alarm" should be created before "init_flag" = true.
>>
>> Thanks
>> Hoan
>>
>>>
>>> How do other drivers handle this situation ? This must be a common
>>> problem
>>> with all mbox users.
>>>
>>> Thanks,
>>> Guenter
>>>
>>>
>>>> Signed-off-by: Hoan Tran <hotran@apm.com>
>>>> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
>>>> ---
>>>>  drivers/hwmon/xgene-hwmon.c | 75
>>>> +++++++++++++++++++++++++++++++++------------
>>>>  1 file changed, 56 insertions(+), 19 deletions(-)
>>>>
>>>> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
>>>> index bc78a5d..e3b4e84 100644
>>>> --- a/drivers/hwmon/xgene-hwmon.c
>>>> +++ b/drivers/hwmon/xgene-hwmon.c
>>>> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>>>>         struct completion       rd_complete;
>>>>         int                     resp_pending;
>>>>         struct slimpro_resp_msg sync_msg;
>>>> +       bool                    init_flag;
>>>> +       bool                    rx_pending;
>>>>
>>>>         struct work_struct      workq;
>>>>         struct kfifo_rec_ptr_1  async_msg_fifo;
>>>> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct
>>>> work_struct
>>>> *work)
>>>>         }
>>>>  }
>>>>
>>>> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
>>>> +{
>>>> +       if (!ctx->init_flag) {
>>>> +               ctx->rx_pending = true;
>>>> +               /* Enqueue to the FIFO */
>>>> +               kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>> +                                   sizeof(struct slimpro_resp_msg),
>>>> +                                   &ctx->kfifo_lock);
>>>> +               return -EBUSY;
>>>> +       }
>>>> +
>>>> +       return 0;
>>>> +}
>>>> +
>>>>  /*
>>>>   * This function is called when the SLIMpro Mailbox received a message
>>>>   */
>>>>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>>>>  {
>>>>         struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
>>>> -       struct slimpro_resp_msg amsg;
>>>> +
>>>> +       /*
>>>> +        * While the driver registers with the mailbox framework, an
>>>> interrupt
>>>> +        * can be pending before the probe function completes its
>>>> +        * initialization. If such condition occurs, just queue up the
>>>> message
>>>> +        * as the driver is not ready for servicing the callback.
>>>> +        */
>>>> +       if (xgene_hwmon_rx_ready(ctx, msg) < 0)
>>>> +               return;
>>>>
>>>>         /*
>>>>          * Response message format:
>>>> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client
>>>> *cl,
>>>> void *msg)
>>>>                 return;
>>>>         }
>>>>
>>>> -       amsg.msg   = ((u32 *)msg)[0];
>>>> -       amsg.param1 = ((u32 *)msg)[1];
>>>> -       amsg.param2 = ((u32 *)msg)[2];
>>>> -
>>>>         /* Enqueue to the FIFO */
>>>> -       kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
>>>> +       kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>>                             sizeof(struct slimpro_resp_msg),
>>>> &ctx->kfifo_lock);
>>>>         /* Schedule the bottom handler */
>>>>         schedule_work(&ctx->workq);
>>>> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct
>>>> mbox_client
>>>> *cl, void *msg)
>>>>         struct acpi_pcct_shared_memory *generic_comm_base =
>>>> ctx->pcc_comm_addr;
>>>>         struct slimpro_resp_msg amsg;
>>>>
>>>> +       /*
>>>> +        * While the driver registers with the mailbox framework, an
>>>> interrupt
>>>> +        * can be pending before the probe function completes its
>>>> +        * initialization. If such condition occurs, just queue up the
>>>> message
>>>> +        * as the driver is not ready for servicing the callback.
>>>> +        */
>>>> +       if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
>>>> +               return;
>>>> +
>>>>         msg = generic_comm_base + 1;
>>>>         /* Check if platform sends interrupt */
>>>>         if (!xgene_word_tst_and_clr(&generic_comm_base->status,
>>>> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct platform_device
>>>> *pdev)
>>>>         platform_set_drvdata(pdev, ctx);
>>>>         cl = &ctx->mbox_client;
>>>>
>>>> +       spin_lock_init(&ctx->kfifo_lock);
>>>> +       mutex_init(&ctx->rd_mutex);
>>>> +
>>>> +       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>> +                        sizeof(struct slimpro_resp_msg) *
>>>> ASYNC_MSG_FIFO_SIZE,
>>>> +                        GFP_KERNEL);
>>>> +       if (rc)
>>>> +               goto out_mbox_free;
>>>> +
>>>> +       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>> +
>>>>         /* Request mailbox channel */
>>>>         cl->dev = &pdev->dev;
>>>>         cl->tx_done = xgene_hwmon_tx_done;
>>>> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct platform_device
>>>> *pdev)
>>>>                 ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>>>>         }
>>>>
>>>> -       spin_lock_init(&ctx->kfifo_lock);
>>>> -       mutex_init(&ctx->rd_mutex);
>>>> -
>>>> -       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>> -                        sizeof(struct slimpro_resp_msg) *
>>>> ASYNC_MSG_FIFO_SIZE,
>>>> -                        GFP_KERNEL);
>>>> -       if (rc)
>>>> -               goto out_mbox_free;
>>>> -
>>>> -       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>> -
>>>>         ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>>>                                                            "apm_xgene",
>>>>                                                            ctx,
>>>> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct
>>>> platform_device
>>>> *pdev)
>>>>                 goto out;
>>>>         }
>>>>
>>>> +       ctx->init_flag = true;
>>>> +       if (ctx->rx_pending) {
>>>> +               /*
>>>> +                * If there is a pending message, schedule the bottom
>>>> handler
>>>> +                */
>>>> +               schedule_work(&ctx->workq);
>>>> +       }
>>>> +
>>>>         dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver
>>>> registered\n");
>>>>
>>>>         return 0;
>>>>
>>>>  out:
>>>> -       kfifo_free(&ctx->async_msg_fifo);
>>>> -out_mbox_free:
>>>>         if (acpi_disabled)
>>>>                 mbox_free_channel(ctx->mbox_chan);
>>>>         else
>>>>                 pcc_mbox_free_channel(ctx->mbox_chan);
>>>> +out_mbox_free:
>>>> +       kfifo_free(&ctx->async_msg_fifo);
>>>>
>>>>         return rc;
>>>>  }
>>>>
>>>
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Sept. 7, 2016, 6:39 a.m. UTC | #5
On 09/06/2016 11:07 PM, Hoan Tran wrote:
> Hi Guenter,
>
> On Tue, Sep 6, 2016 at 10:50 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>> On 09/06/2016 10:21 PM, Hoan Tran wrote:
>>>
>>> Hi Guenter,
>>>
>>> Thank for your quick review !
>>>
>>> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>>>>
>>>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>>>>>
>>>>>
>>>>> The system crashes during probing xgene-hwmon driver when temperature
>>>>> alarm interrupt occurs before.
>>>>> It's because
>>>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>>>>> the mailbox interrupt.
>>>>>  - As temperature alarm interrupt is pending, ISR runs and crashes when
>>>>> accesses
>>>>> into invalid resource as unmapped PCC shared memory.
>>>>>
>>>>> This patch fixes this issue by saving this alarm message and scheduling
>>>>> a
>>>>> bottom handler after xgene_hwmon_probe() finish.
>>>>>
>>>>
>>>> I am not completely happy with this fix. Main problem I have is that the
>>>> processing associated with resp_pending doesn't happen until init_flag is
>>>> set.
>>>> Since the hwmon functions can be called right after
>>>> hwmon_device_register_with_groups(),
>>>> there is now a new race condition between that call and setting
>>>> init_flag.
>>>
>>>
>>> I think it's still good if hwmon functions are called right after
>>> hwmon_device_register_with_groups().
>>> The response message will be queued into FIFO and be processed later.
>>>
>> Yes, but the call to complete() won't happen in this case, or am I missing
>> something ?
>
> Yes, I think xgene_hwmon_rd() and xgene_hwmon_pcc_rd() functions have
> to check "init_flag == true" before issue the read command.
>

This is getting more and more messy :-(

> Thanks
> Hoan
>
>>
>> Guenter
>>
>>
>>>>
>>>> I am also a bit concerned with init_flag and rx_pending not being atomic
>>>> and
>>>> protected.
>>>> What happens if a second interrupt occurs right after init_flag is set
>>>> but
>>>> before
>>>> (or while) rx_pending is evaluated ?
>>>
>>>
>>> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
>>> this atomic protection.
>>>
>>>>
>>>> On top of that, init_flag and thus the added complexity is (unless I am
>>>> missing
>>>> something) only needed if acpi is enabled. Penaltizing non-acpi code
>>>> doesn't
>>>> seem
>>>> to be optimal.
>>>
>>>
>>> I think, with DT, we still need this flag. In a case of temperature
>>> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
>>> This "temp1_critical_alarm" should be created before "init_flag" = true.
>>>

I don't know. After all, any such alarm would be lost if it occurred
before the driver was loaded, no ? Those mailboxes should really have
a means to be informed that the initiator is ready to handle
interrupts/callbacks.

Guenter

>>> Thanks
>>> Hoan
>>>
>>>>
>>>> How do other drivers handle this situation ? This must be a common
>>>> problem
>>>> with all mbox users.
>>>>
>>>> Thanks,
>>>> Guenter
>>>>
>>>>
>>>>> Signed-off-by: Hoan Tran <hotran@apm.com>
>>>>> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
>>>>> ---
>>>>>  drivers/hwmon/xgene-hwmon.c | 75
>>>>> +++++++++++++++++++++++++++++++++------------
>>>>>  1 file changed, 56 insertions(+), 19 deletions(-)
>>>>>
>>>>> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
>>>>> index bc78a5d..e3b4e84 100644
>>>>> --- a/drivers/hwmon/xgene-hwmon.c
>>>>> +++ b/drivers/hwmon/xgene-hwmon.c
>>>>> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>>>>>         struct completion       rd_complete;
>>>>>         int                     resp_pending;
>>>>>         struct slimpro_resp_msg sync_msg;
>>>>> +       bool                    init_flag;
>>>>> +       bool                    rx_pending;
>>>>>
>>>>>         struct work_struct      workq;
>>>>>         struct kfifo_rec_ptr_1  async_msg_fifo;
>>>>> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct
>>>>> work_struct
>>>>> *work)
>>>>>         }
>>>>>  }
>>>>>
>>>>> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
>>>>> +{
>>>>> +       if (!ctx->init_flag) {
>>>>> +               ctx->rx_pending = true;
>>>>> +               /* Enqueue to the FIFO */
>>>>> +               kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>>> +                                   sizeof(struct slimpro_resp_msg),
>>>>> +                                   &ctx->kfifo_lock);
>>>>> +               return -EBUSY;
>>>>> +       }
>>>>> +
>>>>> +       return 0;
>>>>> +}
>>>>> +
>>>>>  /*
>>>>>   * This function is called when the SLIMpro Mailbox received a message
>>>>>   */
>>>>>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>>>>>  {
>>>>>         struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
>>>>> -       struct slimpro_resp_msg amsg;
>>>>> +
>>>>> +       /*
>>>>> +        * While the driver registers with the mailbox framework, an
>>>>> interrupt
>>>>> +        * can be pending before the probe function completes its
>>>>> +        * initialization. If such condition occurs, just queue up the
>>>>> message
>>>>> +        * as the driver is not ready for servicing the callback.
>>>>> +        */
>>>>> +       if (xgene_hwmon_rx_ready(ctx, msg) < 0)
>>>>> +               return;
>>>>>
>>>>>         /*
>>>>>          * Response message format:
>>>>> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client
>>>>> *cl,
>>>>> void *msg)
>>>>>                 return;
>>>>>         }
>>>>>
>>>>> -       amsg.msg   = ((u32 *)msg)[0];
>>>>> -       amsg.param1 = ((u32 *)msg)[1];
>>>>> -       amsg.param2 = ((u32 *)msg)[2];
>>>>> -
>>>>>         /* Enqueue to the FIFO */
>>>>> -       kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
>>>>> +       kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>>>                             sizeof(struct slimpro_resp_msg),
>>>>> &ctx->kfifo_lock);
>>>>>         /* Schedule the bottom handler */
>>>>>         schedule_work(&ctx->workq);
>>>>> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct
>>>>> mbox_client
>>>>> *cl, void *msg)
>>>>>         struct acpi_pcct_shared_memory *generic_comm_base =
>>>>> ctx->pcc_comm_addr;
>>>>>         struct slimpro_resp_msg amsg;
>>>>>
>>>>> +       /*
>>>>> +        * While the driver registers with the mailbox framework, an
>>>>> interrupt
>>>>> +        * can be pending before the probe function completes its
>>>>> +        * initialization. If such condition occurs, just queue up the
>>>>> message
>>>>> +        * as the driver is not ready for servicing the callback.
>>>>> +        */
>>>>> +       if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
>>>>> +               return;
>>>>> +
>>>>>         msg = generic_comm_base + 1;
>>>>>         /* Check if platform sends interrupt */
>>>>>         if (!xgene_word_tst_and_clr(&generic_comm_base->status,
>>>>> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct platform_device
>>>>> *pdev)
>>>>>         platform_set_drvdata(pdev, ctx);
>>>>>         cl = &ctx->mbox_client;
>>>>>
>>>>> +       spin_lock_init(&ctx->kfifo_lock);
>>>>> +       mutex_init(&ctx->rd_mutex);
>>>>> +
>>>>> +       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>>> +                        sizeof(struct slimpro_resp_msg) *
>>>>> ASYNC_MSG_FIFO_SIZE,
>>>>> +                        GFP_KERNEL);
>>>>> +       if (rc)
>>>>> +               goto out_mbox_free;
>>>>> +
>>>>> +       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>>> +
>>>>>         /* Request mailbox channel */
>>>>>         cl->dev = &pdev->dev;
>>>>>         cl->tx_done = xgene_hwmon_tx_done;
>>>>> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct platform_device
>>>>> *pdev)
>>>>>                 ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>>>>>         }
>>>>>
>>>>> -       spin_lock_init(&ctx->kfifo_lock);
>>>>> -       mutex_init(&ctx->rd_mutex);
>>>>> -
>>>>> -       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>>> -                        sizeof(struct slimpro_resp_msg) *
>>>>> ASYNC_MSG_FIFO_SIZE,
>>>>> -                        GFP_KERNEL);
>>>>> -       if (rc)
>>>>> -               goto out_mbox_free;
>>>>> -
>>>>> -       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>>> -
>>>>>         ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>>>>                                                            "apm_xgene",
>>>>>                                                            ctx,
>>>>> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct
>>>>> platform_device
>>>>> *pdev)
>>>>>                 goto out;
>>>>>         }
>>>>>
>>>>> +       ctx->init_flag = true;
>>>>> +       if (ctx->rx_pending) {
>>>>> +               /*
>>>>> +                * If there is a pending message, schedule the bottom
>>>>> handler
>>>>> +                */
>>>>> +               schedule_work(&ctx->workq);
>>>>> +       }
>>>>> +
>>>>>         dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver
>>>>> registered\n");
>>>>>
>>>>>         return 0;
>>>>>
>>>>>  out:
>>>>> -       kfifo_free(&ctx->async_msg_fifo);
>>>>> -out_mbox_free:
>>>>>         if (acpi_disabled)
>>>>>                 mbox_free_channel(ctx->mbox_chan);
>>>>>         else
>>>>>                 pcc_mbox_free_channel(ctx->mbox_chan);
>>>>> +out_mbox_free:
>>>>> +       kfifo_free(&ctx->async_msg_fifo);
>>>>>
>>>>>         return rc;
>>>>>  }
>>>>>
>>>>
>>>
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
hotran Sept. 7, 2016, 6:55 p.m. UTC | #6
Hi Guenter,

On Tue, Sep 6, 2016 at 11:39 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> On 09/06/2016 11:07 PM, Hoan Tran wrote:
>>
>> Hi Guenter,
>>
>> On Tue, Sep 6, 2016 at 10:50 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>>>
>>> On 09/06/2016 10:21 PM, Hoan Tran wrote:
>>>>
>>>>
>>>> Hi Guenter,
>>>>
>>>> Thank for your quick review !
>>>>
>>>> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net>
>>>> wrote:
>>>>>
>>>>>
>>>>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> The system crashes during probing xgene-hwmon driver when temperature
>>>>>> alarm interrupt occurs before.
>>>>>> It's because
>>>>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>>>>>> the mailbox interrupt.
>>>>>>  - As temperature alarm interrupt is pending, ISR runs and crashes
>>>>>> when
>>>>>> accesses
>>>>>> into invalid resource as unmapped PCC shared memory.
>>>>>>
>>>>>> This patch fixes this issue by saving this alarm message and
>>>>>> scheduling
>>>>>> a
>>>>>> bottom handler after xgene_hwmon_probe() finish.
>>>>>>
>>>>>
>>>>> I am not completely happy with this fix. Main problem I have is that
>>>>> the
>>>>> processing associated with resp_pending doesn't happen until init_flag
>>>>> is
>>>>> set.
>>>>> Since the hwmon functions can be called right after
>>>>> hwmon_device_register_with_groups(),
>>>>> there is now a new race condition between that call and setting
>>>>> init_flag.
>>>>
>>>>
>>>>
>>>> I think it's still good if hwmon functions are called right after
>>>> hwmon_device_register_with_groups().
>>>> The response message will be queued into FIFO and be processed later.
>>>>
>>> Yes, but the call to complete() won't happen in this case, or am I
>>> missing
>>> something ?
>>
>>
>> Yes, I think xgene_hwmon_rd() and xgene_hwmon_pcc_rd() functions have
>> to check "init_flag == true" before issue the read command.
>>
>
> This is getting more and more messy :-(
>
>> Thanks
>> Hoan
>>
>>>
>>> Guenter
>>>
>>>
>>>>>
>>>>> I am also a bit concerned with init_flag and rx_pending not being
>>>>> atomic
>>>>> and
>>>>> protected.
>>>>> What happens if a second interrupt occurs right after init_flag is set
>>>>> but
>>>>> before
>>>>> (or while) rx_pending is evaluated ?
>>>>
>>>>
>>>>
>>>> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
>>>> this atomic protection.
>>>>
>>>>>
>>>>> On top of that, init_flag and thus the added complexity is (unless I am
>>>>> missing
>>>>> something) only needed if acpi is enabled. Penaltizing non-acpi code
>>>>> doesn't
>>>>> seem
>>>>> to be optimal.
>>>>
>>>>
>>>>
>>>> I think, with DT, we still need this flag. In a case of temperature
>>>> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
>>>> This "temp1_critical_alarm" should be created before "init_flag" = true.
>>>>
>
> I don't know. After all, any such alarm would be lost if it occurred
> before the driver was loaded, no ? Those mailboxes should really have
> a means to be informed that the initiator is ready to handle
> interrupts/callbacks.

We don't want to miss an alarm message. User should be warned.

As
ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,

"apm_xgene",
                                                           ctx,
                                                           xgene_hwmon_groups);
How about, callback functions check the ctx->hwmon_dev validation. If
not, they just save msg into FIFO.
Beside of that, as hwmon functions can be called before ctx->hwmon_dev
is assigned. Callback functions check if there is a mailbox response
pending before saving msg into FIFO as below

static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
{
        if (IS_ERR(ctx->hwmon_dev) && !ctx->resp_pending) {
                /* Enqueue to the FIFO */
                kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
                                    sizeof(struct slimpro_resp_msg),
                                    &ctx->kfifo_lock);

                return -EBUSY;
        }

        return 0;
}


At the end of probe function, driver always schedules a bottom handler
to handle FIFO msg.

Then we can remove the init_flag and rx_pending.

Thanks
Hoan


>
> Guenter
>
>
>>>> Thanks
>>>> Hoan
>>>>
>>>>>
>>>>> How do other drivers handle this situation ? This must be a common
>>>>> problem
>>>>> with all mbox users.
>>>>>
>>>>> Thanks,
>>>>> Guenter
>>>>>
>>>>>
>>>>>> Signed-off-by: Hoan Tran <hotran@apm.com>
>>>>>> Reported-by: Itaru Kitayama <itaru.kitayama@riken.jp>
>>>>>> ---
>>>>>>  drivers/hwmon/xgene-hwmon.c | 75
>>>>>> +++++++++++++++++++++++++++++++++------------
>>>>>>  1 file changed, 56 insertions(+), 19 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
>>>>>> index bc78a5d..e3b4e84 100644
>>>>>> --- a/drivers/hwmon/xgene-hwmon.c
>>>>>> +++ b/drivers/hwmon/xgene-hwmon.c
>>>>>> @@ -107,6 +107,8 @@ struct xgene_hwmon_dev {
>>>>>>         struct completion       rd_complete;
>>>>>>         int                     resp_pending;
>>>>>>         struct slimpro_resp_msg sync_msg;
>>>>>> +       bool                    init_flag;
>>>>>> +       bool                    rx_pending;
>>>>>>
>>>>>>         struct work_struct      workq;
>>>>>>         struct kfifo_rec_ptr_1  async_msg_fifo;
>>>>>> @@ -465,13 +467,35 @@ static void xgene_hwmon_evt_work(struct
>>>>>> work_struct
>>>>>> *work)
>>>>>>         }
>>>>>>  }
>>>>>>
>>>>>> +static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void
>>>>>> *msg)
>>>>>> +{
>>>>>> +       if (!ctx->init_flag) {
>>>>>> +               ctx->rx_pending = true;
>>>>>> +               /* Enqueue to the FIFO */
>>>>>> +               kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>>>> +                                   sizeof(struct slimpro_resp_msg),
>>>>>> +                                   &ctx->kfifo_lock);
>>>>>> +               return -EBUSY;
>>>>>> +       }
>>>>>> +
>>>>>> +       return 0;
>>>>>> +}
>>>>>> +
>>>>>>  /*
>>>>>>   * This function is called when the SLIMpro Mailbox received a
>>>>>> message
>>>>>>   */
>>>>>>  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
>>>>>>  {
>>>>>>         struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
>>>>>> -       struct slimpro_resp_msg amsg;
>>>>>> +
>>>>>> +       /*
>>>>>> +        * While the driver registers with the mailbox framework, an
>>>>>> interrupt
>>>>>> +        * can be pending before the probe function completes its
>>>>>> +        * initialization. If such condition occurs, just queue up the
>>>>>> message
>>>>>> +        * as the driver is not ready for servicing the callback.
>>>>>> +        */
>>>>>> +       if (xgene_hwmon_rx_ready(ctx, msg) < 0)
>>>>>> +               return;
>>>>>>
>>>>>>         /*
>>>>>>          * Response message format:
>>>>>> @@ -500,12 +524,8 @@ static void xgene_hwmon_rx_cb(struct mbox_client
>>>>>> *cl,
>>>>>> void *msg)
>>>>>>                 return;
>>>>>>         }
>>>>>>
>>>>>> -       amsg.msg   = ((u32 *)msg)[0];
>>>>>> -       amsg.param1 = ((u32 *)msg)[1];
>>>>>> -       amsg.param2 = ((u32 *)msg)[2];
>>>>>> -
>>>>>>         /* Enqueue to the FIFO */
>>>>>> -       kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
>>>>>> +       kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>>>>>                             sizeof(struct slimpro_resp_msg),
>>>>>> &ctx->kfifo_lock);
>>>>>>         /* Schedule the bottom handler */
>>>>>>         schedule_work(&ctx->workq);
>>>>>> @@ -520,6 +540,15 @@ static void xgene_hwmon_pcc_rx_cb(struct
>>>>>> mbox_client
>>>>>> *cl, void *msg)
>>>>>>         struct acpi_pcct_shared_memory *generic_comm_base =
>>>>>> ctx->pcc_comm_addr;
>>>>>>         struct slimpro_resp_msg amsg;
>>>>>>
>>>>>> +       /*
>>>>>> +        * While the driver registers with the mailbox framework, an
>>>>>> interrupt
>>>>>> +        * can be pending before the probe function completes its
>>>>>> +        * initialization. If such condition occurs, just queue up the
>>>>>> message
>>>>>> +        * as the driver is not ready for servicing the callback.
>>>>>> +        */
>>>>>> +       if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
>>>>>> +               return;
>>>>>> +
>>>>>>         msg = generic_comm_base + 1;
>>>>>>         /* Check if platform sends interrupt */
>>>>>>         if (!xgene_word_tst_and_clr(&generic_comm_base->status,
>>>>>> @@ -596,6 +625,17 @@ static int xgene_hwmon_probe(struct
>>>>>> platform_device
>>>>>> *pdev)
>>>>>>         platform_set_drvdata(pdev, ctx);
>>>>>>         cl = &ctx->mbox_client;
>>>>>>
>>>>>> +       spin_lock_init(&ctx->kfifo_lock);
>>>>>> +       mutex_init(&ctx->rd_mutex);
>>>>>> +
>>>>>> +       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>>>> +                        sizeof(struct slimpro_resp_msg) *
>>>>>> ASYNC_MSG_FIFO_SIZE,
>>>>>> +                        GFP_KERNEL);
>>>>>> +       if (rc)
>>>>>> +               goto out_mbox_free;
>>>>>> +
>>>>>> +       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>>>> +
>>>>>>         /* Request mailbox channel */
>>>>>>         cl->dev = &pdev->dev;
>>>>>>         cl->tx_done = xgene_hwmon_tx_done;
>>>>>> @@ -676,17 +716,6 @@ static int xgene_hwmon_probe(struct
>>>>>> platform_device
>>>>>> *pdev)
>>>>>>                 ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
>>>>>>         }
>>>>>>
>>>>>> -       spin_lock_init(&ctx->kfifo_lock);
>>>>>> -       mutex_init(&ctx->rd_mutex);
>>>>>> -
>>>>>> -       rc = kfifo_alloc(&ctx->async_msg_fifo,
>>>>>> -                        sizeof(struct slimpro_resp_msg) *
>>>>>> ASYNC_MSG_FIFO_SIZE,
>>>>>> -                        GFP_KERNEL);
>>>>>> -       if (rc)
>>>>>> -               goto out_mbox_free;
>>>>>> -
>>>>>> -       INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
>>>>>> -
>>>>>>         ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>>>>>
>>>>>> "apm_xgene",
>>>>>>                                                            ctx,
>>>>>> @@ -697,17 +726,25 @@ static int xgene_hwmon_probe(struct
>>>>>> platform_device
>>>>>> *pdev)
>>>>>>                 goto out;
>>>>>>         }
>>>>>>
>>>>>> +       ctx->init_flag = true;
>>>>>> +       if (ctx->rx_pending) {
>>>>>> +               /*
>>>>>> +                * If there is a pending message, schedule the bottom
>>>>>> handler
>>>>>> +                */
>>>>>> +               schedule_work(&ctx->workq);
>>>>>> +       }
>>>>>> +
>>>>>>         dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver
>>>>>> registered\n");
>>>>>>
>>>>>>         return 0;
>>>>>>
>>>>>>  out:
>>>>>> -       kfifo_free(&ctx->async_msg_fifo);
>>>>>> -out_mbox_free:
>>>>>>         if (acpi_disabled)
>>>>>>                 mbox_free_channel(ctx->mbox_chan);
>>>>>>         else
>>>>>>                 pcc_mbox_free_channel(ctx->mbox_chan);
>>>>>> +out_mbox_free:
>>>>>> +       kfifo_free(&ctx->async_msg_fifo);
>>>>>>
>>>>>>         return rc;
>>>>>>  }
>>>>>>
>>>>>
>>>>
>>>
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Sept. 7, 2016, 7:24 p.m. UTC | #7
On Wed, Sep 07, 2016 at 11:55:06AM -0700, Hoan Tran wrote:
> Hi Guenter,
> 
> On Tue, Sep 6, 2016 at 11:39 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> > On 09/06/2016 11:07 PM, Hoan Tran wrote:
> >>
> >> Hi Guenter,
> >>
> >> On Tue, Sep 6, 2016 at 10:50 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> >>>
> >>> On 09/06/2016 10:21 PM, Hoan Tran wrote:
> >>>>
> >>>>
> >>>> Hi Guenter,
> >>>>
> >>>> Thank for your quick review !
> >>>>
> >>>> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net>
> >>>> wrote:
> >>>>>
> >>>>>
> >>>>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>> The system crashes during probing xgene-hwmon driver when temperature
> >>>>>> alarm interrupt occurs before.
> >>>>>> It's because
> >>>>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
> >>>>>> the mailbox interrupt.
> >>>>>>  - As temperature alarm interrupt is pending, ISR runs and crashes
> >>>>>> when
> >>>>>> accesses
> >>>>>> into invalid resource as unmapped PCC shared memory.
> >>>>>>
> >>>>>> This patch fixes this issue by saving this alarm message and
> >>>>>> scheduling
> >>>>>> a
> >>>>>> bottom handler after xgene_hwmon_probe() finish.
> >>>>>>
> >>>>>
> >>>>> I am not completely happy with this fix. Main problem I have is that
> >>>>> the
> >>>>> processing associated with resp_pending doesn't happen until init_flag
> >>>>> is
> >>>>> set.
> >>>>> Since the hwmon functions can be called right after
> >>>>> hwmon_device_register_with_groups(),
> >>>>> there is now a new race condition between that call and setting
> >>>>> init_flag.
> >>>>
> >>>>
> >>>>
> >>>> I think it's still good if hwmon functions are called right after
> >>>> hwmon_device_register_with_groups().
> >>>> The response message will be queued into FIFO and be processed later.
> >>>>
> >>> Yes, but the call to complete() won't happen in this case, or am I
> >>> missing
> >>> something ?
> >>
> >>
> >> Yes, I think xgene_hwmon_rd() and xgene_hwmon_pcc_rd() functions have
> >> to check "init_flag == true" before issue the read command.
> >>
> >
> > This is getting more and more messy :-(
> >
> >> Thanks
> >> Hoan
> >>
> >>>
> >>> Guenter
> >>>
> >>>
> >>>>>
> >>>>> I am also a bit concerned with init_flag and rx_pending not being
> >>>>> atomic
> >>>>> and
> >>>>> protected.
> >>>>> What happens if a second interrupt occurs right after init_flag is set
> >>>>> but
> >>>>> before
> >>>>> (or while) rx_pending is evaluated ?
> >>>>
> >>>>
> >>>>
> >>>> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
> >>>> this atomic protection.
> >>>>
> >>>>>
> >>>>> On top of that, init_flag and thus the added complexity is (unless I am
> >>>>> missing
> >>>>> something) only needed if acpi is enabled. Penaltizing non-acpi code
> >>>>> doesn't
> >>>>> seem
> >>>>> to be optimal.
> >>>>
> >>>>
> >>>>
> >>>> I think, with DT, we still need this flag. In a case of temperature
> >>>> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
> >>>> This "temp1_critical_alarm" should be created before "init_flag" = true.
> >>>>
> >
> > I don't know. After all, any such alarm would be lost if it occurred
> > before the driver was loaded, no ? Those mailboxes should really have
> > a means to be informed that the initiator is ready to handle
> > interrupts/callbacks.
> 
> We don't want to miss an alarm message. User should be warned.
> 
> As
> ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
> 
> "apm_xgene",
>                                                            ctx,
>                                                            xgene_hwmon_groups);
> How about, callback functions check the ctx->hwmon_dev validation. If
> not, they just save msg into FIFO.
> Beside of that, as hwmon functions can be called before ctx->hwmon_dev
> is assigned. Callback functions check if there is a mailbox response
> pending before saving msg into FIFO as below
> 
> static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
> {
>         if (IS_ERR(ctx->hwmon_dev) && !ctx->resp_pending) {

Probably IS_ERR_OR_ZERO() ?

Guenter

>                 /* Enqueue to the FIFO */
>                 kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>                                     sizeof(struct slimpro_resp_msg),
>                                     &ctx->kfifo_lock);
> 
>                 return -EBUSY;

Need to find something else. EBUSY isn't correct.

>         }
> 
>         return 0;
> }
> 
> 
> At the end of probe function, driver always schedules a bottom handler
> to handle FIFO msg.
> 
> Then we can remove the init_flag and rx_pending.
> 

At least better than before, though I think it is still racy.
It might be worthwhile checking by adding a large msleep()
after hwmon registration and before ctx->hwmon_dev is written,
and have a user space program access sysfs attributes immediately
after they have been created.

Thanks,
Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
hotran Sept. 7, 2016, 9:06 p.m. UTC | #8
Hi Guenter,

On Wed, Sep 7, 2016 at 12:24 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> On Wed, Sep 07, 2016 at 11:55:06AM -0700, Hoan Tran wrote:
>> Hi Guenter,
>>
>> On Tue, Sep 6, 2016 at 11:39 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>> > On 09/06/2016 11:07 PM, Hoan Tran wrote:
>> >>
>> >> Hi Guenter,
>> >>
>> >> On Tue, Sep 6, 2016 at 10:50 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>> >>>
>> >>> On 09/06/2016 10:21 PM, Hoan Tran wrote:
>> >>>>
>> >>>>
>> >>>> Hi Guenter,
>> >>>>
>> >>>> Thank for your quick review !
>> >>>>
>> >>>> On Tue, Sep 6, 2016 at 9:35 PM, Guenter Roeck <linux@roeck-us.net>
>> >>>> wrote:
>> >>>>>
>> >>>>>
>> >>>>> On 09/06/2016 08:46 PM, Hoan Tran wrote:
>> >>>>>>
>> >>>>>>
>> >>>>>>
>> >>>>>> The system crashes during probing xgene-hwmon driver when temperature
>> >>>>>> alarm interrupt occurs before.
>> >>>>>> It's because
>> >>>>>>  - xgene_hwmon_probe() requests PCC mailbox channel which also enables
>> >>>>>> the mailbox interrupt.
>> >>>>>>  - As temperature alarm interrupt is pending, ISR runs and crashes
>> >>>>>> when
>> >>>>>> accesses
>> >>>>>> into invalid resource as unmapped PCC shared memory.
>> >>>>>>
>> >>>>>> This patch fixes this issue by saving this alarm message and
>> >>>>>> scheduling
>> >>>>>> a
>> >>>>>> bottom handler after xgene_hwmon_probe() finish.
>> >>>>>>
>> >>>>>
>> >>>>> I am not completely happy with this fix. Main problem I have is that
>> >>>>> the
>> >>>>> processing associated with resp_pending doesn't happen until init_flag
>> >>>>> is
>> >>>>> set.
>> >>>>> Since the hwmon functions can be called right after
>> >>>>> hwmon_device_register_with_groups(),
>> >>>>> there is now a new race condition between that call and setting
>> >>>>> init_flag.
>> >>>>
>> >>>>
>> >>>>
>> >>>> I think it's still good if hwmon functions are called right after
>> >>>> hwmon_device_register_with_groups().
>> >>>> The response message will be queued into FIFO and be processed later.
>> >>>>
>> >>> Yes, but the call to complete() won't happen in this case, or am I
>> >>> missing
>> >>> something ?
>> >>
>> >>
>> >> Yes, I think xgene_hwmon_rd() and xgene_hwmon_pcc_rd() functions have
>> >> to check "init_flag == true" before issue the read command.
>> >>
>> >
>> > This is getting more and more messy :-(
>> >
>> >> Thanks
>> >> Hoan
>> >>
>> >>>
>> >>> Guenter
>> >>>
>> >>>
>> >>>>>
>> >>>>> I am also a bit concerned with init_flag and rx_pending not being
>> >>>>> atomic
>> >>>>> and
>> >>>>> protected.
>> >>>>> What happens if a second interrupt occurs right after init_flag is set
>> >>>>> but
>> >>>>> before
>> >>>>> (or while) rx_pending is evaluated ?
>> >>>>
>> >>>>
>> >>>>
>> >>>> Yah, that's a good catch. I can re-use the "kfifo_lock" spinlock for
>> >>>> this atomic protection.
>> >>>>
>> >>>>>
>> >>>>> On top of that, init_flag and thus the added complexity is (unless I am
>> >>>>> missing
>> >>>>> something) only needed if acpi is enabled. Penaltizing non-acpi code
>> >>>>> doesn't
>> >>>>> seem
>> >>>>> to be optimal.
>> >>>>
>> >>>>
>> >>>>
>> >>>> I think, with DT, we still need this flag. In a case of temperature
>> >>>> alarm, the driver needs to set "temp1_critical_alarm" sysfs.
>> >>>> This "temp1_critical_alarm" should be created before "init_flag" = true.
>> >>>>
>> >
>> > I don't know. After all, any such alarm would be lost if it occurred
>> > before the driver was loaded, no ? Those mailboxes should really have
>> > a means to be informed that the initiator is ready to handle
>> > interrupts/callbacks.
>>
>> We don't want to miss an alarm message. User should be warned.
>>
>> As
>> ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
>>
>> "apm_xgene",
>>                                                            ctx,
>>                                                            xgene_hwmon_groups);
>> How about, callback functions check the ctx->hwmon_dev validation. If
>> not, they just save msg into FIFO.
>> Beside of that, as hwmon functions can be called before ctx->hwmon_dev
>> is assigned. Callback functions check if there is a mailbox response
>> pending before saving msg into FIFO as below
>>
>> static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
>> {
>>         if (IS_ERR(ctx->hwmon_dev) && !ctx->resp_pending) {
>
> Probably IS_ERR_OR_ZERO() ?

Yes, I'll use IS_ERR_OR_NULL().

>
> Guenter
>
>>                 /* Enqueue to the FIFO */
>>                 kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
>>                                     sizeof(struct slimpro_resp_msg),
>>                                     &ctx->kfifo_lock);
>>
>>                 return -EBUSY;
>
> Need to find something else. EBUSY isn't correct.

Change to ENODEV.

>
>>         }
>>
>>         return 0;
>> }
>>
>>
>> At the end of probe function, driver always schedules a bottom handler
>> to handle FIFO msg.
>>
>> Then we can remove the init_flag and rx_pending.
>>
>
> At least better than before, though I think it is still racy.
> It might be worthwhile checking by adding a large msleep()
> after hwmon registration and before ctx->hwmon_dev is written,
> and have a user space program access sysfs attributes immediately
> after they have been created.

Yes, I'll test it out by adding msleep() right before
__hwmon_device_register() returns.

Thanks
Hoan

>
> Thanks,
> Guenter
--
To unsubscribe from this list: send the line "unsubscribe linux-hwmon" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
index bc78a5d..e3b4e84 100644
--- a/drivers/hwmon/xgene-hwmon.c
+++ b/drivers/hwmon/xgene-hwmon.c
@@ -107,6 +107,8 @@  struct xgene_hwmon_dev {
 	struct completion	rd_complete;
 	int			resp_pending;
 	struct slimpro_resp_msg sync_msg;
+	bool			init_flag;
+	bool			rx_pending;
 
 	struct work_struct	workq;
 	struct kfifo_rec_ptr_1	async_msg_fifo;
@@ -465,13 +467,35 @@  static void xgene_hwmon_evt_work(struct work_struct *work)
 	}
 }
 
+static int xgene_hwmon_rx_ready(struct xgene_hwmon_dev *ctx, void *msg)
+{
+	if (!ctx->init_flag) {
+		ctx->rx_pending = true;
+		/* Enqueue to the FIFO */
+		kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
+				    sizeof(struct slimpro_resp_msg),
+				    &ctx->kfifo_lock);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 /*
  * This function is called when the SLIMpro Mailbox received a message
  */
 static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
 {
 	struct xgene_hwmon_dev *ctx = to_xgene_hwmon_dev(cl);
-	struct slimpro_resp_msg amsg;
+
+	/*
+	 * While the driver registers with the mailbox framework, an interrupt
+	 * can be pending before the probe function completes its
+	 * initialization. If such condition occurs, just queue up the message
+	 * as the driver is not ready for servicing the callback.
+	 */
+	if (xgene_hwmon_rx_ready(ctx, msg) < 0)
+		return;
 
 	/*
 	 * Response message format:
@@ -500,12 +524,8 @@  static void xgene_hwmon_rx_cb(struct mbox_client *cl, void *msg)
 		return;
 	}
 
-	amsg.msg   = ((u32 *)msg)[0];
-	amsg.param1 = ((u32 *)msg)[1];
-	amsg.param2 = ((u32 *)msg)[2];
-
 	/* Enqueue to the FIFO */
-	kfifo_in_spinlocked(&ctx->async_msg_fifo, &amsg,
+	kfifo_in_spinlocked(&ctx->async_msg_fifo, msg,
 			    sizeof(struct slimpro_resp_msg), &ctx->kfifo_lock);
 	/* Schedule the bottom handler */
 	schedule_work(&ctx->workq);
@@ -520,6 +540,15 @@  static void xgene_hwmon_pcc_rx_cb(struct mbox_client *cl, void *msg)
 	struct acpi_pcct_shared_memory *generic_comm_base = ctx->pcc_comm_addr;
 	struct slimpro_resp_msg amsg;
 
+	/*
+	 * While the driver registers with the mailbox framework, an interrupt
+	 * can be pending before the probe function completes its
+	 * initialization. If such condition occurs, just queue up the message
+	 * as the driver is not ready for servicing the callback.
+	 */
+	if (xgene_hwmon_rx_ready(ctx, &amsg) < 0)
+		return;
+
 	msg = generic_comm_base + 1;
 	/* Check if platform sends interrupt */
 	if (!xgene_word_tst_and_clr(&generic_comm_base->status,
@@ -596,6 +625,17 @@  static int xgene_hwmon_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ctx);
 	cl = &ctx->mbox_client;
 
+	spin_lock_init(&ctx->kfifo_lock);
+	mutex_init(&ctx->rd_mutex);
+
+	rc = kfifo_alloc(&ctx->async_msg_fifo,
+			 sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE,
+			 GFP_KERNEL);
+	if (rc)
+		goto out_mbox_free;
+
+	INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
+
 	/* Request mailbox channel */
 	cl->dev = &pdev->dev;
 	cl->tx_done = xgene_hwmon_tx_done;
@@ -676,17 +716,6 @@  static int xgene_hwmon_probe(struct platform_device *pdev)
 		ctx->usecs_lat = PCC_NUM_RETRIES * cppc_ss->latency;
 	}
 
-	spin_lock_init(&ctx->kfifo_lock);
-	mutex_init(&ctx->rd_mutex);
-
-	rc = kfifo_alloc(&ctx->async_msg_fifo,
-			 sizeof(struct slimpro_resp_msg) * ASYNC_MSG_FIFO_SIZE,
-			 GFP_KERNEL);
-	if (rc)
-		goto out_mbox_free;
-
-	INIT_WORK(&ctx->workq, xgene_hwmon_evt_work);
-
 	ctx->hwmon_dev = hwmon_device_register_with_groups(ctx->dev,
 							   "apm_xgene",
 							   ctx,
@@ -697,17 +726,25 @@  static int xgene_hwmon_probe(struct platform_device *pdev)
 		goto out;
 	}
 
+	ctx->init_flag = true;
+	if (ctx->rx_pending) {
+		/*
+		 * If there is a pending message, schedule the bottom handler
+		 */
+		schedule_work(&ctx->workq);
+	}
+
 	dev_info(&pdev->dev, "APM X-Gene SoC HW monitor driver registered\n");
 
 	return 0;
 
 out:
-	kfifo_free(&ctx->async_msg_fifo);
-out_mbox_free:
 	if (acpi_disabled)
 		mbox_free_channel(ctx->mbox_chan);
 	else
 		pcc_mbox_free_channel(ctx->mbox_chan);
+out_mbox_free:
+	kfifo_free(&ctx->async_msg_fifo);
 
 	return rc;
 }