diff mbox series

[v2,1/3] drivers: qcom: rpmh: Disallow active requests in solver mode

Message ID 1611555637-7688-1-git-send-email-mkshah@codeaurora.org (mailing list archive)
State New, archived
Headers show
Series [v2,1/3] drivers: qcom: rpmh: Disallow active requests in solver mode | expand

Commit Message

Maulik Shah Jan. 25, 2021, 6:20 a.m. UTC
From: Lina Iyer <ilina@codeaurora.org>

Controllers may be in 'solver' state, where they run autonomously,
executing low power modes for their hardware, and as such are not
available for sending active votes. A device driver may notify RPMH
that the controller is in solver mode; while in that mode, RPMH disallows
active requests from platform drivers for state change using the RSC.

Signed-off-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Maulik Shah <mkshah@codeaurora.org>
---
(no changes since v1)
---
 drivers/soc/qcom/rpmh-internal.h |  5 ++++
 drivers/soc/qcom/rpmh-rsc.c      | 31 ++++++++++++++++++++++
 drivers/soc/qcom/rpmh.c          | 56 ++++++++++++++++++++++++++++++++++++++++
 drivers/soc/qcom/trace-rpmh.h    | 20 ++++++++++++++
 include/soc/qcom/rpmh.h          |  5 ++++
 5 files changed, 117 insertions(+)

Comments

Doug Anderson Feb. 3, 2021, 6:35 p.m. UTC | #1
Hi,

On Sun, Jan 24, 2021 at 10:21 PM Maulik Shah <mkshah@codeaurora.org> wrote:
>
> From: Lina Iyer <ilina@codeaurora.org>
>
> Controllers may be in 'solver' state, where they could be in autonomous
> mode executing low power modes for their hardware and as such are not
> available for sending active votes. Device driver may notify RPMH
> that the controller is in solver mode and when in such mode, disallow
> requests from platform drivers for state change using the RSC.

It feels like there's still a bit missing to talk about what solver
mode is.  When would you use solver mode and when would you use
non-solver mode?  What are the pros and cons of the two modes?  How do
all the clients of RPMH agree that they should be in solver mode or
not?


> @@ -77,12 +77,14 @@ struct rpmh_request {
>   * @cache: the list of cached requests
>   * @cache_lock: synchronize access to the cache data
>   * @dirty: was the cache updated since flush
> + * @in_solver_mode: Controller is busy in solver mode
>   * @batch_cache: Cache sleep and wake requests sent as batch
>   */
>  struct rpmh_ctrlr {
>         struct list_head cache;
>         spinlock_t cache_lock;
>         bool dirty;
> +       bool in_solver_mode;
>         struct list_head batch_cache;
>  };
>
> @@ -94,6 +96,7 @@ struct rpmh_ctrlr {
>   * @tcs_base:           Start address of the TCS registers in this controller.
>   * @id:                 Instance id in the controller (Direct Resource Voter).
>   * @num_tcs:            Number of TCSes in this DRV.
> + * @in_solver_mode:     Controller is busy in solver mode

Why in both structures?  I think we only need this in the rsc_drv.


> diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
> index a84ab0d..1c1f5b0 100644
> --- a/drivers/soc/qcom/rpmh-rsc.c
> +++ b/drivers/soc/qcom/rpmh-rsc.c
> @@ -635,6 +635,12 @@ int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
>
>         spin_lock_irqsave(&drv->lock, flags);
>
> +       if (drv->in_solver_mode) {
> +               /* Controller is busy in 'solver' mode */
> +               spin_unlock_irqrestore(&drv->lock, flags);
> +               return -EBUSY;

Function comment doesn't say anything about -EBUSY.

What should a client do if -EBUSY is returned?  Try again?  Panic and
reboot?  Is -EBUSY something that's expected or a sign that something
was designed incorrectly?


> +       }
> +
>         /* Wait forever for a free tcs. It better be there eventually! */
>         wait_event_lock_irq(drv->tcs_wait,
>                             (tcs_id = claim_tcs_for_req(drv, tcs, msg)) >= 0,
> @@ -855,6 +861,31 @@ static int rpmh_rsc_cpu_pm_callback(struct notifier_block *nfb,
>         return ret;
>  }
>
> +/**
> + * rpmh_rsc_mode_solver_set() - Enable/disable solver mode.
> + * @drv:     The controller.
> + * @enable:  Boolean state to be set - true/false
> + *
> + * Return:
> + * * 0                 - success
> + * * -EBUSY            - AMCs are busy

What are the implications of being busy?  Does it signify a logic
error in the design of things or is it something the caller is
expected to retry?


> + */
> +int rpmh_rsc_mode_solver_set(struct rsc_drv *drv, bool enable)
> +{
> +       int ret = -EBUSY;
> +
> +       if (spin_trylock(&drv->lock)) {

Almost certainly should at least be spin_trylock_irq() or
spin_trylock_irqsave().  Otherwise you could get the spinlock and
immediately be interrupted by an IRQ on the same CPU.  The IRQ might
try to grab the spinlock and BOOM.

I'd also question whether this should really even be a "trylock".  It
certainly makes the function a bit harder to reason about.  If you
didn't do a trylock then the function with "enable = false" will
always work but now it might not if someone happens to be holding the
spinlock.  I have to go and figure out if that matters.

Presumably using trylock is just a micro-optimization.  Since I don't
think this function is called in any inner loop or anything (right?),
I'd suggest just using a normal spin_lock_irqsave() or
spin_lock_irq().


> diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
> index 01765ee..cbe6b96 100644
> --- a/drivers/soc/qcom/rpmh.c
> +++ b/drivers/soc/qcom/rpmh.c
> @@ -76,6 +76,22 @@ static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
>         return &drv->client;
>  }
>
> +static int check_ctrlr_state(struct rpmh_ctrlr *ctrlr, enum rpmh_state state)
> +{
> +       int ret = 0;
> +
> +       if (state != RPMH_ACTIVE_ONLY_STATE)
> +               return ret;
> +
> +       /* Do not allow sending active votes when in solver mode */
> +       spin_lock(&ctrlr->cache_lock);

Should almost certainly be using irqsave or irq variant.


> @@ -229,9 +245,14 @@ static int __fill_rpmh_msg(struct rpmh_request *req, enum rpmh_state state,
>  int rpmh_write_async(const struct device *dev, enum rpmh_state state,
>                      const struct tcs_cmd *cmd, u32 n)
>  {
> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
>         struct rpmh_request *rpm_msg;
>         int ret;
>
> +       ret = check_ctrlr_state(ctrlr, state);
> +       if (ret)
> +               return ret;
> +

Remove the above check and just let rpmh_rsc check for you.  There's
no reason to check the same thing twice.  In any case, the above check
is racy.  Why?

* cpu0: rpmh_write_async()
* cpu0: -> check_ctrlr_state() => no errors
* cpu1: rpmh_mode_solver_set()
* cpu0: -> __rpmh_write()

In addition, looking at this code path makes me realize a pre-existing
bug in the code.  If __rpmh_write() returns an error then we'll leak
the memory that rpmh_write_async() allocated with the kzalloc.  Maybe
you could add a patch fixing that before this one.


> @@ -262,8 +283,13 @@ int rpmh_write(const struct device *dev, enum rpmh_state state,
>  {
>         DECLARE_COMPLETION_ONSTACK(compl);
>         DEFINE_RPMH_MSG_ONSTACK(dev, state, &compl, rpm_msg);
> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
>         int ret;
>
> +       ret = check_ctrlr_state(ctrlr, state);
> +       if (ret)
> +               return ret;
> +

Like above, remove this check and let rpmh_rsc check for you.


> @@ -338,6 +364,10 @@ int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
>         int ret, i;
>         void *ptr;
>
> +       ret = check_ctrlr_state(ctrlr, state);
> +       if (ret)
> +               return ret;
> +

Like above, remove this check and let rpmh_rsc check for you.


> @@ -505,3 +535,29 @@ void rpmh_invalidate(const struct device *dev)
>         spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
>  }
>  EXPORT_SYMBOL(rpmh_invalidate);
> +
> +/**
> + * rpmh_mode_solver_set() - Indicate that the RSC controller hardware has
> + * been configured to be in solver mode
> + *
> + * @dev: The device making the request
> + * @enable: Boolean value indicating if the controller is in solver mode.
> + *
> + * Return:
> + * * 0          - Success
> + * * Error code - Otherwise
> + */
> +int rpmh_mode_solver_set(const struct device *dev, bool enable)
> +{
> +       int ret;
> +       struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
> +
> +       spin_lock(&ctrlr->cache_lock);

Should be irq or irqsave variant.

...or, actually, don't lock at all since we should be removing
"ctrlr->in_solver_mode" and this will just be a call straight into
rpmh_rsc_mode_solver_set().

Also: isn't there some sort of need to actually tell the hardware that
we're in solver mode?  Maybe this gets into my lack of understanding
of how this is all supposed to do something useful (documentation
please!)

-Doug
diff mbox series

Patch

diff --git a/drivers/soc/qcom/rpmh-internal.h b/drivers/soc/qcom/rpmh-internal.h
index 344ba68..79486d6 100644
--- a/drivers/soc/qcom/rpmh-internal.h
+++ b/drivers/soc/qcom/rpmh-internal.h
@@ -77,12 +77,14 @@  struct rpmh_request {
  * @cache: the list of cached requests
  * @cache_lock: synchronize access to the cache data
  * @dirty: was the cache updated since flush
+ * @in_solver_mode: Controller is busy in solver mode
  * @batch_cache: Cache sleep and wake requests sent as batch
  */
 struct rpmh_ctrlr {
 	struct list_head cache;
 	spinlock_t cache_lock;
 	bool dirty;
+	bool in_solver_mode;
 	struct list_head batch_cache;
 };
 
@@ -94,6 +96,7 @@  struct rpmh_ctrlr {
  * @tcs_base:           Start address of the TCS registers in this controller.
  * @id:                 Instance id in the controller (Direct Resource Voter).
  * @num_tcs:            Number of TCSes in this DRV.
+ * @in_solver_mode:     Controller is busy in solver mode
  * @rsc_pm:             CPU PM notifier for controller.
  *                      Used when solver mode is not present.
  * @cpus_in_pm:         Number of CPUs not in idle power collapse.
@@ -116,6 +119,7 @@  struct rsc_drv {
 	void __iomem *tcs_base;
 	int id;
 	int num_tcs;
+	bool in_solver_mode;
 	struct notifier_block rsc_pm;
 	atomic_t cpus_in_pm;
 	struct tcs_group tcs[TCS_TYPE_NR];
@@ -129,6 +133,7 @@  int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg);
 int rpmh_rsc_write_ctrl_data(struct rsc_drv *drv,
 			     const struct tcs_request *msg);
 void rpmh_rsc_invalidate(struct rsc_drv *drv);
+int rpmh_rsc_mode_solver_set(struct rsc_drv *drv, bool enable);
 
 void rpmh_tx_done(const struct tcs_request *msg, int r);
 int rpmh_flush(struct rpmh_ctrlr *ctrlr);
diff --git a/drivers/soc/qcom/rpmh-rsc.c b/drivers/soc/qcom/rpmh-rsc.c
index a84ab0d..1c1f5b0 100644
--- a/drivers/soc/qcom/rpmh-rsc.c
+++ b/drivers/soc/qcom/rpmh-rsc.c
@@ -635,6 +635,12 @@  int rpmh_rsc_send_data(struct rsc_drv *drv, const struct tcs_request *msg)
 
 	spin_lock_irqsave(&drv->lock, flags);
 
+	if (drv->in_solver_mode) {
+		/* Controller is busy in 'solver' mode */
+		spin_unlock_irqrestore(&drv->lock, flags);
+		return -EBUSY;
+	}
+
 	/* Wait forever for a free tcs. It better be there eventually! */
 	wait_event_lock_irq(drv->tcs_wait,
 			    (tcs_id = claim_tcs_for_req(drv, tcs, msg)) >= 0,
@@ -855,6 +861,31 @@  static int rpmh_rsc_cpu_pm_callback(struct notifier_block *nfb,
 	return ret;
 }
 
+/**
+ * rpmh_rsc_mode_solver_set() - Enable/disable solver mode.
+ * @drv:     The controller.
+ * @enable:  true to enter solver mode, false to leave it
+ *
+ * Return: 0 on success (disabling always succeeds), or -EBUSY when asked to
+ * enable while the controller is still busy.
+ */
+int rpmh_rsc_mode_solver_set(struct rsc_drv *drv, bool enable)
+{
+	unsigned long flags;
+	int ret = -EBUSY;
+
+	/* drv->lock is taken with IRQs off in the send path; do the same here */
+	spin_lock_irqsave(&drv->lock, flags);
+	if (!enable || !rpmh_rsc_ctrlr_is_busy(drv)) {
+		drv->in_solver_mode = enable;
+		trace_rpmh_solver_set(drv, enable);
+		ret = 0;
+	}
+	spin_unlock_irqrestore(&drv->lock, flags);
+
+	return ret;
+}
+
 static int rpmh_probe_tcs_config(struct platform_device *pdev,
 				 struct rsc_drv *drv, void __iomem *base)
 {
diff --git a/drivers/soc/qcom/rpmh.c b/drivers/soc/qcom/rpmh.c
index 01765ee..cbe6b96 100644
--- a/drivers/soc/qcom/rpmh.c
+++ b/drivers/soc/qcom/rpmh.c
@@ -76,6 +76,22 @@  static struct rpmh_ctrlr *get_rpmh_ctrlr(const struct device *dev)
 	return &drv->client;
 }
 
+static int check_ctrlr_state(struct rpmh_ctrlr *ctrlr, enum rpmh_state state)
+{
+	unsigned long flags;
+	bool solver;
+
+	/* Only active votes are refused in solver mode; others always pass */
+	if (state != RPMH_ACTIVE_ONLY_STATE)
+		return 0;
+
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	solver = ctrlr->in_solver_mode;
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+
+	return solver ? -EBUSY : 0;
+}
+
 void rpmh_tx_done(const struct tcs_request *msg, int r)
 {
 	struct rpmh_request *rpm_msg = container_of(msg, struct rpmh_request,
@@ -229,9 +245,14 @@  static int __fill_rpmh_msg(struct rpmh_request *req, enum rpmh_state state,
 int rpmh_write_async(const struct device *dev, enum rpmh_state state,
 		     const struct tcs_cmd *cmd, u32 n)
 {
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
 	struct rpmh_request *rpm_msg;
 	int ret;
 
+	ret = check_ctrlr_state(ctrlr, state);
+	if (ret)
+		return ret;
+
 	rpm_msg = kzalloc(sizeof(*rpm_msg), GFP_ATOMIC);
 	if (!rpm_msg)
 		return -ENOMEM;
@@ -262,8 +283,13 @@  int rpmh_write(const struct device *dev, enum rpmh_state state,
 {
 	DECLARE_COMPLETION_ONSTACK(compl);
 	DEFINE_RPMH_MSG_ONSTACK(dev, state, &compl, rpm_msg);
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
 	int ret;
 
+	ret = check_ctrlr_state(ctrlr, state);
+	if (ret)
+		return ret;
+
 	ret = __fill_rpmh_msg(&rpm_msg, state, cmd, n);
 	if (ret)
 		return ret;
@@ -338,6 +364,10 @@  int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
 	int ret, i;
 	void *ptr;
 
+	ret = check_ctrlr_state(ctrlr, state);
+	if (ret)
+		return ret;
+
 	if (!cmd || !n)
 		return -EINVAL;
 
@@ -505,3 +535,29 @@  void rpmh_invalidate(const struct device *dev)
 	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
 }
 EXPORT_SYMBOL(rpmh_invalidate);
+
+/**
+ * rpmh_mode_solver_set() - Indicate that the RSC controller hardware has
+ * been configured to be in solver mode
+ * @dev: The device making the request
+ * @enable: Boolean value indicating if the controller is in solver mode.
+ *
+ * Return: 0 on success, or the error from rpmh_rsc_mode_solver_set(), in
+ * which case the cached solver state is left unchanged.
+ */
+int rpmh_mode_solver_set(const struct device *dev, bool enable)
+{
+	struct rpmh_ctrlr *ctrlr = get_rpmh_ctrlr(dev);
+	unsigned long flags;
+	int ret;
+
+	/* cache_lock is taken with IRQs off elsewhere in this file; match it */
+	spin_lock_irqsave(&ctrlr->cache_lock, flags);
+	ret = rpmh_rsc_mode_solver_set(ctrlr_to_drv(ctrlr), enable);
+	if (!ret)
+		ctrlr->in_solver_mode = enable;
+	spin_unlock_irqrestore(&ctrlr->cache_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(rpmh_mode_solver_set);
diff --git a/drivers/soc/qcom/trace-rpmh.h b/drivers/soc/qcom/trace-rpmh.h
index feb0cb4..b2b934c 100644
--- a/drivers/soc/qcom/trace-rpmh.h
+++ b/drivers/soc/qcom/trace-rpmh.h
@@ -71,6 +71,26 @@  TRACE_EVENT(rpmh_send_msg,
 		  __entry->addr, __entry->data, __entry->wait)
 );
 
+TRACE_EVENT(rpmh_solver_set,
+
+	TP_PROTO(struct rsc_drv *d, bool set),
+
+	TP_ARGS(d, set),
+
+	TP_STRUCT__entry(
+			 __string(name, d->name)
+			 __field(bool, set)
+	),
+
+	TP_fast_assign(
+		       __assign_str(name, d->name);
+		       __entry->set = set;
+	),
+
+	TP_printk("%s: solver mode set: %d",
+		  __get_str(name), __entry->set)
+);
+
 #endif /* _TRACE_RPMH_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/include/soc/qcom/rpmh.h b/include/soc/qcom/rpmh.h
index bdbee1a..fa8bb53 100644
--- a/include/soc/qcom/rpmh.h
+++ b/include/soc/qcom/rpmh.h
@@ -20,6 +20,8 @@  int rpmh_write_async(const struct device *dev, enum rpmh_state state,
 int rpmh_write_batch(const struct device *dev, enum rpmh_state state,
 		     const struct tcs_cmd *cmd, u32 *n);
 
+int rpmh_mode_solver_set(const struct device *dev, bool enable);
+
 void rpmh_invalidate(const struct device *dev);
 
 #else
@@ -38,6 +40,9 @@  static inline int rpmh_write_batch(const struct device *dev,
 				   const struct tcs_cmd *cmd, u32 *n)
 { return -ENODEV; }
 
+static inline int rpmh_mode_solver_set(const struct device *dev, bool enable)
+{ return -ENODEV; }
+
 static inline void rpmh_invalidate(const struct device *dev)
 {
 }