diff mbox series

[v10,04/15] PM / EM: Expose the Energy Model in sysfs

Message ID 20181203095628.11858-5-quentin.perret@arm.com (mailing list archive)
State Not Applicable, archived
Headers show
Series Energy Aware Scheduling | expand

Commit Message

Quentin Perret Dec. 3, 2018, 9:56 a.m. UTC
Expose the Energy Model (read-only) of all performance domains in sysfs
for convenience. To do so, add a kobject to the CPU subsystem under the
umbrella of which a kobject for each performance domain is attached.

The resulting hierarchy is as follows for a platform with two
performance domains for example:

   /sys/devices/system/cpu/energy_model
   ├── pd0
   │   ├── cost
   │   ├── cpus
   │   ├── frequency
   │   └── power
   └── pd4
       ├── cost
       ├── cpus
       ├── frequency
       └── power

In this implementation, the kobject abstraction is only used as a
convenient way of exposing data to sysfs. However, it could also be
used in the future to allocate and release performance domains in a more
dynamic way using reference counting.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Signed-off-by: Quentin Perret <quentin.perret@arm.com>
---
 include/linux/energy_model.h |  2 +
 kernel/power/energy_model.c  | 90 ++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

Comments

Ingo Molnar Dec. 11, 2018, 2:18 p.m. UTC | #1
* Quentin Perret <quentin.perret@arm.com> wrote:

> Expose the Energy Model (read-only) of all performance domains in sysfs
> for convenience. To do so, add a kobject to the CPU subsystem under the
> umbrella of which a kobject for each performance domain is attached.
> 
> The resulting hierarchy is as follows for a platform with two
> performance domains for example:
> 
>    /sys/devices/system/cpu/energy_model
>    ├── pd0
>    │   ├── cost
>    │   ├── cpus
>    │   ├── frequency
>    │   └── power
>    └── pd4
>        ├── cost
>        ├── cpus
>        ├── frequency
>        └── power
> 
> In this implementation, the kobject abstraction is only used as a
> convenient way of exposing data to sysfs. However, it could also be
> used in the future to allocate and release performance domains in a more
> dynamic way using reference counting.
> 
> Cc: Peter Zijlstra <peterz@infradead.org>
> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
> Signed-off-by: Quentin Perret <quentin.perret@arm.com>
> ---
>  include/linux/energy_model.h |  2 +
>  kernel/power/energy_model.c  | 90 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)

Why is a read-only ABI added for 'convenience'? We really don't do that 
as ABIs are final and they come with responsibilities.

I think if this is for debug purposes it should be declared and put into 
debugfs or so.

If it's for some other purpose that purpose should be declared.

Thanks,

	Ingo
Quentin Perret Dec. 11, 2018, 3:04 p.m. UTC | #2
On Tuesday 11 Dec 2018 at 15:18:28 (+0100), Ingo Molnar wrote:
> 
> * Quentin Perret <quentin.perret@arm.com> wrote:
> 
> > Expose the Energy Model (read-only) of all performance domains in sysfs
> > for convenience. To do so, add a kobject to the CPU subsystem under the
> > umbrella of which a kobject for each performance domain is attached.
> > 
> > The resulting hierarchy is as follows for a platform with two
> > performance domains for example:
> > 
> >    /sys/devices/system/cpu/energy_model
> >    ├── pd0
> >    │   ├── cost
> >    │   ├── cpus
> >    │   ├── frequency
> >    │   └── power
> >    └── pd4
> >        ├── cost
> >        ├── cpus
> >        ├── frequency
> >        └── power
> > 
> > In this implementation, the kobject abstraction is only used as a
> > convenient way of exposing data to sysfs. However, it could also be
> > used in the future to allocate and release performance domains in a more
> > dynamic way using reference counting.
> > 
> > Cc: Peter Zijlstra <peterz@infradead.org>
> > Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
> > Signed-off-by: Quentin Perret <quentin.perret@arm.com>
> > ---
> >  include/linux/energy_model.h |  2 +
> >  kernel/power/energy_model.c  | 90 ++++++++++++++++++++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> 
> Why is a read-only ABI added for 'convenience'? We really don't do that 
> as ABIs are final and they come with responsibilities.
> 
> I think if this is for debug purposes it should be declared and put into 
> debugfs or so.
> 
> If it's for some other purpose that purpose should be declared.

This is basically supposed to look like the CPUFreq policy entries.
You can get things like this for CPUFreq:

  $ cat /sys/devices/system/cpu/cpufreq/policy1/scaling_available_frequencies
  450000 625000 800000 950000 1100000

And things like this for the EM:

$ cat /sys/devices/system/cpu/energy_model/pd1/power
  160 239 343 454 583

In general, I expect these two interfaces to be used for similar
purposes. Now, whether or not they should be exposed in debugfs is a
good question. I went for sysfs for consistency with CPUFreq, but I'm
not opposed to a change in this area for the EM if you feel it's
preferable.

Also, I'm happy to just drop this patch for now and to begin by not
exposing the EM at all. It's easier to expose it later than to remove it
once userspace tools depend on it.

Thanks,
Quentin
diff mbox series

Patch

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index aa027f7bcb3e..55deab2b38dc 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -27,6 +27,7 @@  struct em_cap_state {
  * em_perf_domain - Performance domain
  * @table:		List of capacity states, in ascending order
  * @nr_cap_states:	Number of capacity states
+ * @kobj:		Kobject used to expose the domain in sysfs
  * @cpus:		Cpumask covering the CPUs of the domain
  *
  * A "performance domain" represents a group of CPUs whose performance is
@@ -37,6 +38,7 @@  struct em_cap_state {
 struct em_perf_domain {
 	struct em_cap_state *table;
 	int nr_cap_states;
+	struct kobject kobj;
 	unsigned long cpus[0];
 };
 
diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index d9dc2c38764a..5ec376d4f2f3 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -23,6 +23,82 @@  static DEFINE_PER_CPU(struct em_perf_domain *, em_data);
  */
 static DEFINE_MUTEX(em_pd_mutex);
 
+static struct kobject *em_kobject;
+
+/* Getters for the attributes of em_perf_domain objects */
+struct em_pd_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct em_perf_domain *pd, char *buf);
+	ssize_t (*store)(struct em_perf_domain *pd, const char *buf, size_t s);
+};
+
+#define EM_ATTR_LEN 13
+#define show_table_attr(_attr) \
+static ssize_t show_##_attr(struct em_perf_domain *pd, char *buf) \
+{ \
+	ssize_t cnt = 0; \
+	int i; \
+	for (i = 0; i < pd->nr_cap_states; i++) { \
+		if (cnt >= (ssize_t) (PAGE_SIZE / sizeof(char) \
+				      - (EM_ATTR_LEN + 2))) \
+			goto out; \
+		cnt += scnprintf(&buf[cnt], EM_ATTR_LEN + 1, "%lu ", \
+				 pd->table[i]._attr); \
+	} \
+out: \
+	cnt += sprintf(&buf[cnt], "\n"); \
+	return cnt; \
+}
+
+show_table_attr(power);
+show_table_attr(frequency);
+show_table_attr(cost);
+
+static ssize_t show_cpus(struct em_perf_domain *pd, char *buf)
+{
+	return sprintf(buf, "%*pbl\n", cpumask_pr_args(to_cpumask(pd->cpus)));
+}
+
+#define pd_attr(_name) em_pd_##_name##_attr
+#define define_pd_attr(_name) static struct em_pd_attr pd_attr(_name) = \
+		__ATTR(_name, 0444, show_##_name, NULL)
+
+define_pd_attr(power);
+define_pd_attr(frequency);
+define_pd_attr(cost);
+define_pd_attr(cpus);
+
+static struct attribute *em_pd_default_attrs[] = {
+	&pd_attr(power).attr,
+	&pd_attr(frequency).attr,
+	&pd_attr(cost).attr,
+	&pd_attr(cpus).attr,
+	NULL
+};
+
+#define to_pd(k) container_of(k, struct em_perf_domain, kobj)
+#define to_pd_attr(a) container_of(a, struct em_pd_attr, attr)
+
+static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct em_perf_domain *pd = to_pd(kobj);
+	struct em_pd_attr *pd_attr = to_pd_attr(attr);
+	ssize_t ret;
+
+	ret = pd_attr->show(pd, buf);
+
+	return ret;
+}
+
+static const struct sysfs_ops em_pd_sysfs_ops = {
+	.show	= show,
+};
+
+static struct kobj_type ktype_em_pd = {
+	.sysfs_ops	= &em_pd_sysfs_ops,
+	.default_attrs	= em_pd_default_attrs,
+};
+
 static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
 						struct em_data_callback *cb)
 {
@@ -102,6 +178,11 @@  static struct em_perf_domain *em_create_pd(cpumask_t *span, int nr_states,
 	pd->nr_cap_states = nr_states;
 	cpumask_copy(to_cpumask(pd->cpus), span);
 
+	ret = kobject_init_and_add(&pd->kobj, &ktype_em_pd, em_kobject,
+				   "pd%u", cpu);
+	if (ret)
+		pr_err("pd%d: failed kobject_init_and_add(): %d\n", cpu, ret);
+
 	return pd;
 
 free_cs_table:
@@ -155,6 +236,15 @@  int em_register_perf_domain(cpumask_t *span, unsigned int nr_states,
 	 */
 	mutex_lock(&em_pd_mutex);
 
+	if (!em_kobject) {
+		em_kobject = kobject_create_and_add("energy_model",
+						&cpu_subsys.dev_root->kobj);
+		if (!em_kobject) {
+			ret = -ENODEV;
+			goto unlock;
+		}
+	}
+
 	for_each_cpu(cpu, span) {
 		/* Make sure we don't register again an existing domain. */
 		if (READ_ONCE(per_cpu(em_data, cpu))) {