diff mbox series

[RFV,v2,05/13] perf/core: Add function perf_event_create_group_kernel_counters()

Message ID 20230808063111.1870070-6-dapeng1.mi@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Enable fixed counter 3 and topdown perf metrics for vPMU | expand

Commit Message

Mi, Dapeng Aug. 8, 2023, 6:31 a.m. UTC
Add the function perf_event_create_group_kernel_counters(), which can be
used to create grouped perf events from kernel space.

Compared with modifying perf_event_create_kernel_counter() directly to
support creating group events, adding a new function looks like the
better approach, since perf_event_create_kernel_counter() is called from
many places in the kernel and modifying it directly would introduce lots
of changes.

Kernel space may want to create group events just like the user space
perf tool does. One example is supporting the topdown metrics feature in
KVM.

The current perf logic requires the perf tool to create a perf event
group to handle topdown metrics profiling. The event group couples one
slots event, acting as the group leader, with multiple metric events.

To support the topdown metrics feature in KVM, KVM has to follow this
requirement and create the event group from kernel space. That is why
this new function is needed.

Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 include/linux/perf_event.h |  6 ++++++
 kernel/events/core.c       | 39 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

Comments

Mi, Dapeng Aug. 9, 2023, 8:44 a.m. UTC | #1
On Tue, Aug 08, 2023 at 12:21:27PM +0200, Peter Zijlstra wrote:
> Date: Tue, 8 Aug 2023 12:21:27 +0200
> From: Peter Zijlstra <peterz@infradead.org>
> Subject: Re: [PATCH RFV v2 05/13] perf/core: Add function
>  perf_event_create_group_kernel_counters()
> 
> On Tue, Aug 08, 2023 at 02:31:03PM +0800, Dapeng Mi wrote:
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 15eb82d1a010..1877171e9590 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -12762,11 +12762,34 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
> >  				 struct task_struct *task,
> >  				 perf_overflow_handler_t overflow_handler,
> >  				 void *context)
> > +{
> > +	return perf_event_create_group_kernel_counters(attr, cpu, task,
> > +			NULL, overflow_handler, context);
> > +}
> > +EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
> > +
> > +/**
> > + * perf_event_create_group_kernel_counters
> > + *
> > + * @attr: attributes of the counter to create
> > + * @cpu: cpu in which the counter is bound
> > + * @task: task to profile (NULL for percpu)
> > + * @group_leader: the group leader event of the created event
> > + * @overflow_handler: callback to trigger when we hit the event
> > + * @context: context data could be used in overflow_handler callback
> > + */
> > +struct perf_event *
> > +perf_event_create_group_kernel_counters(struct perf_event_attr *attr,
> > +					int cpu, struct task_struct *task,
> > +					struct perf_event *group_leader,
> > +					perf_overflow_handler_t overflow_handler,
> > +					void *context)
> 
> I would much prefer if you just add the argument to
> perf_event_create_kernel_counter(), there aren't *that* many users.

Sure. Thanks.
diff mbox series

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 2166a69e3bf2..e95152531f4c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1104,6 +1104,12 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr,
 				struct task_struct *task,
 				perf_overflow_handler_t callback,
 				void *context);
+extern struct perf_event *
+perf_event_create_group_kernel_counters(struct perf_event_attr *attr,
+					int cpu, struct task_struct *task,
+					struct perf_event *group_leader,
+					perf_overflow_handler_t overflow_handler,
+					void *context);
 extern void perf_pmu_migrate_context(struct pmu *pmu,
 				int src_cpu, int dst_cpu);
 int perf_event_read_local(struct perf_event *event, u64 *value,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 15eb82d1a010..1877171e9590 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12762,11 +12762,34 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 				 struct task_struct *task,
 				 perf_overflow_handler_t overflow_handler,
 				 void *context)
+{
+	return perf_event_create_group_kernel_counters(attr, cpu, task,
+			NULL, overflow_handler, context);
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
+/**
+ * perf_event_create_group_kernel_counters
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @task: task to profile (NULL for percpu)
+ * @group_leader: the group leader event of the created event
+ * @overflow_handler: callback to trigger when we hit the event
+ * @context: context data could be used in overflow_handler callback
+ */
+struct perf_event *
+perf_event_create_group_kernel_counters(struct perf_event_attr *attr,
+					int cpu, struct task_struct *task,
+					struct perf_event *group_leader,
+					perf_overflow_handler_t overflow_handler,
+					void *context)
 {
 	struct perf_event_pmu_context *pmu_ctx;
 	struct perf_event_context *ctx;
 	struct perf_event *event;
 	struct pmu *pmu;
+	int move_group = 0;
 	int err;
 
 	/*
@@ -12776,7 +12799,11 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	if (attr->aux_output)
 		return ERR_PTR(-EINVAL);
 
-	event = perf_event_alloc(attr, cpu, task, NULL, NULL,
+	if (task && group_leader &&
+	    group_leader->attr.inherit != attr->inherit)
+		return ERR_PTR(-EINVAL);
+
+	event = perf_event_alloc(attr, cpu, task, group_leader, NULL,
 				 overflow_handler, context, -1);
 	if (IS_ERR(event)) {
 		err = PTR_ERR(event);
@@ -12806,6 +12833,11 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_unlock;
 	}
 
+	err = perf_event_group_leader_check(group_leader, event, attr, ctx,
+					    &pmu, &move_group);
+	if (err)
+		goto err_unlock;
+
 	pmu_ctx = find_get_pmu_context(pmu, ctx, event);
 	if (IS_ERR(pmu_ctx)) {
 		err = PTR_ERR(pmu_ctx);
@@ -12833,6 +12865,9 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_pmu_ctx;
 	}
 
+	if (move_group)
+		perf_event_move_group(group_leader, pmu_ctx, ctx);
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
@@ -12851,7 +12886,7 @@  perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 err:
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+EXPORT_SYMBOL_GPL(perf_event_create_group_kernel_counters);
 
 static void __perf_pmu_remove(struct perf_event_context *ctx,
 			      int cpu, struct pmu *pmu,