Message ID | 20240506053020.3911940-7-mizhang@google.com (mailing list archive)
---|---
State | New, archived
Series | Mediated Passthrough vPMU 2.0 for x86
On Mon, May 06, 2024 at 05:29:31AM +0000, Mingwei Zhang wrote:

> +int perf_get_mediated_pmu(void)
> +{
> +        int ret = 0;
> +
> +        mutex_lock(&perf_mediated_pmu_mutex);
> +        if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
> +                goto end;
> +
> +        if (atomic_read(&nr_include_guest_events)) {
> +                ret = -EBUSY;
> +                goto end;
> +        }
> +        refcount_set(&nr_mediated_pmu_vms, 1);
> +end:
> +        mutex_unlock(&perf_mediated_pmu_mutex);
> +        return ret;
> +}
> +EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);

Blergh... it seems I never got my perf guard patches merged :/, but
could we please write this like:

int perf_get_mediated_pmu(void)
{
        guard(mutex)(&perf_mediated_pmu_mutex);
        if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
                return 0;

        if (atomic_read(&nr_include_guest_events))
                return -EBUSY;

        refcount_set(&nr_mediated_pmu_vms, 1);
        return 0;
}

And avoid adding more unlock goto thingies?
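The guard(mutex)() helper Peter refers to comes from the kernel's scope-based cleanup support in include/linux/cleanup.h: it takes the mutex and, via the compiler's cleanup attribute, releases it automatically on every return path. Below is a minimal stand-alone user-space sketch of the same pattern using pthreads; the names are illustrative, not the kernel's actual DEFINE_GUARD machinery.

/*
 * Stand-alone user-space demo of the scope-guard pattern behind the
 * kernel's guard(mutex). Build with: gcc -o guard_demo guard_demo.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

typedef struct { pthread_mutex_t *lock; } mutex_guard_t;

static mutex_guard_t mutex_guard_ctor(pthread_mutex_t *m)
{
        pthread_mutex_lock(m);          /* constructor takes the lock */
        return (mutex_guard_t){ .lock = m };
}

static void mutex_guard_dtor(mutex_guard_t *g)
{
        pthread_mutex_unlock(g->lock);  /* destructor runs at scope exit */
        puts("lock released at end of scope");
}

/* Rough user-space analogue of guard(mutex)(&m); */
#define guard_mutex(m) \
        mutex_guard_t __guard __attribute__((__cleanup__(mutex_guard_dtor))) = \
                mutex_guard_ctor(m)

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;

int main(void)
{
        guard_mutex(&demo_lock);
        puts("critical section; every return path unlocks automatically");
        return 0;       /* no explicit unlock needed */
}

Because the unlock is tied to scope exit, early returns (such as the two in Peter's rewrite above) cannot leak the lock, which is exactly why the goto/unlock pattern becomes unnecessary.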
On Mon, May 06, 2024 at 05:29:31AM +0000, Mingwei Zhang wrote:

> +int perf_get_mediated_pmu(void)
> +{
> +        int ret = 0;
> +
> +        mutex_lock(&perf_mediated_pmu_mutex);
> +        if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
> +                goto end;
> +
> +        if (atomic_read(&nr_include_guest_events)) {
> +                ret = -EBUSY;
> +                goto end;
> +        }
> +        refcount_set(&nr_mediated_pmu_vms, 1);
> +end:
> +        mutex_unlock(&perf_mediated_pmu_mutex);
> +        return ret;
> +}
> +EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);
> +
> +void perf_put_mediated_pmu(void)
> +{
> +        if (!refcount_dec_not_one(&nr_mediated_pmu_vms))
> +                refcount_set(&nr_mediated_pmu_vms, 0);

I'm sorry, but this made the WTF'o'meter go 'ding'.

Isn't that simply refcount_dec() ?

> +}
> +EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
> +
>  /*
>   * Holding the top-level event's child_mutex means that any
>   * descendant process that has inherited this event will block
> @@ -12086,11 +12140,24 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>          if (err)
>                  goto err_callchain_buffer;
>
> +        if (is_include_guest_event(event)) {
> +                mutex_lock(&perf_mediated_pmu_mutex);
> +                if (refcount_read(&nr_mediated_pmu_vms)) {
> +                        mutex_unlock(&perf_mediated_pmu_mutex);
> +                        err = -EACCES;
> +                        goto err_security_alloc;
> +                }
> +                atomic_inc(&nr_include_guest_events);
> +                mutex_unlock(&perf_mediated_pmu_mutex);
> +        }

Wouldn't all that be nicer with a helper function?

        if (is_include_guest_event() && !perf_get_guest_event())
                goto err_security_alloc;

> +
>          /* symmetric to unaccount_event() in _free_event() */
>          account_event(event);
>
>          return event;
>
> +err_security_alloc:
> +        security_perf_event_free(event);
>  err_callchain_buffer:
>          if (!event->parent) {
>                  if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
> --
> 2.45.0.rc1.225.g2a3ae87e7f-goog
>
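Peter's suggested helper is only named in his mail, not written out. A sketch of what perf_get_guest_event() might look like, assuming the guard() style he asks for above; this is a hypothetical reading, not code from the posted series:

/*
 * Hypothetical helper per Peter's suggestion; a sketch, not code from
 * the posted series. Returns true if the !exclude_guest event may be
 * created (and accounts it), false if a mediated-vPMU VM already exists.
 */
static bool perf_get_guest_event(void)
{
        guard(mutex)(&perf_mediated_pmu_mutex);

        if (refcount_read(&nr_mediated_pmu_vms))
                return false;

        atomic_inc(&nr_include_guest_events);
        return true;
}

This folds the lock/check/account/unlock dance into one function, so the perf_event_alloc() call site collapses to the two-line form Peter shows.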
On 5/7/2024 4:31 PM, Peter Zijlstra wrote:
> On Mon, May 06, 2024 at 05:29:31AM +0000, Mingwei Zhang wrote:
>
>> +int perf_get_mediated_pmu(void)
>> +{
>> +        int ret = 0;
>> +
>> +        mutex_lock(&perf_mediated_pmu_mutex);
>> +        if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
>> +                goto end;
>> +
>> +        if (atomic_read(&nr_include_guest_events)) {
>> +                ret = -EBUSY;
>> +                goto end;
>> +        }
>> +        refcount_set(&nr_mediated_pmu_vms, 1);
>> +end:
>> +        mutex_unlock(&perf_mediated_pmu_mutex);
>> +        return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);
>
> Blergh... it seems I never got my perf guard patches merged :/, but
> could we please write this like:
>
> int perf_get_mediated_pmu(void)
> {
>         guard(mutex)(&perf_mediated_pmu_mutex);
>         if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
>                 return 0;
>
>         if (atomic_read(&nr_include_guest_events))
>                 return -EBUSY;
>
>         refcount_set(&nr_mediated_pmu_vms, 1);
>         return 0;
> }
>
> And avoid adding more unlock goto thingies?
>
Yes, this works, and the code is cleaner. Thanks.
On 5/7/2024 4:41 PM, Peter Zijlstra wrote:
> On Mon, May 06, 2024 at 05:29:31AM +0000, Mingwei Zhang wrote:
>
>> +int perf_get_mediated_pmu(void)
>> +{
>> +        int ret = 0;
>> +
>> +        mutex_lock(&perf_mediated_pmu_mutex);
>> +        if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
>> +                goto end;
>> +
>> +        if (atomic_read(&nr_include_guest_events)) {
>> +                ret = -EBUSY;
>> +                goto end;
>> +        }
>> +        refcount_set(&nr_mediated_pmu_vms, 1);
>> +end:
>> +        mutex_unlock(&perf_mediated_pmu_mutex);
>> +        return ret;
>> +}
>> +EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);
>> +
>> +void perf_put_mediated_pmu(void)
>> +{
>> +        if (!refcount_dec_not_one(&nr_mediated_pmu_vms))
>> +                refcount_set(&nr_mediated_pmu_vms, 0);
>
> I'm sorry, but this made the WTF'o'meter go 'ding'.
>
> Isn't that simply refcount_dec() ?

When nr_mediated_pmu_vms is 1, refcount_dec(&nr_mediated_pmu_vms) emits a
warning and call trace: "refcount_t: decrement hit 0; leaking memory."

Similarly, when nr_mediated_pmu_vms is 0, refcount_inc(&nr_mediated_pmu_vms)
also emits a warning and call trace: "refcount_t: addition on 0; use-after-free."

It seems refcount_set() has to be used to move a refcount_t between 0 and 1.

>
>> +}
>> +EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
>> +
>>  /*
>>   * Holding the top-level event's child_mutex means that any
>>   * descendant process that has inherited this event will block
>> @@ -12086,11 +12140,24 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
>>          if (err)
>>                  goto err_callchain_buffer;
>>
>> +        if (is_include_guest_event(event)) {
>> +                mutex_lock(&perf_mediated_pmu_mutex);
>> +                if (refcount_read(&nr_mediated_pmu_vms)) {
>> +                        mutex_unlock(&perf_mediated_pmu_mutex);
>> +                        err = -EACCES;
>> +                        goto err_security_alloc;
>> +                }
>> +                atomic_inc(&nr_include_guest_events);
>> +                mutex_unlock(&perf_mediated_pmu_mutex);
>> +        }
>
> Wouldn't all that be nicer with a helper function?

Yes, it is nicer. Thanks.

>
>         if (is_include_guest_event() && !perf_get_guest_event())
>                 goto err_security_alloc;
>
>> +
>>          /* symmetric to unaccount_event() in _free_event() */
>>          account_event(event);
>>
>>          return event;
>>
>> +err_security_alloc:
>> +        security_perf_event_free(event);
>>  err_callchain_buffer:
>>          if (!event->parent) {
>>                  if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
>> --
>> 2.45.0.rc1.225.g2a3ae87e7f-goog
>>
>
On Wed, May 08, 2024 at 12:54:31PM +0800, Zhang, Xiong Y wrote:
> On 5/7/2024 4:41 PM, Peter Zijlstra wrote:
> > On Mon, May 06, 2024 at 05:29:31AM +0000, Mingwei Zhang wrote:
> >> +void perf_put_mediated_pmu(void)
> >> +{
> >> +        if (!refcount_dec_not_one(&nr_mediated_pmu_vms))
> >> +                refcount_set(&nr_mediated_pmu_vms, 0);
> >
> > I'm sorry, but this made the WTF'o'meter go 'ding'.
> >
> > Isn't that simply refcount_dec() ?
>
> when nr_mediated_pmu_vms is 1, refcount_dec(&nr_mediated_pmu_vms) has an
> error and call trace: refcount_t: decrement hit 0; leaking memory.
>
> Similar when nr_mediated_pmu_vms is 0, refcount_inc(&nr_mediated_pmu_vms)
> has an error and call trace also: refcount_t: addition on 0; use_after_free.
>
> it seems refcount_set() should be used to set 1 or 0 to refcount_t.

Ah, yes, you need refcount_dec_and_test() in order to free. But if this
is the case, then you simply shouldn't be using refcount.
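Since the count legitimately travels between 0 and 1 without any object being freed, Peter's remark points at using a plain integer under the existing mutex rather than refcount_t. A sketch of that alternative; this is an assumption about the intended fix, not code from a later revision of the series:

/*
 * Plain counter protected by perf_mediated_pmu_mutex; no refcount_t,
 * so no saturation warnings on the 0 <-> 1 transitions. A sketch only.
 */
static int nr_mediated_pmu_vms;

int perf_get_mediated_pmu(void)
{
        guard(mutex)(&perf_mediated_pmu_mutex);

        /* First VM must not race with existing !exclude_guest events. */
        if (!nr_mediated_pmu_vms && atomic_read(&nr_include_guest_events))
                return -EBUSY;

        nr_mediated_pmu_vms++;
        return 0;
}
EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);

void perf_put_mediated_pmu(void)
{
        guard(mutex)(&perf_mediated_pmu_mutex);
        nr_mediated_pmu_vms--;
}
EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);

The mutex already serializes all readers and writers of the count, so refcount_t's lockless overflow/underflow protection buys nothing here.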
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0c6f8a..dd4920bf3d1b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -291,6 +291,7 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_NO_EXCLUDE		0x0040
 #define PERF_PMU_CAP_AUX_OUTPUT		0x0080
 #define PERF_PMU_CAP_EXTENDED_HW_TYPE	0x0100
+#define PERF_PMU_CAP_PASSTHROUGH_VPMU	0x0200
 
 struct perf_output_handle;
 
@@ -1731,6 +1732,8 @@ extern void perf_event_task_tick(void);
 extern int perf_event_account_interrupt(struct perf_event *event);
 extern int perf_event_period(struct perf_event *event, u64 value);
 extern u64 perf_event_pause(struct perf_event *event, bool reset);
+extern int perf_get_mediated_pmu(void);
+extern void perf_put_mediated_pmu(void);
 #else /* !CONFIG_PERF_EVENTS: */
 static inline void *
 perf_aux_output_begin(struct perf_output_handle *handle,
@@ -1817,6 +1820,12 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset)
 {
 	return 0;
 }
+static inline int perf_get_mediated_pmu(void)
+{
+	return 0;
+}
+
+static inline void perf_put_mediated_pmu(void)	{ }
 #endif
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 724e6d7e128f..701b622c670e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -402,6 +402,21 @@ static atomic_t nr_bpf_events __read_mostly;
 static atomic_t nr_cgroup_events __read_mostly;
 static atomic_t nr_text_poke_events __read_mostly;
 static atomic_t nr_build_id_events __read_mostly;
+static atomic_t nr_include_guest_events __read_mostly;
+
+static refcount_t nr_mediated_pmu_vms = REFCOUNT_INIT(0);
+static DEFINE_MUTEX(perf_mediated_pmu_mutex);
+
+/* !exclude_guest system wide event of PMU with PERF_PMU_CAP_PASSTHROUGH_VPMU */
+static inline bool is_include_guest_event(struct perf_event *event)
+{
+	if ((event->pmu->capabilities & PERF_PMU_CAP_PASSTHROUGH_VPMU) &&
+	    !event->attr.exclude_guest &&
+	    !event->attr.task)
+		return true;
+
+	return false;
+}
 
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
@@ -5193,6 +5208,9 @@ static void _free_event(struct perf_event *event)
 
 	unaccount_event(event);
 
+	if (is_include_guest_event(event))
+		atomic_dec(&nr_include_guest_events);
+
 	security_perf_event_free(event);
 
 	if (event->rb) {
@@ -5737,6 +5755,42 @@ u64 perf_event_pause(struct perf_event *event, bool reset)
 }
 EXPORT_SYMBOL_GPL(perf_event_pause);
 
+/*
+ * Currently invoked at VM creation to
+ * - Check whether there are existing !exclude_guest system wide events
+ *   of PMU with PERF_PMU_CAP_PASSTHROUGH_VPMU
+ * - Set nr_mediated_pmu_vms to prevent !exclude_guest event creation on
+ *   PMUs with PERF_PMU_CAP_PASSTHROUGH_VPMU
+ *
+ * No impact for the PMU without PERF_PMU_CAP_PASSTHROUGH_VPMU. The perf
+ * still owns all the PMU resources.
+ */
+int perf_get_mediated_pmu(void)
+{
+	int ret = 0;
+
+	mutex_lock(&perf_mediated_pmu_mutex);
+	if (refcount_inc_not_zero(&nr_mediated_pmu_vms))
+		goto end;
+
+	if (atomic_read(&nr_include_guest_events)) {
+		ret = -EBUSY;
+		goto end;
+	}
+	refcount_set(&nr_mediated_pmu_vms, 1);
+end:
+	mutex_unlock(&perf_mediated_pmu_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(perf_get_mediated_pmu);
+
+void perf_put_mediated_pmu(void)
+{
+	if (!refcount_dec_not_one(&nr_mediated_pmu_vms))
+		refcount_set(&nr_mediated_pmu_vms, 0);
+}
+EXPORT_SYMBOL_GPL(perf_put_mediated_pmu);
+
 /*
  * Holding the top-level event's child_mutex means that any
  * descendant process that has inherited this event will block
@@ -12086,11 +12140,24 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	if (err)
 		goto err_callchain_buffer;
 
+	if (is_include_guest_event(event)) {
+		mutex_lock(&perf_mediated_pmu_mutex);
+		if (refcount_read(&nr_mediated_pmu_vms)) {
+			mutex_unlock(&perf_mediated_pmu_mutex);
+			err = -EACCES;
+			goto err_security_alloc;
+		}
+		atomic_inc(&nr_include_guest_events);
+		mutex_unlock(&perf_mediated_pmu_mutex);
+	}
+
 	/* symmetric to unaccount_event() in _free_event() */
 	account_event(event);
 
 	return event;
 
+err_security_alloc:
+	security_perf_event_free(event);
 err_callchain_buffer:
 	if (!event->parent) {
 		if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
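For context, the two exports are meant to be called by KVM when a VM that wants the mediated/passthrough vPMU is created and destroyed; the actual KVM call sites land in later patches of the series. A hypothetical caller sketch, with function names and placement that are illustrative only:

/*
 * Hypothetical KVM-side usage; illustrative only, the real call sites
 * are added by later patches in this series.
 */
static int enable_mediated_vpmu(void)
{
        int ret;

        /* Fails with -EBUSY if !exclude_guest system-wide events exist. */
        ret = perf_get_mediated_pmu();
        if (ret)
                return ret;

        /* ... set up the passthrough vPMU for this VM ... */
        return 0;
}

static void disable_mediated_vpmu(void)
{
        /*
         * Drop this VM's claim; once the last VM is gone,
         * !exclude_guest system-wide events can be created again.
         */
        perf_put_mediated_pmu();
}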