diff mbox series

[RFC,6/9] RISC-V: KVM: Add SBI PMU extension support

Message ID 20220718170205.2972215-7-atishp@rivosinc.com (mailing list archive)
State New, archived
Headers show
Series KVM perf support | expand

Commit Message

Atish Kumar Patra July 18, 2022, 5:02 p.m. UTC
SBI PMU extension allows KVM guests to configure/start/stop/query about
the PMU counters in virtualized enviornment as well.

In order to allow that, KVM implements the entire SBI PMU extension.

Signed-off-by: Atish Patra <atishp@rivosinc.com>
---
 arch/riscv/kvm/vcpu_sbi.c     | 11 +++++
 arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c

Comments

Andrew Jones Nov. 1, 2022, 2:26 p.m. UTC | #1
On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote:
> SBI PMU extension allows KVM guests to configure/start/stop/query about
> the PMU counters in virtualized enviornment as well.
> 
> In order to allow that, KVM implements the entire SBI PMU extension.
> 
> Signed-off-by: Atish Patra <atishp@rivosinc.com>
> ---
>  arch/riscv/kvm/vcpu_sbi.c     | 11 +++++
>  arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c
> 
> diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
> index d45e7da3f0d3..da9f7959340e 100644
> --- a/arch/riscv/kvm/vcpu_sbi.c
> +++ b/arch/riscv/kvm/vcpu_sbi.c
> @@ -50,6 +50,16 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
>  extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
>  extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
>  
> +#ifdef CONFIG_RISCV_PMU_SBI
> +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
> +#else
> +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
> +	.extid_start = -1UL,
> +	.extid_end = -1UL,
> +	.handler = NULL,
> +};
> +#endif
> +
>  static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
>  	&vcpu_sbi_ext_v01,
>  	&vcpu_sbi_ext_base,
> @@ -58,6 +68,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
>  	&vcpu_sbi_ext_rfence,
>  	&vcpu_sbi_ext_srst,
>  	&vcpu_sbi_ext_hsm,
> +	&vcpu_sbi_ext_pmu,
>  	&vcpu_sbi_ext_experimental,
>  	&vcpu_sbi_ext_vendor,
>  };
> diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
> new file mode 100644
> index 000000000000..90c51a95d4f4
> --- /dev/null
> +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
> @@ -0,0 +1,81 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2022 Rivos Inc
> + *
> + * Authors:
> + *     Atish Patra <atishp@rivosinc.com>
> + */
> +
> +#include <linux/errno.h>
> +#include <linux/err.h>
> +#include <linux/kvm_host.h>
> +#include <asm/csr.h>
> +#include <asm/sbi.h>
> +#include <asm/kvm_vcpu_sbi.h>
> +
> +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> +				   unsigned long *out_val,
> +				   struct kvm_cpu_trap *utrap,
> +				   bool *exit)
> +{
> +	int ret = -EOPNOTSUPP;
> +	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
> +	unsigned long funcid = cp->a6;
> +	uint64_t temp;

I think we need something like

   if (!vcpu_to_pmu(vcpu)->enabled)
      return -EOPNOTSUPP;

here. Where 'enabled' is only true when we successfully initialize
the pmu. And, successful initialization depends on 
IS_ENABLED(CONFIG_RISCV_PMU_SBI) and
riscv_isa_extension_available(NULL, SSCOFPMF) as well as not
failing other things. And, KVM userspace should be able to
disable the pmu, which keep enabled from being set as well.

> +
> +	switch (funcid) {
> +	case SBI_EXT_PMU_NUM_COUNTERS:
> +		ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val);
> +		break;
> +	case SBI_EXT_PMU_COUNTER_GET_INFO:
> +		ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val);
> +		break;
> +	case SBI_EXT_PMU_COUNTER_CFG_MATCH:
> +#if defined(CONFIG_32BIT)
> +		temp = ((uint64_t)cp->a5 << 32) | cp->a4;
> +#else
> +		temp = cp->a4;
> +#endif
> +		ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp);
> +		if (ret >= 0) {
> +			*out_val = ret;
> +			ret = 0;
> +		}
> +		break;
> +	case SBI_EXT_PMU_COUNTER_START:
> +#if defined(CONFIG_32BIT)
> +		temp = ((uint64_t)cp->a4 << 32) | cp->a3;
> +#else
> +		temp = cp->a3;
> +#endif
> +		ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp);
> +		break;
> +	case SBI_EXT_PMU_COUNTER_STOP:
> +		ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2);
> +		break;
> +	case SBI_EXT_PMU_COUNTER_FW_READ:
> +		ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val);
> +		break;
> +	default:
> +		ret = -EOPNOTSUPP;
> +	}
> +
> +	return ret;
> +}
> +
> +unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid)
> +{
> +	/*
> +	 * PMU Extension is only available to guests if privilege mode filtering
> +	 * is available. Otherwise, guest will always count events while the
> +	 * execution is in hypervisor mode.
> +	 */
> +	return riscv_isa_extension_available(NULL, SSCOFPMF);
> +}
> +
> +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
> +	.extid_start = SBI_EXT_PMU,
> +	.extid_end = SBI_EXT_PMU,
> +	.handler = kvm_sbi_ext_pmu_handler,
> +	.probe = kvm_sbi_ext_pmu_probe,
> +};
> -- 
> 2.25.1
>

Thanks,
drew
Atish Patra Nov. 22, 2022, 11:08 p.m. UTC | #2
On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote:
> > SBI PMU extension allows KVM guests to configure/start/stop/query about
> > the PMU counters in virtualized enviornment as well.
> >
> > In order to allow that, KVM implements the entire SBI PMU extension.
> >
> > Signed-off-by: Atish Patra <atishp@rivosinc.com>
> > ---
> >  arch/riscv/kvm/vcpu_sbi.c     | 11 +++++
> >  arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >  create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c
> >
> > diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
> > index d45e7da3f0d3..da9f7959340e 100644
> > --- a/arch/riscv/kvm/vcpu_sbi.c
> > +++ b/arch/riscv/kvm/vcpu_sbi.c
> > @@ -50,6 +50,16 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
> >  extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
> >  extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
> >
> > +#ifdef CONFIG_RISCV_PMU_SBI
> > +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
> > +#else
> > +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
> > +     .extid_start = -1UL,
> > +     .extid_end = -1UL,
> > +     .handler = NULL,
> > +};
> > +#endif
> > +
> >  static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
> >       &vcpu_sbi_ext_v01,
> >       &vcpu_sbi_ext_base,
> > @@ -58,6 +68,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
> >       &vcpu_sbi_ext_rfence,
> >       &vcpu_sbi_ext_srst,
> >       &vcpu_sbi_ext_hsm,
> > +     &vcpu_sbi_ext_pmu,
> >       &vcpu_sbi_ext_experimental,
> >       &vcpu_sbi_ext_vendor,
> >  };
> > diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
> > new file mode 100644
> > index 000000000000..90c51a95d4f4
> > --- /dev/null
> > +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
> > @@ -0,0 +1,81 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +/*
> > + * Copyright (c) 2022 Rivos Inc
> > + *
> > + * Authors:
> > + *     Atish Patra <atishp@rivosinc.com>
> > + */
> > +
> > +#include <linux/errno.h>
> > +#include <linux/err.h>
> > +#include <linux/kvm_host.h>
> > +#include <asm/csr.h>
> > +#include <asm/sbi.h>
> > +#include <asm/kvm_vcpu_sbi.h>
> > +
> > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> > +                                unsigned long *out_val,
> > +                                struct kvm_cpu_trap *utrap,
> > +                                bool *exit)
> > +{
> > +     int ret = -EOPNOTSUPP;
> > +     struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
> > +     unsigned long funcid = cp->a6;
> > +     uint64_t temp;
>
> I think we need something like
>
>    if (!vcpu_to_pmu(vcpu)->enabled)
>       return -EOPNOTSUPP;
>
> here. Where 'enabled' is only true when we successfully initialize
> the pmu. And, successful initialization depends on

Yes. I have added the flag. But should we do the check here or
respective function
as a paranoia sanity check ?

> IS_ENABLED(CONFIG_RISCV_PMU_SBI) and

Why do we need to guard under CONFIG_RISCV_PMU_SBI ?
vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI

If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure.

In fact, I think we should also add the pmu enabled check in the probe function
itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when
both vcpu_to_pmu(vcpu)->enabled and
riscv_isa_extension_available(NULL, SSCOFPMF) are true.

Thoughts?

> riscv_isa_extension_available(NULL, SSCOFPMF) as well as not
> failing other things. And, KVM userspace should be able to
> disable the pmu, which keep enabled from being set as well.
>
We already have provisions for disabling sscofpmf from guests via ISA
one reg interface.
Do you mean disable the entire PMU from userspace ?

Currently, ARM64 enables pmu from user space using device control APIs
on vcpu fd.
Are you suggesting we should do something like that ?

If PMU needs to have device control APIs (either via vcpu fd or its
own), we can retrieve
the hpmcounter width and count from there as well.

> > +
> > +     switch (funcid) {
> > +     case SBI_EXT_PMU_NUM_COUNTERS:
> > +             ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val);
> > +             break;
> > +     case SBI_EXT_PMU_COUNTER_GET_INFO:
> > +             ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val);
> > +             break;
> > +     case SBI_EXT_PMU_COUNTER_CFG_MATCH:
> > +#if defined(CONFIG_32BIT)
> > +             temp = ((uint64_t)cp->a5 << 32) | cp->a4;
> > +#else
> > +             temp = cp->a4;
> > +#endif
> > +             ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp);
> > +             if (ret >= 0) {
> > +                     *out_val = ret;
> > +                     ret = 0;
> > +             }
> > +             break;
> > +     case SBI_EXT_PMU_COUNTER_START:
> > +#if defined(CONFIG_32BIT)
> > +             temp = ((uint64_t)cp->a4 << 32) | cp->a3;
> > +#else
> > +             temp = cp->a3;
> > +#endif
> > +             ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp);
> > +             break;
> > +     case SBI_EXT_PMU_COUNTER_STOP:
> > +             ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2);
> > +             break;
> > +     case SBI_EXT_PMU_COUNTER_FW_READ:
> > +             ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val);
> > +             break;
> > +     default:
> > +             ret = -EOPNOTSUPP;
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid)
> > +{
> > +     /*
> > +      * PMU Extension is only available to guests if privilege mode filtering
> > +      * is available. Otherwise, guest will always count events while the
> > +      * execution is in hypervisor mode.
> > +      */
> > +     return riscv_isa_extension_available(NULL, SSCOFPMF);
> > +}
> > +
> > +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
> > +     .extid_start = SBI_EXT_PMU,
> > +     .extid_end = SBI_EXT_PMU,
> > +     .handler = kvm_sbi_ext_pmu_handler,
> > +     .probe = kvm_sbi_ext_pmu_probe,
> > +};
> > --
> > 2.25.1
> >
>
> Thanks,
> drew
Andrew Jones Nov. 23, 2022, 1:58 p.m. UTC | #3
On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote:
> On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote:
...
> > > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> > > +                                unsigned long *out_val,
> > > +                                struct kvm_cpu_trap *utrap,
> > > +                                bool *exit)
> > > +{
> > > +     int ret = -EOPNOTSUPP;
> > > +     struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
> > > +     unsigned long funcid = cp->a6;
> > > +     uint64_t temp;
> >
> > I think we need something like
> >
> >    if (!vcpu_to_pmu(vcpu)->enabled)
> >       return -EOPNOTSUPP;
> >
> > here. Where 'enabled' is only true when we successfully initialize
> > the pmu. And, successful initialization depends on
> 
> Yes. I have added the flag. But should we do the check here or
> respective function
> as a paranoia sanity check ?

I think something like above that returns a not-supported error should be
in all the entry points, like the top level SBI call handler. Functions
that should never be called unless the PMU is active could have WARNs
added for sanity checks.

> 
> > IS_ENABLED(CONFIG_RISCV_PMU_SBI) and
> 
> Why do we need to guard under CONFIG_RISCV_PMU_SBI ?
> vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI
> 
> If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure.

You're right. We don't need explicit config checks for things that can't
exist without the config.

> 
> In fact, I think we should also add the pmu enabled check in the probe function
> itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when
> both vcpu_to_pmu(vcpu)->enabled and
> riscv_isa_extension_available(NULL, SSCOFPMF) are true.
> 
> Thoughts?

Agreed.

> 
> > riscv_isa_extension_available(NULL, SSCOFPMF) as well as not
> > failing other things. And, KVM userspace should be able to
> > disable the pmu, which keep enabled from being set as well.
> >
> We already have provisions for disabling sscofpmf from guests via ISA
> one reg interface.
> Do you mean disable the entire PMU from userspace ?

Yes. We may need to configure a VM without a PMU to increase its
migratability, workaround errata, or just for testing/debugging purposes.

> 
> Currently, ARM64 enables pmu from user space using device control APIs
> on vcpu fd.
> Are you suggesting we should do something like that ?

Yes. Although choosing which KVM API should be used could probably be
thought-out again. x86 uses VM ioctls.

> 
> If PMU needs to have device control APIs (either via vcpu fd or its
> own), we can retrieve
> the hpmcounter width and count from there as well.

Right. We need to decide how the VM/VCPU + PMU user interface should look.
A separate PMU device, like arm64 has, sounds good, but the ioctl
sequences for initialization may get more tricky.

Thanks,
drew
Atish Patra Nov. 24, 2022, 10:18 a.m. UTC | #4
On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote:
> > On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote:
> ...
> > > > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
> > > > +                                unsigned long *out_val,
> > > > +                                struct kvm_cpu_trap *utrap,
> > > > +                                bool *exit)
> > > > +{
> > > > +     int ret = -EOPNOTSUPP;
> > > > +     struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
> > > > +     unsigned long funcid = cp->a6;
> > > > +     uint64_t temp;
> > >
> > > I think we need something like
> > >
> > >    if (!vcpu_to_pmu(vcpu)->enabled)
> > >       return -EOPNOTSUPP;
> > >
> > > here. Where 'enabled' is only true when we successfully initialize
> > > the pmu. And, successful initialization depends on
> >
> > Yes. I have added the flag. But should we do the check here or
> > respective function
> > as a paranoia sanity check ?
>
> I think something like above that returns a not-supported error should be
> in all the entry points, like the top level SBI call handler. Functions
> that should never be called unless the PMU is active could have WARNs
> added for sanity checks.
>

Sure. Will add those checks.

> >
> > > IS_ENABLED(CONFIG_RISCV_PMU_SBI) and
> >
> > Why do we need to guard under CONFIG_RISCV_PMU_SBI ?
> > vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI
> >
> > If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure.
>
> You're right. We don't need explicit config checks for things that can't
> exist without the config.
>
> >
> > In fact, I think we should also add the pmu enabled check in the probe function
> > itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when
> > both vcpu_to_pmu(vcpu)->enabled and
> > riscv_isa_extension_available(NULL, SSCOFPMF) are true.
> >
> > Thoughts?
>
> Agreed.
>
> >
> > > riscv_isa_extension_available(NULL, SSCOFPMF) as well as not
> > > failing other things. And, KVM userspace should be able to
> > > disable the pmu, which keep enabled from being set as well.
> > >
> > We already have provisions for disabling sscofpmf from guests via ISA
> > one reg interface.
> > Do you mean disable the entire PMU from userspace ?
>
> Yes. We may need to configure a VM without a PMU to increase its
> migratability, workaround errata, or just for testing/debugging purposes.
>
> >
> > Currently, ARM64 enables pmu from user space using device control APIs
> > on vcpu fd.
> > Are you suggesting we should do something like that ?
>
> Yes. Although choosing which KVM API should be used could probably be
> thought-out again. x86 uses VM ioctls.
>

How does it handle hetergenous systems in per VM ioctls ?

> >
> > If PMU needs to have device control APIs (either via vcpu fd or its
> > own), we can retrieve
> > the hpmcounter width and count from there as well.
>
> Right. We need to decide how the VM/VCPU + PMU user interface should look.
> A separate PMU device, like arm64 has, sounds good, but the ioctl
> sequences for initialization may get more tricky.
>

Do we really need a per VM interface ? I was thinking we can just
continue to use
one reg interface for PMU as well. We probably need two of them.

1. To enable/disable SBI extension
    -- The probe function will depend on this
2. PMU specific get/set
    -- Number of hpmcounters
    -- hpmcounter width
    -- enable PMU


> Thanks,
> drew
Andrew Jones Nov. 24, 2022, 10:50 a.m. UTC | #5
On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote:
> On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote:
...
> > > Currently, ARM64 enables pmu from user space using device control APIs
> > > on vcpu fd.
> > > Are you suggesting we should do something like that ?
> >
> > Yes. Although choosing which KVM API should be used could probably be
> > thought-out again. x86 uses VM ioctls.
> >
> 
> How does it handle hetergenous systems in per VM ioctls ?

I don't think it does, but neither does arm64. Afaik, the only way to run
KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs,
i.e. make sure the system it runs on is homogeneous.

I agree we shouldn't paint ourselves into a homogeneous-only corner for
riscv, though, so if it's possible to use VCPU APIs, then I guess we
should. Although, one thing to keep in mind is that if the same ioctl
needs to be run on each VCPU, then, when we start building VMs with
hundreds of VCPUs, we'll see slow VM starts.

> 
> > >
> > > If PMU needs to have device control APIs (either via vcpu fd or its
> > > own), we can retrieve
> > > the hpmcounter width and count from there as well.
> >
> > Right. We need to decide how the VM/VCPU + PMU user interface should look.
> > A separate PMU device, like arm64 has, sounds good, but the ioctl
> > sequences for initialization may get more tricky.
> >
> 
> Do we really need a per VM interface ? I was thinking we can just
> continue to use
> one reg interface for PMU as well. We probably need two of them.
> 
> 1. To enable/disable SBI extension
>     -- The probe function will depend on this
> 2. PMU specific get/set
>     -- Number of hpmcounters
>     -- hpmcounter width
>     -- enable PMU

ONE_REG is good for registers and virtual registers, which means the
number of hpmcounters and the hpmcounter width are probably good
candidates, but I'm not sure we should use it for enable/init types of
purposes.

Thanks,
drew
Anup Patel Nov. 24, 2022, 12:59 p.m. UTC | #6
On Thu, Nov 24, 2022 at 4:21 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote:
> > On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > >
> > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote:
> ...
> > > > Currently, ARM64 enables pmu from user space using device control APIs
> > > > on vcpu fd.
> > > > Are you suggesting we should do something like that ?
> > >
> > > Yes. Although choosing which KVM API should be used could probably be
> > > thought-out again. x86 uses VM ioctls.
> > >
> >
> > How does it handle hetergenous systems in per VM ioctls ?
>
> I don't think it does, but neither does arm64. Afaik, the only way to run
> KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs,
> i.e. make sure the system it runs on is homogeneous.
>
> I agree we shouldn't paint ourselves into a homogeneous-only corner for
> riscv, though, so if it's possible to use VCPU APIs, then I guess we
> should. Although, one thing to keep in mind is that if the same ioctl
> needs to be run on each VCPU, then, when we start building VMs with
> hundreds of VCPUs, we'll see slow VM starts.
>
> >
> > > >
> > > > If PMU needs to have device control APIs (either via vcpu fd or its
> > > > own), we can retrieve
> > > > the hpmcounter width and count from there as well.
> > >
> > > Right. We need to decide how the VM/VCPU + PMU user interface should look.
> > > A separate PMU device, like arm64 has, sounds good, but the ioctl
> > > sequences for initialization may get more tricky.
> > >
> >
> > Do we really need a per VM interface ? I was thinking we can just
> > continue to use
> > one reg interface for PMU as well. We probably need two of them.
> >
> > 1. To enable/disable SBI extension
> >     -- The probe function will depend on this
> > 2. PMU specific get/set
> >     -- Number of hpmcounters
> >     -- hpmcounter width
> >     -- enable PMU
>
> ONE_REG is good for registers and virtual registers, which means the
> number of hpmcounters and the hpmcounter width are probably good
> candidates, but I'm not sure we should use it for enable/init types of
> purposes.

We are already using ONE_REG interface to enable/disable
ISA extensions so we should follow the same pattern and have
ONE_REG interface to enable/disable SBI extensions as well.

Regards,
Anup
Atish Patra Nov. 28, 2022, 9 p.m. UTC | #7
On Thu, Nov 24, 2022 at 4:59 AM Anup Patel <apatel@ventanamicro.com> wrote:
>
> On Thu, Nov 24, 2022 at 4:21 PM Andrew Jones <ajones@ventanamicro.com> wrote:
> >
> > On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote:
> > > On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote:
> > > >
> > > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote:
> > ...
> > > > > Currently, ARM64 enables pmu from user space using device control APIs
> > > > > on vcpu fd.
> > > > > Are you suggesting we should do something like that ?
> > > >
> > > > Yes. Although choosing which KVM API should be used could probably be
> > > > thought-out again. x86 uses VM ioctls.
> > > >
> > >
> > > How does it handle hetergenous systems in per VM ioctls ?
> >
> > I don't think it does, but neither does arm64. Afaik, the only way to run
> > KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs,
> > i.e. make sure the system it runs on is homogeneous.
> >
> > I agree we shouldn't paint ourselves into a homogeneous-only corner for
> > riscv, though, so if it's possible to use VCPU APIs, then I guess we
> > should. Although, one thing to keep in mind is that if the same ioctl
> > needs to be run on each VCPU, then, when we start building VMs with
> > hundreds of VCPUs, we'll see slow VM starts.
> >
> > >
> > > > >
> > > > > If PMU needs to have device control APIs (either via vcpu fd or its
> > > > > own), we can retrieve
> > > > > the hpmcounter width and count from there as well.
> > > >
> > > > Right. We need to decide how the VM/VCPU + PMU user interface should look.
> > > > A separate PMU device, like arm64 has, sounds good, but the ioctl
> > > > sequences for initialization may get more tricky.
> > > >
> > >
> > > Do we really need a per VM interface ? I was thinking we can just
> > > continue to use
> > > one reg interface for PMU as well. We probably need two of them.
> > >
> > > 1. To enable/disable SBI extension
> > >     -- The probe function will depend on this
> > > 2. PMU specific get/set
> > >     -- Number of hpmcounters
> > >     -- hpmcounter width
> > >     -- enable PMU
> >
> > ONE_REG is good for registers and virtual registers, which means the
> > number of hpmcounters and the hpmcounter width are probably good
> > candidates, but I'm not sure we should use it for enable/init types of
> > purposes.
>
> We are already using ONE_REG interface to enable/disable
> ISA extensions so we should follow the same pattern and have
> ONE_REG interface to enable/disable SBI extensions as well.
>

Thinking about it more, it may end up in vcpus with heterogeneous
capabilities (different SBI extension).
Most likely, it will be a bug and incorrect configuration from VMM but
it's a possibility.
For example: There will be two different scenarios for PMU extension

1. Boot vcpu disabled PMU extension - probably okay as guest queries
the PMU extension on boot cpu only.
The PMU extension won't be available for any other vcpu.

2. Boot vcpu has PMU extension enabled but others have it disabled.
The run time SBI PMU calls will
fail with EOPNOTSUPP and the user gets confused.

There will be similar cases for HSM extension as well.

As the entire extension won't be available to the guest if the SBI
extension is disabled for the boot cpu only.
There is also a slow VM start issue Andrew pointed about earlier.

This makes me think if a VM ioctl to disable/enable the SBI extension
for all vcpus makes more sense in this case.


> Regards,
> Anup
diff mbox series

Patch

diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c
index d45e7da3f0d3..da9f7959340e 100644
--- a/arch/riscv/kvm/vcpu_sbi.c
+++ b/arch/riscv/kvm/vcpu_sbi.c
@@ -50,6 +50,16 @@  extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental;
 extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor;
 
+#ifdef CONFIG_RISCV_PMU_SBI
+extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu;
+#else
+static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
+	.extid_start = -1UL,
+	.extid_end = -1UL,
+	.handler = NULL,
+};
+#endif
+
 static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
 	&vcpu_sbi_ext_v01,
 	&vcpu_sbi_ext_base,
@@ -58,6 +68,7 @@  static const struct kvm_vcpu_sbi_extension *sbi_ext[] = {
 	&vcpu_sbi_ext_rfence,
 	&vcpu_sbi_ext_srst,
 	&vcpu_sbi_ext_hsm,
+	&vcpu_sbi_ext_pmu,
 	&vcpu_sbi_ext_experimental,
 	&vcpu_sbi_ext_vendor,
 };
diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c
new file mode 100644
index 000000000000..90c51a95d4f4
--- /dev/null
+++ b/arch/riscv/kvm/vcpu_sbi_pmu.c
@@ -0,0 +1,81 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Rivos Inc
+ *
+ * Authors:
+ *     Atish Patra <atishp@rivosinc.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <asm/csr.h>
+#include <asm/sbi.h>
+#include <asm/kvm_vcpu_sbi.h>
+
+static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run,
+				   unsigned long *out_val,
+				   struct kvm_cpu_trap *utrap,
+				   bool *exit)
+{
+	int ret = -EOPNOTSUPP;
+	struct kvm_cpu_context *cp = &vcpu->arch.guest_context;
+	unsigned long funcid = cp->a6;
+	uint64_t temp;
+
+	switch (funcid) {
+	case SBI_EXT_PMU_NUM_COUNTERS:
+		ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val);
+		break;
+	case SBI_EXT_PMU_COUNTER_GET_INFO:
+		ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val);
+		break;
+	case SBI_EXT_PMU_COUNTER_CFG_MATCH:
+#if defined(CONFIG_32BIT)
+		temp = ((uint64_t)cp->a5 << 32) | cp->a4;
+#else
+		temp = cp->a4;
+#endif
+		ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp);
+		if (ret >= 0) {
+			*out_val = ret;
+			ret = 0;
+		}
+		break;
+	case SBI_EXT_PMU_COUNTER_START:
+#if defined(CONFIG_32BIT)
+		temp = ((uint64_t)cp->a4 << 32) | cp->a3;
+#else
+		temp = cp->a3;
+#endif
+		ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp);
+		break;
+	case SBI_EXT_PMU_COUNTER_STOP:
+		ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2);
+		break;
+	case SBI_EXT_PMU_COUNTER_FW_READ:
+		ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val);
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+	}
+
+	return ret;
+}
+
+unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid)
+{
+	/*
+	 * PMU Extension is only available to guests if privilege mode filtering
+	 * is available. Otherwise, guest will always count events while the
+	 * execution is in hypervisor mode.
+	 */
+	return riscv_isa_extension_available(NULL, SSCOFPMF);
+}
+
+const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = {
+	.extid_start = SBI_EXT_PMU,
+	.extid_end = SBI_EXT_PMU,
+	.handler = kvm_sbi_ext_pmu_handler,
+	.probe = kvm_sbi_ext_pmu_probe,
+};