Message ID | 20220718170205.2972215-7-atishp@rivosinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM perf support | expand |
On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote: > SBI PMU extension allows KVM guests to configure/start/stop/query about > the PMU counters in virtualized enviornment as well. > > In order to allow that, KVM implements the entire SBI PMU extension. > > Signed-off-by: Atish Patra <atishp@rivosinc.com> > --- > arch/riscv/kvm/vcpu_sbi.c | 11 +++++ > arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++ > 2 files changed, 92 insertions(+) > create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c > > diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c > index d45e7da3f0d3..da9f7959340e 100644 > --- a/arch/riscv/kvm/vcpu_sbi.c > +++ b/arch/riscv/kvm/vcpu_sbi.c > @@ -50,6 +50,16 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; > extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; > extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; > > +#ifdef CONFIG_RISCV_PMU_SBI > +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu; > +#else > +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { > + .extid_start = -1UL, > + .extid_end = -1UL, > + .handler = NULL, > +}; > +#endif > + > static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { > &vcpu_sbi_ext_v01, > &vcpu_sbi_ext_base, > @@ -58,6 +68,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { > &vcpu_sbi_ext_rfence, > &vcpu_sbi_ext_srst, > &vcpu_sbi_ext_hsm, > + &vcpu_sbi_ext_pmu, > &vcpu_sbi_ext_experimental, > &vcpu_sbi_ext_vendor, > }; > diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c > new file mode 100644 > index 000000000000..90c51a95d4f4 > --- /dev/null > +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c > @@ -0,0 +1,81 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (c) 2022 Rivos Inc > + * > + * Authors: > + * Atish Patra <atishp@rivosinc.com> > + */ > + > +#include <linux/errno.h> > +#include <linux/err.h> > +#include <linux/kvm_host.h> > +#include <asm/csr.h> > +#include <asm/sbi.h> > +#include <asm/kvm_vcpu_sbi.h> > + > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > + unsigned long *out_val, > + struct kvm_cpu_trap *utrap, > + bool *exit) > +{ > + int ret = -EOPNOTSUPP; > + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; > + unsigned long funcid = cp->a6; > + uint64_t temp; I think we need something like if (!vcpu_to_pmu(vcpu)->enabled) return -EOPNOTSUPP; here. Where 'enabled' is only true when we successfully initialize the pmu. And, successful initialization depends on IS_ENABLED(CONFIG_RISCV_PMU_SBI) and riscv_isa_extension_available(NULL, SSCOFPMF) as well as not failing other things. And, KVM userspace should be able to disable the pmu, which keep enabled from being set as well. > + > + switch (funcid) { > + case SBI_EXT_PMU_NUM_COUNTERS: > + ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val); > + break; > + case SBI_EXT_PMU_COUNTER_GET_INFO: > + ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val); > + break; > + case SBI_EXT_PMU_COUNTER_CFG_MATCH: > +#if defined(CONFIG_32BIT) > + temp = ((uint64_t)cp->a5 << 32) | cp->a4; > +#else > + temp = cp->a4; > +#endif > + ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp); > + if (ret >= 0) { > + *out_val = ret; > + ret = 0; > + } > + break; > + case SBI_EXT_PMU_COUNTER_START: > +#if defined(CONFIG_32BIT) > + temp = ((uint64_t)cp->a4 << 32) | cp->a3; > +#else > + temp = cp->a3; > +#endif > + ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp); > + break; > + case SBI_EXT_PMU_COUNTER_STOP: > + ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2); > + break; > + case SBI_EXT_PMU_COUNTER_FW_READ: > + ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val); > + break; > + default: > + ret = -EOPNOTSUPP; > + } > + > + return ret; > +} > + > +unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid) > +{ > + /* > + * PMU Extension is only available to guests if privilege mode filtering > + * is available. Otherwise, guest will always count events while the > + * execution is in hypervisor mode. > + */ > + return riscv_isa_extension_available(NULL, SSCOFPMF); > +} > + > +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { > + .extid_start = SBI_EXT_PMU, > + .extid_end = SBI_EXT_PMU, > + .handler = kvm_sbi_ext_pmu_handler, > + .probe = kvm_sbi_ext_pmu_probe, > +}; > -- > 2.25.1 > Thanks, drew
On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote: > > SBI PMU extension allows KVM guests to configure/start/stop/query about > > the PMU counters in virtualized enviornment as well. > > > > In order to allow that, KVM implements the entire SBI PMU extension. > > > > Signed-off-by: Atish Patra <atishp@rivosinc.com> > > --- > > arch/riscv/kvm/vcpu_sbi.c | 11 +++++ > > arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++ > > 2 files changed, 92 insertions(+) > > create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c > > > > diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c > > index d45e7da3f0d3..da9f7959340e 100644 > > --- a/arch/riscv/kvm/vcpu_sbi.c > > +++ b/arch/riscv/kvm/vcpu_sbi.c > > @@ -50,6 +50,16 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; > > extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; > > extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; > > > > +#ifdef CONFIG_RISCV_PMU_SBI > > +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu; > > +#else > > +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { > > + .extid_start = -1UL, > > + .extid_end = -1UL, > > + .handler = NULL, > > +}; > > +#endif > > + > > static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { > > &vcpu_sbi_ext_v01, > > &vcpu_sbi_ext_base, > > @@ -58,6 +68,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { > > &vcpu_sbi_ext_rfence, > > &vcpu_sbi_ext_srst, > > &vcpu_sbi_ext_hsm, > > + &vcpu_sbi_ext_pmu, > > &vcpu_sbi_ext_experimental, > > &vcpu_sbi_ext_vendor, > > }; > > diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c > > new file mode 100644 > > index 000000000000..90c51a95d4f4 > > --- /dev/null > > +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c > > @@ -0,0 +1,81 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (c) 2022 Rivos Inc > > + * > > + * Authors: > > + * Atish Patra <atishp@rivosinc.com> > > + */ > > + > > +#include <linux/errno.h> > > +#include <linux/err.h> > > +#include <linux/kvm_host.h> > > +#include <asm/csr.h> > > +#include <asm/sbi.h> > > +#include <asm/kvm_vcpu_sbi.h> > > + > > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > + unsigned long *out_val, > > + struct kvm_cpu_trap *utrap, > > + bool *exit) > > +{ > > + int ret = -EOPNOTSUPP; > > + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; > > + unsigned long funcid = cp->a6; > > + uint64_t temp; > > I think we need something like > > if (!vcpu_to_pmu(vcpu)->enabled) > return -EOPNOTSUPP; > > here. Where 'enabled' is only true when we successfully initialize > the pmu. And, successful initialization depends on Yes. I have added the flag. But should we do the check here or respective function as a paranoia sanity check ? > IS_ENABLED(CONFIG_RISCV_PMU_SBI) and Why do we need to guard under CONFIG_RISCV_PMU_SBI ? vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure. In fact, I think we should also add the pmu enabled check in the probe function itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when both vcpu_to_pmu(vcpu)->enabled and riscv_isa_extension_available(NULL, SSCOFPMF) are true. Thoughts? > riscv_isa_extension_available(NULL, SSCOFPMF) as well as not > failing other things. And, KVM userspace should be able to > disable the pmu, which keep enabled from being set as well. > We already have provisions for disabling sscofpmf from guests via ISA one reg interface. Do you mean disable the entire PMU from userspace ? Currently, ARM64 enables pmu from user space using device control APIs on vcpu fd. Are you suggesting we should do something like that ? If PMU needs to have device control APIs (either via vcpu fd or its own), we can retrieve the hpmcounter width and count from there as well. > > + > > + switch (funcid) { > > + case SBI_EXT_PMU_NUM_COUNTERS: > > + ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val); > > + break; > > + case SBI_EXT_PMU_COUNTER_GET_INFO: > > + ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val); > > + break; > > + case SBI_EXT_PMU_COUNTER_CFG_MATCH: > > +#if defined(CONFIG_32BIT) > > + temp = ((uint64_t)cp->a5 << 32) | cp->a4; > > +#else > > + temp = cp->a4; > > +#endif > > + ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp); > > + if (ret >= 0) { > > + *out_val = ret; > > + ret = 0; > > + } > > + break; > > + case SBI_EXT_PMU_COUNTER_START: > > +#if defined(CONFIG_32BIT) > > + temp = ((uint64_t)cp->a4 << 32) | cp->a3; > > +#else > > + temp = cp->a3; > > +#endif > > + ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp); > > + break; > > + case SBI_EXT_PMU_COUNTER_STOP: > > + ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2); > > + break; > > + case SBI_EXT_PMU_COUNTER_FW_READ: > > + ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val); > > + break; > > + default: > > + ret = -EOPNOTSUPP; > > + } > > + > > + return ret; > > +} > > + > > +unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid) > > +{ > > + /* > > + * PMU Extension is only available to guests if privilege mode filtering > > + * is available. Otherwise, guest will always count events while the > > + * execution is in hypervisor mode. > > + */ > > + return riscv_isa_extension_available(NULL, SSCOFPMF); > > +} > > + > > +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { > > + .extid_start = SBI_EXT_PMU, > > + .extid_end = SBI_EXT_PMU, > > + .handler = kvm_sbi_ext_pmu_handler, > > + .probe = kvm_sbi_ext_pmu_probe, > > +}; > > -- > > 2.25.1 > > > > Thanks, > drew
On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote: > On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote: ... > > > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > > + unsigned long *out_val, > > > + struct kvm_cpu_trap *utrap, > > > + bool *exit) > > > +{ > > > + int ret = -EOPNOTSUPP; > > > + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; > > > + unsigned long funcid = cp->a6; > > > + uint64_t temp; > > > > I think we need something like > > > > if (!vcpu_to_pmu(vcpu)->enabled) > > return -EOPNOTSUPP; > > > > here. Where 'enabled' is only true when we successfully initialize > > the pmu. And, successful initialization depends on > > Yes. I have added the flag. But should we do the check here or > respective function > as a paranoia sanity check ? I think something like above that returns a not-supported error should be in all the entry points, like the top level SBI call handler. Functions that should never be called unless the PMU is active could have WARNs added for sanity checks. > > > IS_ENABLED(CONFIG_RISCV_PMU_SBI) and > > Why do we need to guard under CONFIG_RISCV_PMU_SBI ? > vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI > > If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure. You're right. We don't need explicit config checks for things that can't exist without the config. > > In fact, I think we should also add the pmu enabled check in the probe function > itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when > both vcpu_to_pmu(vcpu)->enabled and > riscv_isa_extension_available(NULL, SSCOFPMF) are true. > > Thoughts? Agreed. > > > riscv_isa_extension_available(NULL, SSCOFPMF) as well as not > > failing other things. And, KVM userspace should be able to > > disable the pmu, which keep enabled from being set as well. > > > We already have provisions for disabling sscofpmf from guests via ISA > one reg interface. > Do you mean disable the entire PMU from userspace ? Yes. We may need to configure a VM without a PMU to increase its migratability, workaround errata, or just for testing/debugging purposes. > > Currently, ARM64 enables pmu from user space using device control APIs > on vcpu fd. > Are you suggesting we should do something like that ? Yes. Although choosing which KVM API should be used could probably be thought-out again. x86 uses VM ioctls. > > If PMU needs to have device control APIs (either via vcpu fd or its > own), we can retrieve > the hpmcounter width and count from there as well. Right. We need to decide how the VM/VCPU + PMU user interface should look. A separate PMU device, like arm64 has, sounds good, but the ioctl sequences for initialization may get more tricky. Thanks, drew
On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote: > > On Tue, Nov 1, 2022 at 7:26 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > > > On Mon, Jul 18, 2022 at 10:02:02AM -0700, Atish Patra wrote: > ... > > > > +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > > > + unsigned long *out_val, > > > > + struct kvm_cpu_trap *utrap, > > > > + bool *exit) > > > > +{ > > > > + int ret = -EOPNOTSUPP; > > > > + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; > > > > + unsigned long funcid = cp->a6; > > > > + uint64_t temp; > > > > > > I think we need something like > > > > > > if (!vcpu_to_pmu(vcpu)->enabled) > > > return -EOPNOTSUPP; > > > > > > here. Where 'enabled' is only true when we successfully initialize > > > the pmu. And, successful initialization depends on > > > > Yes. I have added the flag. But should we do the check here or > > respective function > > as a paranoia sanity check ? > > I think something like above that returns a not-supported error should be > in all the entry points, like the top level SBI call handler. Functions > that should never be called unless the PMU is active could have WARNs > added for sanity checks. > Sure. Will add those checks. > > > > > IS_ENABLED(CONFIG_RISCV_PMU_SBI) and > > > > Why do we need to guard under CONFIG_RISCV_PMU_SBI ? > > vcpu_sbi_pmu.c is only compiled under CONFIG_RISCV_PMU_SBI > > > > If CONFIG_RISCV_PMU_SBI is not enabled, probe function will return failure. > > You're right. We don't need explicit config checks for things that can't > exist without the config. > > > > > In fact, I think we should also add the pmu enabled check in the probe function > > itself. Probe function(kvm_sbi_ext_pmu_probe) should only true when > > both vcpu_to_pmu(vcpu)->enabled and > > riscv_isa_extension_available(NULL, SSCOFPMF) are true. > > > > Thoughts? > > Agreed. > > > > > > riscv_isa_extension_available(NULL, SSCOFPMF) as well as not > > > failing other things. And, KVM userspace should be able to > > > disable the pmu, which keep enabled from being set as well. > > > > > We already have provisions for disabling sscofpmf from guests via ISA > > one reg interface. > > Do you mean disable the entire PMU from userspace ? > > Yes. We may need to configure a VM without a PMU to increase its > migratability, workaround errata, or just for testing/debugging purposes. > > > > > Currently, ARM64 enables pmu from user space using device control APIs > > on vcpu fd. > > Are you suggesting we should do something like that ? > > Yes. Although choosing which KVM API should be used could probably be > thought-out again. x86 uses VM ioctls. > How does it handle hetergenous systems in per VM ioctls ? > > > > If PMU needs to have device control APIs (either via vcpu fd or its > > own), we can retrieve > > the hpmcounter width and count from there as well. > > Right. We need to decide how the VM/VCPU + PMU user interface should look. > A separate PMU device, like arm64 has, sounds good, but the ioctl > sequences for initialization may get more tricky. > Do we really need a per VM interface ? I was thinking we can just continue to use one reg interface for PMU as well. We probably need two of them. 1. To enable/disable SBI extension -- The probe function will depend on this 2. PMU specific get/set -- Number of hpmcounters -- hpmcounter width -- enable PMU > Thanks, > drew
On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote: > On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote: ... > > > Currently, ARM64 enables pmu from user space using device control APIs > > > on vcpu fd. > > > Are you suggesting we should do something like that ? > > > > Yes. Although choosing which KVM API should be used could probably be > > thought-out again. x86 uses VM ioctls. > > > > How does it handle hetergenous systems in per VM ioctls ? I don't think it does, but neither does arm64. Afaik, the only way to run KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs, i.e. make sure the system it runs on is homogeneous. I agree we shouldn't paint ourselves into a homogeneous-only corner for riscv, though, so if it's possible to use VCPU APIs, then I guess we should. Although, one thing to keep in mind is that if the same ioctl needs to be run on each VCPU, then, when we start building VMs with hundreds of VCPUs, we'll see slow VM starts. > > > > > > > If PMU needs to have device control APIs (either via vcpu fd or its > > > own), we can retrieve > > > the hpmcounter width and count from there as well. > > > > Right. We need to decide how the VM/VCPU + PMU user interface should look. > > A separate PMU device, like arm64 has, sounds good, but the ioctl > > sequences for initialization may get more tricky. > > > > Do we really need a per VM interface ? I was thinking we can just > continue to use > one reg interface for PMU as well. We probably need two of them. > > 1. To enable/disable SBI extension > -- The probe function will depend on this > 2. PMU specific get/set > -- Number of hpmcounters > -- hpmcounter width > -- enable PMU ONE_REG is good for registers and virtual registers, which means the number of hpmcounters and the hpmcounter width are probably good candidates, but I'm not sure we should use it for enable/init types of purposes. Thanks, drew
On Thu, Nov 24, 2022 at 4:21 PM Andrew Jones <ajones@ventanamicro.com> wrote: > > On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote: > > On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote: > ... > > > > Currently, ARM64 enables pmu from user space using device control APIs > > > > on vcpu fd. > > > > Are you suggesting we should do something like that ? > > > > > > Yes. Although choosing which KVM API should be used could probably be > > > thought-out again. x86 uses VM ioctls. > > > > > > > How does it handle hetergenous systems in per VM ioctls ? > > I don't think it does, but neither does arm64. Afaik, the only way to run > KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs, > i.e. make sure the system it runs on is homogeneous. > > I agree we shouldn't paint ourselves into a homogeneous-only corner for > riscv, though, so if it's possible to use VCPU APIs, then I guess we > should. Although, one thing to keep in mind is that if the same ioctl > needs to be run on each VCPU, then, when we start building VMs with > hundreds of VCPUs, we'll see slow VM starts. > > > > > > > > > > > If PMU needs to have device control APIs (either via vcpu fd or its > > > > own), we can retrieve > > > > the hpmcounter width and count from there as well. > > > > > > Right. We need to decide how the VM/VCPU + PMU user interface should look. > > > A separate PMU device, like arm64 has, sounds good, but the ioctl > > > sequences for initialization may get more tricky. > > > > > > > Do we really need a per VM interface ? I was thinking we can just > > continue to use > > one reg interface for PMU as well. We probably need two of them. > > > > 1. To enable/disable SBI extension > > -- The probe function will depend on this > > 2. PMU specific get/set > > -- Number of hpmcounters > > -- hpmcounter width > > -- enable PMU > > ONE_REG is good for registers and virtual registers, which means the > number of hpmcounters and the hpmcounter width are probably good > candidates, but I'm not sure we should use it for enable/init types of > purposes. We are already using ONE_REG interface to enable/disable ISA extensions so we should follow the same pattern and have ONE_REG interface to enable/disable SBI extensions as well. Regards, Anup
On Thu, Nov 24, 2022 at 4:59 AM Anup Patel <apatel@ventanamicro.com> wrote: > > On Thu, Nov 24, 2022 at 4:21 PM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > On Thu, Nov 24, 2022 at 02:18:26AM -0800, Atish Patra wrote: > > > On Wed, Nov 23, 2022 at 5:58 AM Andrew Jones <ajones@ventanamicro.com> wrote: > > > > > > > > On Tue, Nov 22, 2022 at 03:08:34PM -0800, Atish Patra wrote: > > ... > > > > > Currently, ARM64 enables pmu from user space using device control APIs > > > > > on vcpu fd. > > > > > Are you suggesting we should do something like that ? > > > > > > > > Yes. Although choosing which KVM API should be used could probably be > > > > thought-out again. x86 uses VM ioctls. > > > > > > > > > > How does it handle hetergenous systems in per VM ioctls ? > > > > I don't think it does, but neither does arm64. Afaik, the only way to run > > KVM VMs on heterogeneous systems is to pin the VM to one set of the CPUs, > > i.e. make sure the system it runs on is homogeneous. > > > > I agree we shouldn't paint ourselves into a homogeneous-only corner for > > riscv, though, so if it's possible to use VCPU APIs, then I guess we > > should. Although, one thing to keep in mind is that if the same ioctl > > needs to be run on each VCPU, then, when we start building VMs with > > hundreds of VCPUs, we'll see slow VM starts. > > > > > > > > > > > > > > > If PMU needs to have device control APIs (either via vcpu fd or its > > > > > own), we can retrieve > > > > > the hpmcounter width and count from there as well. > > > > > > > > Right. We need to decide how the VM/VCPU + PMU user interface should look. > > > > A separate PMU device, like arm64 has, sounds good, but the ioctl > > > > sequences for initialization may get more tricky. > > > > > > > > > > Do we really need a per VM interface ? I was thinking we can just > > > continue to use > > > one reg interface for PMU as well. We probably need two of them. > > > > > > 1. To enable/disable SBI extension > > > -- The probe function will depend on this > > > 2. PMU specific get/set > > > -- Number of hpmcounters > > > -- hpmcounter width > > > -- enable PMU > > > > ONE_REG is good for registers and virtual registers, which means the > > number of hpmcounters and the hpmcounter width are probably good > > candidates, but I'm not sure we should use it for enable/init types of > > purposes. > > We are already using ONE_REG interface to enable/disable > ISA extensions so we should follow the same pattern and have > ONE_REG interface to enable/disable SBI extensions as well. > Thinking about it more, it may end up in vcpus with heterogeneous capabilities (different SBI extension). Most likely, it will be a bug and incorrect configuration from VMM but it's a possibility. For example: There will be two different scenarios for PMU extension 1. Boot vcpu disabled PMU extension - probably okay as guest queries the PMU extension on boot cpu only. The PMU extension won't be available for any other vcpu. 2. Boot vcpu has PMU extension enabled but others have it disabled. The run time SBI PMU calls will fail with EOPNOTSUPP and the user gets confused. There will be similar cases for HSM extension as well. As the entire extension won't be available to the guest if the SBI extension is disabled for the boot cpu only. There is also a slow VM start issue Andrew pointed about earlier. This makes me think if a VM ioctl to disable/enable the SBI extension for all vcpus makes more sense in this case. > Regards, > Anup
diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index d45e7da3f0d3..da9f7959340e 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -50,6 +50,16 @@ extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_hsm; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_experimental; extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_vendor; +#ifdef CONFIG_RISCV_PMU_SBI +extern const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu; +#else +static const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { + .extid_start = -1UL, + .extid_end = -1UL, + .handler = NULL, +}; +#endif + static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { &vcpu_sbi_ext_v01, &vcpu_sbi_ext_base, @@ -58,6 +68,7 @@ static const struct kvm_vcpu_sbi_extension *sbi_ext[] = { &vcpu_sbi_ext_rfence, &vcpu_sbi_ext_srst, &vcpu_sbi_ext_hsm, + &vcpu_sbi_ext_pmu, &vcpu_sbi_ext_experimental, &vcpu_sbi_ext_vendor, }; diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c new file mode 100644 index 000000000000..90c51a95d4f4 --- /dev/null +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Rivos Inc + * + * Authors: + * Atish Patra <atishp@rivosinc.com> + */ + +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <asm/csr.h> +#include <asm/sbi.h> +#include <asm/kvm_vcpu_sbi.h> + +static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, + unsigned long *out_val, + struct kvm_cpu_trap *utrap, + bool *exit) +{ + int ret = -EOPNOTSUPP; + struct kvm_cpu_context *cp = &vcpu->arch.guest_context; + unsigned long funcid = cp->a6; + uint64_t temp; + + switch (funcid) { + case SBI_EXT_PMU_NUM_COUNTERS: + ret = kvm_riscv_vcpu_pmu_num_ctrs(vcpu, out_val); + break; + case SBI_EXT_PMU_COUNTER_GET_INFO: + ret = kvm_riscv_vcpu_pmu_ctr_info(vcpu, cp->a0, out_val); + break; + case SBI_EXT_PMU_COUNTER_CFG_MATCH: +#if defined(CONFIG_32BIT) + temp = ((uint64_t)cp->a5 << 32) | cp->a4; +#else + temp = cp->a4; +#endif + ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp); + if (ret >= 0) { + *out_val = ret; + ret = 0; + } + break; + case SBI_EXT_PMU_COUNTER_START: +#if defined(CONFIG_32BIT) + temp = ((uint64_t)cp->a4 << 32) | cp->a3; +#else + temp = cp->a3; +#endif + ret = kvm_riscv_vcpu_pmu_ctr_start(vcpu, cp->a0, cp->a1, cp->a2, temp); + break; + case SBI_EXT_PMU_COUNTER_STOP: + ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2); + break; + case SBI_EXT_PMU_COUNTER_FW_READ: + ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, out_val); + break; + default: + ret = -EOPNOTSUPP; + } + + return ret; +} + +unsigned long kvm_sbi_ext_pmu_probe(unsigned long extid) +{ + /* + * PMU Extension is only available to guests if privilege mode filtering + * is available. Otherwise, guest will always count events while the + * execution is in hypervisor mode. + */ + return riscv_isa_extension_available(NULL, SSCOFPMF); +} + +const struct kvm_vcpu_sbi_extension vcpu_sbi_ext_pmu = { + .extid_start = SBI_EXT_PMU, + .extid_end = SBI_EXT_PMU, + .handler = kvm_sbi_ext_pmu_handler, + .probe = kvm_sbi_ext_pmu_probe, +};
SBI PMU extension allows KVM guests to configure/start/stop/query about the PMU counters in virtualized enviornment as well. In order to allow that, KVM implements the entire SBI PMU extension. Signed-off-by: Atish Patra <atishp@rivosinc.com> --- arch/riscv/kvm/vcpu_sbi.c | 11 +++++ arch/riscv/kvm/vcpu_sbi_pmu.c | 81 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 arch/riscv/kvm/vcpu_sbi_pmu.c