[v1,09/10] RISC-V: KVM: Add perf sampling support for guests

Message ID	20231218104107.2976925-10-atishp@rivosinc.com (mailing list archive)
State	New, archived
Headers	show Received: from mail-ot1-f41.google.com (mail-ot1-f41.google.com [209.85.210.41]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E3C1D200D6 for <kvm@vger.kernel.org>; Mon, 18 Dec 2023 10:41:32 +0000 (UTC) From: Atish Patra <atishp@rivosinc.com> To: linux-kernel@vger.kernel.org Cc: Atish Patra <atishp@rivosinc.com>, Albert Ou <aou@eecs.berkeley.edu>, Alexandre Ghiti <alexghiti@rivosinc.com>, Andrew Jones <ajones@ventanamicro.com>, Anup Patel <anup@brainfault.org>, Atish Patra <atishp@atishpatra.org>, Conor Dooley <conor.dooley@microchip.com>, Guo Ren <guoren@kernel.org>, Icenowy Zheng <uwu@icenowy.me>, kvm-riscv@lists.infradead.org, kvm@vger.kernel.org, linux-riscv@lists.infradead.org, Mark Rutland <mark.rutland@arm.com>, Palmer Dabbelt <palmer@dabbelt.com>, Paul Walmsley <paul.walmsley@sifive.com>, Will Deacon <will@kernel.org> Subject: [v1 09/10] RISC-V: KVM: Add perf sampling support for guests Date: Mon, 18 Dec 2023 02:41:06 -0800 Message-Id: <20231218104107.2976925-10-atishp@rivosinc.com> In-Reply-To: <20231218104107.2976925-1-atishp@rivosinc.com> References: <20231218104107.2976925-1-atishp@rivosinc.com> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	RISC-V SBI v2.0 PMU improvements and Perf sampling in KVM guest \| expand [v1,00/10] RISC-V SBI v2.0 PMU improvements and Perf sampling in KVM guest [v1,01/10] RISC-V: Fix the typo in Scountovf CSR name [v1,02/10] RISC-V: Add FIRMWARE_READ_HI definition [v1,03/10] drivers/perf: riscv: Read upper bits of a firmware counter [v1,04/10] RISC-V: Add SBI PMU snapshot definitions [v1,05/10] drivers/perf: riscv: Implement SBI PMU snapshot function [v1,06/10] RISC-V: KVM: No need to update the counter value during reset [v1,07/10] RISC-V: KVM: No need to exit to the user space if perf event failed [v1,08/10] RISC-V: KVM: Implement SBI PMU Snapshot feature [v1,09/10] RISC-V: KVM: Add perf sampling support for guests [v1,10/10] RISC-V: KVM: Support 64 bit firmware counters on RV32

diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 88cdc8a3e654..bec09b33e2f0 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -168,7 +168,8 @@ #define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT) #define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ (_AC(1, UL) << IRQ_S_TIMER) | \ - (_AC(1, UL) << IRQ_S_EXT)) + (_AC(1, UL) << IRQ_S_EXT) | \ + (_AC(1, UL) << IRQ_PMU_OVF)) /* AIA CSR bits */ #define TOPI_IID_SHIFT 16 diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 5e6fc9ac2b90..931887e07a66 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -36,6 +36,7 @@ struct kvm_pmc { bool started; /* Monitoring event ID */ unsigned long event_idx; + struct kvm_vcpu *vcpu; }; /* PMU data structure per vcpu */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 60d3b21dead7..741c16f4518e 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -139,6 +139,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZIHPM, KVM_RISCV_ISA_EXT_SMSTATEEN, KVM_RISCV_ISA_EXT_ZICOND, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..5a3a4cee0e3d 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -43,6 +43,7 @@ int kvm_arch_hardware_enable(void) csr_write(CSR_HCOUNTEREN, 0x02); csr_write(CSR_HVIP, 0); + csr_write(CSR_HVIEN, 1UL << IRQ_PMU_OVF); kvm_riscv_aia_enable(); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index e087c809073c..2d9f252356c3 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -380,7 +380,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -395,14 +396,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 855d12b6a4a5..e926681398f2 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = { /* Multi letter extensions (alphabetically sorted) */ KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -88,6 +89,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: case KVM_RISCV_ISA_EXT_SSTC: + case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_RISCV_ISA_EXT_ZBA: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index a6e9c2132e24..368c3a8a8d4a 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -229,6 +229,47 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, unsigned long flags, unsigned long eidx, unsigned long evtdata) @@ -248,7 +289,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); @@ -473,6 +514,12 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, } } + /* The guest have serviced the interrupt and starting the counter again */ + if (test_bit(IRQ_PMU_OVF, vcpu->arch.irqs_pending)) { + clear_bit(pmc_index, kvpmu->pmc_overflown); + kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_PMU_OVF); + } + out: retdata->err_val = sbiret; @@ -539,7 +586,13 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, else if (pmc->perf_event) pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); - /* TODO: Add counter overflow support when sscofpmf support is added */ + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= (1UL << i); kvpmu->sdata->ctr_values[i] = pmc->counter_val; kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, sizeof(struct riscv_pmu_snapshot_data)); @@ -548,15 +601,20 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); if (bSnapshot) { /* Clear the snapshot area for the upcoming deletion event */ kvpmu->sdata->ctr_values[i] = 0; + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~(1UL << i); kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, sizeof(struct riscv_pmu_snapshot_data)); } } } - out: retdata->err_val = sbiret; @@ -699,6 +757,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -731,13 +790,14 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); kvm_pmu_clear_snapshot_area(vcpu); }

[v1,09/10] RISC-V: KVM: Add perf sampling support for guests

Commit Message

Patch