@@ -168,7 +168,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
- (_AC(1, UL) << IRQ_S_EXT))
+ (_AC(1, UL) << IRQ_S_EXT) | \
+ (_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@@ -36,6 +36,7 @@ struct kvm_pmc {
bool started;
/* Monitoring event ID */
unsigned long event_idx;
+ struct kvm_vcpu *vcpu;
};
/* PMU data structure per vcpu */
@@ -50,6 +51,8 @@ struct kvm_pmu {
bool init_done;
/* Bit map of all the virtual counter used */
DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ /* Bit map of all the virtual counter overflown */
+ DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS);
/* The address of the counter snapshot area (guest physical address) */
gpa_t snapshot_addr;
/* The actual data of the snapshot */
@@ -139,6 +139,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_ZIHPM,
KVM_RISCV_ISA_EXT_SMSTATEEN,
KVM_RISCV_ISA_EXT_ZICOND,
+ KVM_RISCV_ISA_EXT_SSCOFPMF,
KVM_RISCV_ISA_EXT_MAX,
};
@@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void)
enable_percpu_irq(hgei_parent_irq,
irq_get_trigger_type(hgei_parent_irq));
csr_set(CSR_HIE, BIT(IRQ_S_GEXT));
+ /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF));
}
void kvm_riscv_aia_disable(void)
@@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void)
return;
hgctrl = get_cpu_ptr(&aia_hgei);
+ if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF))
+ csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF));
/* Disable per-CPU SGEI interrupt */
csr_clear(CSR_HIE, BIT(IRQ_S_GEXT));
disable_percpu_irq(hgei_parent_irq);
@@ -382,7 +382,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
set_bit(irq, vcpu->arch.irqs_pending);
@@ -397,14 +398,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq)
{
/*
- * We only allow VS-mode software, timer, and external
+ * We only allow VS-mode software, timer, counter overflow and external
* interrupts when irq is one of the local interrupts
* defined by RISC-V privilege specification.
*/
if (irq < IRQ_LOCAL_MAX &&
irq != IRQ_VS_SOFT &&
irq != IRQ_VS_TIMER &&
- irq != IRQ_VS_EXT)
+ irq != IRQ_VS_EXT &&
+ irq != IRQ_PMU_OVF)
return -EINVAL;
clear_bit(irq, vcpu->arch.irqs_pending);
@@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
/* Multi letter extensions (alphabetically sorted) */
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
@@ -88,6 +89,7 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
case KVM_RISCV_ISA_EXT_I:
case KVM_RISCV_ISA_EXT_M:
case KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_RISCV_ISA_EXT_ZBA:
@@ -117,8 +119,13 @@ void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
host_isa = kvm_isa_ext_arr[i];
if (__riscv_isa_extension_available(NULL, host_isa) &&
- kvm_riscv_vcpu_isa_enable_allowed(i))
+ kvm_riscv_vcpu_isa_enable_allowed(i)) {
+ /* Sscofpmf depends on interrupt filtering defined in ssaia */
+ if (host_isa == RISCV_ISA_EXT_SSCOFPMF &&
+ !__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA))
+ continue;
set_bit(host_isa, vcpu->arch.isa);
+ }
}
}
@@ -229,6 +229,47 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct
return 0;
}
+static void kvm_riscv_pmu_overflow(struct perf_event *perf_event,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct kvm_pmc *pmc = perf_event->overflow_handler_context;
+ struct kvm_vcpu *vcpu = pmc->vcpu;
+ struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu);
+ struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu);
+ u64 period;
+
+ /*
+ * Stop the event counting by directly accessing the perf_event.
+ * Otherwise, this needs to deferred via a workqueue.
+ * That will introduce skew in the counter value because the actual
+ * physical counter would start after returning from this function.
+ * It will be stopped again once the workqueue is scheduled
+ */
+ rpmu->pmu.stop(perf_event, PERF_EF_UPDATE);
+
+ /*
+ * The hw counter would start automatically when this function returns.
+ * Thus, the host may continue to interrupt and inject it to the guest
+ * even without the guest configuring the next event. Depending on the hardware
+ * the host may have some sluggishness only if privilege mode filtering is not
+ * available. In an ideal world, where qemu is not the only capable hardware,
+ * this can be removed.
+ * FYI: ARM64 does this way while x86 doesn't do anything as such.
+ * TODO: Should we keep it for RISC-V ?
+ */
+ period = -(local64_read(&perf_event->count));
+
+ local64_set(&perf_event->hw.period_left, 0);
+ perf_event->attr.sample_period = period;
+ perf_event->hw.sample_period = period;
+
+ set_bit(pmc->idx, kvpmu->pmc_overflown);
+ kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF);
+
+ rpmu->pmu.start(perf_event, PERF_EF_RELOAD);
+}
+
static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr,
unsigned long flags, unsigned long eidx,
unsigned long evtdata)
@@ -248,7 +289,7 @@ static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_att
*/
attr->sample_period = kvm_pmu_get_sample_period(pmc);
- event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc);
+ event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc);
if (IS_ERR(event)) {
pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event));
return PTR_ERR(event);
@@ -473,6 +514,12 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base,
}
}
+ /* The guest have serviced the interrupt and starting the counter again */
+ if (test_bit(IRQ_PMU_OVF, vcpu->arch.irqs_pending)) {
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
+ kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_PMU_OVF);
+ }
+
out:
retdata->err_val = sbiret;
@@ -539,7 +586,13 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
else if (pmc->perf_event)
pmc->counter_val += perf_event_read_value(pmc->perf_event,
&enabled, &running);
- /* TODO: Add counter overflow support when sscofpmf support is added */
+ /*
+ * The counter and overflow indicies in the snapshot region are w.r.to
+ * cbase. Modify the set bit in the counter mask instead of the pmc_index
+ * which indicates the absolute counter index.
+ */
+ if (test_bit(pmc_index, kvpmu->pmc_overflown))
+ kvpmu->sdata->ctr_overflow_mask |= (1UL << i);
kvpmu->sdata->ctr_values[i] = pmc->counter_val;
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
sizeof(struct riscv_pmu_snapshot_data));
@@ -548,15 +601,20 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base,
if (flags & SBI_PMU_STOP_FLAG_RESET) {
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
clear_bit(pmc_index, kvpmu->pmc_in_use);
+ clear_bit(pmc_index, kvpmu->pmc_overflown);
if (snap_flag_set) {
/* Clear the snapshot area for the upcoming deletion event */
kvpmu->sdata->ctr_values[i] = 0;
+ /*
+ * Only clear the given counter as the caller is responsible to
+ * validate both the overflow mask and configured counters.
+ */
+ kvpmu->sdata->ctr_overflow_mask &= ~(1UL << i);
kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata,
sizeof(struct riscv_pmu_snapshot_data));
}
}
}
-
out:
retdata->err_val = sbiret;
@@ -699,6 +757,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu)
pmc = &kvpmu->pmc[i];
pmc->idx = i;
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
+ pmc->vcpu = vcpu;
if (i < kvpmu->num_hw_ctrs) {
pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW;
if (i < 3)
@@ -731,13 +790,14 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu)
if (!kvpmu)
return;
- for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) {
+ for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) {
pmc = &kvpmu->pmc[i];
pmc->counter_val = 0;
kvm_pmu_release_perf_event(pmc);
pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID;
}
- bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS);
+ bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS);
memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event));
kvm_pmu_clear_snapshot_area(vcpu);
}