Message ID | 20230612053932.58604-7-apatel@ventanamicro.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | RISC-V KVM in-kernel AIA irqchip | expand |
Context | Check | Description |
---|---|---|
conchuod/tree_selection | fail | Failed to apply to next/pending-fixes, riscv/for-next or riscv/master |
On Sun, Jun 11, 2023 at 10:40 PM Anup Patel <apatel@ventanamicro.com> wrote: > > We implement KVM device interface for in-kernel AIA irqchip so that > user-space can use KVM device ioctls to create, configure, and destroy > in-kernel AIA irqchip. > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > --- > arch/riscv/include/asm/kvm_aia.h | 132 +++++-- > arch/riscv/include/uapi/asm/kvm.h | 45 +++ > arch/riscv/kvm/Makefile | 1 + > arch/riscv/kvm/aia.c | 11 + > arch/riscv/kvm/aia_device.c | 622 ++++++++++++++++++++++++++++++ > include/uapi/linux/kvm.h | 2 + > 6 files changed, 771 insertions(+), 42 deletions(-) > create mode 100644 arch/riscv/kvm/aia_device.c > > diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h > index 3bc0a0e47a15..a1281ebc9b92 100644 > --- a/arch/riscv/include/asm/kvm_aia.h > +++ b/arch/riscv/include/asm/kvm_aia.h > @@ -20,6 +20,33 @@ struct kvm_aia { > > /* In-kernel irqchip initialized */ > bool initialized; > + > + /* Virtualization mode (Emulation, HW Accelerated, or Auto) */ > + u32 mode; > + > + /* Number of MSIs */ > + u32 nr_ids; > + > + /* Number of wired IRQs */ > + u32 nr_sources; > + > + /* Number of group bits in IMSIC address */ > + u32 nr_group_bits; > + > + /* Position of group bits in IMSIC address */ > + u32 nr_group_shift; > + > + /* Number of hart bits in IMSIC address */ > + u32 nr_hart_bits; > + > + /* Number of guest bits in IMSIC address */ > + u32 nr_guest_bits; > + > + /* Guest physical address of APLIC */ > + gpa_t aplic_addr; > + > + /* Internal state of APLIC */ > + void *aplic_state; > }; > > struct kvm_vcpu_aia_csr { > @@ -38,8 +65,19 @@ struct kvm_vcpu_aia { > > /* CPU AIA CSR context upon Guest VCPU reset */ > struct kvm_vcpu_aia_csr guest_reset_csr; > + > + /* Guest physical address of IMSIC for this VCPU */ > + gpa_t imsic_addr; > + > + /* HART index of IMSIC extacted from guest physical address */ > + u32 hart_index; > + > + /* Internal state of IMSIC for this VCPU */ > + void 
*imsic_state; > }; > > +#define KVM_RISCV_AIA_UNDEF_ADDR (-1) > + > #define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) > > #define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) > @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); > #define kvm_riscv_aia_available() \ > static_branch_unlikely(&kvm_riscv_aia_available) > > +extern struct kvm_device_ops kvm_riscv_aia_device_ops; > + > static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) > { > } > > +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) > +{ > + return 1; > +} > + > #define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) > static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > unsigned long isel, > @@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > return 0; > } > > +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu) > +{ > +} > + > +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, > + u32 guest_index, u32 offset, > + u32 iid) > +{ > + return 0; > +} > + > +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) > +{ > + return 0; > +} > + > +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu) > +{ > +} > + > +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm, > + u32 source, bool level) > +{ > + return 0; > +} > + > +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm) > +{ > + return 0; > +} > + > +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm) > +{ > +} > + > #ifdef CONFIG_32BIT > void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); > void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); > @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, > { .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ > { .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei 
}, > > -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > -{ > - return 1; > -} > - > -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > -{ > -} > - > -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > -{ > - return 0; > -} > - > -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > -{ > -} > - > -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, > - u32 hart_index, > - u32 guest_index, u32 iid) > -{ > - return 0; > -} > - > -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm, > - struct kvm_msi *msi) > -{ > - return 0; > -} > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu); > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu); > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu); > > -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm, > - unsigned int irq, bool level) > -{ > - return 0; > -} > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > + u32 guest_index, u32 iid); > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi); > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level); > > -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) > -{ > -} > - > -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > -{ > -} > +void kvm_riscv_aia_init_vm(struct kvm *kvm); > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm); > > int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, > void __iomem **hgei_va, phys_addr_t *hgei_pa); > diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h > index 332d4a274891..047c8fc5bd71 100644 > --- a/arch/riscv/include/uapi/asm/kvm.h > +++ b/arch/riscv/include/uapi/asm/kvm.h > @@ -204,6 +204,51 @@ enum KVM_RISCV_SBI_EXT_ID { > #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ > KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) > > +/* Device Control API: RISC-V AIA 
*/ > +#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 > +#define KVM_DEV_RISCV_APLIC_SIZE 0x4000 > +#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000 > +#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000 > +#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000 > + > +#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0 > +#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0 > +#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1 > +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2 > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3 > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4 > +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5 > +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6 > + > +/* > + * Modes of RISC-V AIA device: > + * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC > + * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files > + * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever > + * available otherwise fallback to trap-n-emulation > + */ > +#define KVM_DEV_RISCV_AIA_MODE_EMUL 0 > +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1 > +#define KVM_DEV_RISCV_AIA_MODE_AUTO 2 > + > +#define KVM_DEV_RISCV_AIA_IDS_MIN 63 > +#define KVM_DEV_RISCV_AIA_IDS_MAX 2048 > +#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024 > +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8 > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24 > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56 > +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16 > +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 8 > + > +#define KVM_DEV_RISCV_AIA_GRP_ADDR 1 > +#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0 > +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu)) > +#define KVM_DEV_RISCV_AIA_ADDR_MAX \ > + (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS) > + > +#define KVM_DEV_RISCV_AIA_GRP_CTRL 2 > +#define KVM_DEV_RISCV_AIA_CTRL_INIT 0 > + > /* One single KVM irqchip, ie. 
the AIA */ > #define KVM_NR_IRQCHIPS 1 > > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > index 8031b8912a0d..dd69ebe098bd 100644 > --- a/arch/riscv/kvm/Makefile > +++ b/arch/riscv/kvm/Makefile > @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o > kvm-y += vcpu_timer.o > kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o > kvm-y += aia.o > +kvm-y += aia_device.o > diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c > index 18c442c15ff2..585a3b42c52c 100644 > --- a/arch/riscv/kvm/aia.c > +++ b/arch/riscv/kvm/aia.c > @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void) > if (rc) > return rc; > > + /* Register device operations */ > + rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops, > + KVM_DEV_TYPE_RISCV_AIA); > + if (rc) { > + aia_hgei_exit(); > + return rc; > + } > + > /* Enable KVM AIA support */ > static_branch_enable(&kvm_riscv_aia_available); > > @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void) > if (!kvm_riscv_aia_available()) > return; > > + /* Unregister device operations */ > + kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA); > + > /* Cleanup the HGEI state */ > aia_hgei_exit(); > } > diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c > new file mode 100644 > index 000000000000..a151fb357887 > --- /dev/null > +++ b/arch/riscv/kvm/aia_device.c > @@ -0,0 +1,622 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2021 Western Digital Corporation or its affiliates. > + * Copyright (C) 2022 Ventana Micro Systems Inc. 
> + * > + * Authors: > + * Anup Patel <apatel@ventanamicro.com> > + */ > + > +#include <linux/bits.h> > +#include <linux/kvm_host.h> > +#include <linux/uaccess.h> > +#include <asm/kvm_aia_imsic.h> > + > +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) > +{ > + struct kvm_vcpu *tmp_vcpu; > + > + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { > + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); > + mutex_unlock(&tmp_vcpu->mutex); > + } > +} > + > +static void unlock_all_vcpus(struct kvm *kvm) > +{ > + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); > +} > + > +static bool lock_all_vcpus(struct kvm *kvm) > +{ > + struct kvm_vcpu *tmp_vcpu; > + unsigned long c; > + > + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { > + if (!mutex_trylock(&tmp_vcpu->mutex)) { > + unlock_vcpus(kvm, c - 1); > + return false; > + } > + } > + > + return true; > +} > + > +static int aia_create(struct kvm_device *dev, u32 type) > +{ > + int ret; > + unsigned long i; > + struct kvm *kvm = dev->kvm; > + struct kvm_vcpu *vcpu; > + > + if (irqchip_in_kernel(kvm)) > + return -EEXIST; > + > + ret = -EBUSY; > + if (!lock_all_vcpus(kvm)) > + return ret; > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (vcpu->arch.ran_atleast_once) > + goto out_unlock; > + } > + ret = 0; > + > + kvm->arch.aia.in_kernel = true; > + > +out_unlock: > + unlock_all_vcpus(kvm); > + return ret; > +} > + > +static void aia_destroy(struct kvm_device *dev) > +{ > + kfree(dev); > +} > + > +static int aia_config(struct kvm *kvm, unsigned long type, > + u32 *nr, bool write) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + /* Writes can only be done before irqchip is initialized */ > + if (write && kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + switch (type) { > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > + if (write) { > + switch (*nr) { > + case KVM_DEV_RISCV_AIA_MODE_EMUL: > + break; > + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: > + case KVM_DEV_RISCV_AIA_MODE_AUTO: > + /* > + * HW Acceleration and Auto modes 
only > + * supported on host with non-zero guest > + * external interrupts (i.e. non-zero > + * VS-level IMSIC pages). > + */ > + if (!kvm_riscv_aia_nr_hgei) > + return -EINVAL; > + break; > + default: > + return -EINVAL; > + }; > + aia->mode = *nr; > + } else > + *nr = aia->mode; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > + if (write) { > + if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) || > + (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) || > + (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) || > + (kvm_riscv_aia_max_ids <= *nr)) > + return -EINVAL; > + aia->nr_ids = *nr; > + } else > + *nr = aia->nr_ids; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > + if (write) { > + if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) || > + (*nr >= kvm_riscv_aia_max_ids)) > + return -EINVAL; > + aia->nr_sources = *nr; > + } else > + *nr = aia->nr_sources; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX) > + return -EINVAL; > + aia->nr_group_bits = *nr; > + } else > + *nr = aia->nr_group_bits; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > + if (write) { > + if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) || > + (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX)) > + return -EINVAL; > + aia->nr_group_shift = *nr; > + } else > + *nr = aia->nr_group_shift; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX) > + return -EINVAL; > + aia->nr_hart_bits = *nr; > + } else > + *nr = aia->nr_hart_bits; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX) > + return -EINVAL; > + aia->nr_guest_bits = *nr; > + } else > + *nr = aia->nr_guest_bits; > + break; > + default: > + return -ENXIO; > + }; > + > + return 0; > +} > + > +static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + if (write) { > + /* Writes can only be done before irqchip is 
initialized */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1)) > + return -EINVAL; > + > + aia->aplic_addr = *addr; > + } else > + *addr = aia->aplic_addr; > + > + return 0; > +} > + > +static int aia_imsic_addr(struct kvm *kvm, u64 *addr, > + unsigned long vcpu_idx, bool write) > +{ > + struct kvm_vcpu *vcpu; > + struct kvm_vcpu_aia *vcpu_aia; > + > + vcpu = kvm_get_vcpu(kvm, vcpu_idx); > + if (!vcpu) > + return -EINVAL; > + vcpu_aia = &vcpu->arch.aia_context; > + > + if (write) { > + /* Writes can only be done before irqchip is initialized */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1)) > + return -EINVAL; > + } > + > + mutex_lock(&vcpu->mutex); > + if (write) > + vcpu_aia->imsic_addr = *addr; > + else > + *addr = vcpu_aia->imsic_addr; > + mutex_unlock(&vcpu->mutex); > + > + return 0; > +} > + > +static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) > +{ > + u32 h, l; > + gpa_t mask = 0; > + > + h = aia->nr_hart_bits + aia->nr_guest_bits + > + IMSIC_MMIO_PAGE_SHIFT - 1; > + mask = GENMASK_ULL(h, 0); > + > + if (aia->nr_group_bits) { > + h = aia->nr_group_bits + aia->nr_group_shift - 1; > + l = aia->nr_group_shift; > + mask |= GENMASK_ULL(h, l); > + } > + > + return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT; > +} > + > +static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) > +{ > + u32 hart, group = 0; > + > + hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & > + GENMASK_ULL(aia->nr_hart_bits - 1, 0); > + if (aia->nr_group_bits) > + group = (addr >> aia->nr_group_shift) & > + GENMASK_ULL(aia->nr_group_bits - 1, 0); > + > + return (group << aia->nr_hart_bits) | hart; > +} > + > +static int aia_init(struct kvm *kvm) > +{ > + int ret, i; > + unsigned long idx; > + struct kvm_vcpu *vcpu; > + struct kvm_vcpu_aia *vaia; > + struct kvm_aia *aia = &kvm->arch.aia; > + gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR; > + > 
+ /* Irqchip can be initialized only once */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* We might be in the middle of creating a VCPU? */ > + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) > + return -EBUSY; > + > + /* Number of sources should be less than or equals number of IDs */ > + if (aia->nr_ids < aia->nr_sources) > + return -EINVAL; > + > + /* APLIC base is required for non-zero number of sources */ > + if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR) > + return -EINVAL; > + > + /* Initialize APLIC */ > + ret = kvm_riscv_aia_aplic_init(kvm); > + if (ret) > + return ret; > + > + /* Iterate over each VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + vaia = &vcpu->arch.aia_context; > + > + /* IMSIC base is required */ > + if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) { > + ret = -EINVAL; > + goto fail_cleanup_imsics; > + } > + > + /* All IMSICs should have matching base PPN */ > + if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR) > + base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr); > + if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) { > + ret = -EINVAL; > + goto fail_cleanup_imsics; > + } > + > + /* Update HART index of the IMSIC based on IMSIC base */ > + vaia->hart_index = aia_imsic_hart_index(aia, > + vaia->imsic_addr); > + > + /* Initialize IMSIC for this VCPU */ > + ret = kvm_riscv_vcpu_aia_imsic_init(vcpu); > + if (ret) > + goto fail_cleanup_imsics; > + } > + > + /* Set the initialized flag */ > + kvm->arch.aia.initialized = true; > + > + return 0; > + > +fail_cleanup_imsics: > + for (i = idx - 1; i >= 0; i--) { > + vcpu = kvm_get_vcpu(kvm, i); > + if (!vcpu) > + continue; > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > + } > + kvm_riscv_aia_aplic_cleanup(kvm); > + return ret; > +} > + > +static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + u32 nr; > + u64 addr; > + int nr_vcpus, r = -ENXIO; > + unsigned long type = (unsigned long)attr->attr; > + void __user 
*uaddr = (void __user *)(long)attr->addr; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > + return -EFAULT; > + > + mutex_lock(&dev->kvm->lock); > + r = aia_config(dev->kvm, type, &nr, true); > + mutex_unlock(&dev->kvm->lock); > + > + break; > + > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > + return -EFAULT; > + > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > + mutex_lock(&dev->kvm->lock); > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + r = aia_aplic_addr(dev->kvm, &addr, true); > + else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + r = aia_imsic_addr(dev->kvm, &addr, > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true); > + mutex_unlock(&dev->kvm->lock); > + > + break; > + > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > + switch (type) { > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > + mutex_lock(&dev->kvm->lock); > + r = aia_init(dev->kvm); > + mutex_unlock(&dev->kvm->lock); > + break; > + } > + > + break; > + } > + > + return r; > +} > + > +static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + u32 nr; > + u64 addr; > + int nr_vcpus, r = -ENXIO; > + void __user *uaddr = (void __user *)(long)attr->addr; > + unsigned long type = (unsigned long)attr->attr; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > + return -EFAULT; > + > + mutex_lock(&dev->kvm->lock); > + r = aia_config(dev->kvm, type, &nr, false); > + mutex_unlock(&dev->kvm->lock); > + if (r) > + return r; > + > + if (copy_to_user(uaddr, &nr, sizeof(nr))) > + return -EFAULT; > + > + break; > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > + return -EFAULT; > + > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > + mutex_lock(&dev->kvm->lock); > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + r = aia_aplic_addr(dev->kvm, &addr, false); > + else if 
(type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + r = aia_imsic_addr(dev->kvm, &addr, > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false); > + mutex_unlock(&dev->kvm->lock); > + if (r) > + return r; > + > + if (copy_to_user(uaddr, &addr, sizeof(addr))) > + return -EFAULT; > + > + break; > + } > + > + return r; > +} > + > +static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + int nr_vcpus; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + switch (attr->attr) { > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > + return 0; > + } > + break; > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > + if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + return 0; > + else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + return 0; > + break; > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > + switch (attr->attr) { > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > + return 0; > + } > + break; > + } > + > + return -ENXIO; > +} > + > +struct kvm_device_ops kvm_riscv_aia_device_ops = { > + .name = "kvm-riscv-aia", > + .create = aia_create, > + .destroy = aia_destroy, > + .set_attr = aia_set_attr, > + .get_attr = aia_get_attr, > + .has_attr = aia_has_attr, > +}; > + > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return 1; > + > + /* Update the IMSIC HW state before entering guest mode */ > + return kvm_riscv_vcpu_aia_imsic_update(vcpu); > +} > + > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > +{ > + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; > + struct kvm_vcpu_aia_csr *reset_csr = > + 
&vcpu->arch.aia_context.guest_reset_csr; > + > + if (!kvm_riscv_aia_available()) > + return; > + memcpy(csr, reset_csr, sizeof(*csr)); > + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return; > + > + /* Reset the IMSIC context */ > + kvm_riscv_vcpu_aia_imsic_reset(vcpu); > +} > + > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > +{ > + struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context; > + > + if (!kvm_riscv_aia_available()) > + return 0; > + > + /* > + * We don't do any memory allocations over here because these > + * will be done after AIA device is initialized by the user-space. > + * > + * Refer, aia_init() implementation for more details. > + */ > + > + /* Initialize default values in AIA vcpu context */ > + vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > + vaia->hart_index = vcpu->vcpu_idx; > + > + return 0; > +} > + > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return; > + > + /* Cleanup IMSIC context */ > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > +} > + > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > + u32 guest_index, u32 iid) > +{ > + unsigned long idx; > + struct kvm_vcpu *vcpu; > + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Inject MSI to matching VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + if (vcpu->arch.aia_context.hart_index == hart_index) > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, > + guest_index, > + 0, iid); > + } > + > + return 0; > +} > + > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi) > +{ > + gpa_t tppn, ippn; > + unsigned long idx; > + struct kvm_vcpu *vcpu; > + u32 g, toff, iid = msi->data; > + struct kvm_aia *aia = &kvm->arch.aia; > + gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo; 
> + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Convert target address to target PPN */ > + tppn = target >> IMSIC_MMIO_PAGE_SHIFT; > + > + /* Extract and clear Guest ID from target PPN */ > + g = tppn & (BIT(aia->nr_guest_bits) - 1); > + tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1)); > + > + /* Inject MSI to matching VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + ippn = vcpu->arch.aia_context.imsic_addr >> > + IMSIC_MMIO_PAGE_SHIFT; > + if (ippn == tppn) { > + toff = target & (IMSIC_MMIO_PAGE_SZ - 1); > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g, > + toff, iid); > + } > + } > + > + return 0; > +} > + > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Inject interrupt level change in APLIC */ > + return kvm_riscv_aia_aplic_inject(kvm, irq, level); > +} > + > +void kvm_riscv_aia_init_vm(struct kvm *kvm) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + if (!kvm_riscv_aia_available()) > + return; > + > + /* > + * We don't do any memory allocations over here because these > + * will be done after AIA device is initialized by the user-space. > + * > + * Refer, aia_init() implementation for more details. > + */ > + > + /* Initialize default values in AIA global context */ > + aia->mode = (kvm_riscv_aia_nr_hgei) ? 
> + KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL; > + aia->nr_ids = kvm_riscv_aia_max_ids - 1; > + aia->nr_sources = 0; > + aia->nr_group_bits = 0; > + aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN; > + aia->nr_hart_bits = 0; > + aia->nr_guest_bits = 0; > + aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > +} > + > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return; > + > + /* Cleanup APLIC context */ > + kvm_riscv_aia_aplic_cleanup(kvm); > +} > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 737318b1c1d9..27ccd07898e1 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1442,6 +1442,8 @@ enum kvm_device_type { > #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE > KVM_DEV_TYPE_ARM_PV_TIME, > #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME > + KVM_DEV_TYPE_RISCV_AIA, > +#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA > KVM_DEV_TYPE_MAX, > }; > > -- > 2.34.1 > Reviewed-by: Atish Patra <atishp@rivosinc.com>
Hi Anup, On Mon, Jun 12, 2023 at 1:41 PM Anup Patel <apatel@ventanamicro.com> wrote: > > We implement KVM device interface for in-kernel AIA irqchip so that > user-space can use KVM device ioctls to create, configure, and destroy > in-kernel AIA irqchip. > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > --- > arch/riscv/include/asm/kvm_aia.h | 132 +++++-- > arch/riscv/include/uapi/asm/kvm.h | 45 +++ > arch/riscv/kvm/Makefile | 1 + > arch/riscv/kvm/aia.c | 11 + > arch/riscv/kvm/aia_device.c | 622 ++++++++++++++++++++++++++++++ > include/uapi/linux/kvm.h | 2 + > 6 files changed, 771 insertions(+), 42 deletions(-) > create mode 100644 arch/riscv/kvm/aia_device.c > > diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h > index 3bc0a0e47a15..a1281ebc9b92 100644 > --- a/arch/riscv/include/asm/kvm_aia.h > +++ b/arch/riscv/include/asm/kvm_aia.h > @@ -20,6 +20,33 @@ struct kvm_aia { > > /* In-kernel irqchip initialized */ > bool initialized; > + > + /* Virtualization mode (Emulation, HW Accelerated, or Auto) */ > + u32 mode; > + > + /* Number of MSIs */ > + u32 nr_ids; > + > + /* Number of wired IRQs */ > + u32 nr_sources; > + > + /* Number of group bits in IMSIC address */ > + u32 nr_group_bits; > + > + /* Position of group bits in IMSIC address */ > + u32 nr_group_shift; > + > + /* Number of hart bits in IMSIC address */ > + u32 nr_hart_bits; > + > + /* Number of guest bits in IMSIC address */ > + u32 nr_guest_bits; > + > + /* Guest physical address of APLIC */ > + gpa_t aplic_addr; > + > + /* Internal state of APLIC */ > + void *aplic_state; > }; > > struct kvm_vcpu_aia_csr { > @@ -38,8 +65,19 @@ struct kvm_vcpu_aia { > > /* CPU AIA CSR context upon Guest VCPU reset */ > struct kvm_vcpu_aia_csr guest_reset_csr; > + > + /* Guest physical address of IMSIC for this VCPU */ > + gpa_t imsic_addr; > + > + /* HART index of IMSIC extacted from guest physical address */ > + u32 hart_index; > + > + /* Internal state of IMSIC for this VCPU */ > + 
void *imsic_state; > }; > > +#define KVM_RISCV_AIA_UNDEF_ADDR (-1) > + > #define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) > > #define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) > @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); > #define kvm_riscv_aia_available() \ > static_branch_unlikely(&kvm_riscv_aia_available) > > +extern struct kvm_device_ops kvm_riscv_aia_device_ops; > + > static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) > { > } > > +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) > +{ > + return 1; > +} > + > #define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) > static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > unsigned long isel, > @@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > return 0; > } > > +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu) > +{ > +} > + > +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, > + u32 guest_index, u32 offset, > + u32 iid) > +{ > + return 0; > +} > + > +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) > +{ > + return 0; > +} > + > +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu) > +{ > +} > + > +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm, > + u32 source, bool level) > +{ > + return 0; > +} > + > +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm) > +{ > + return 0; > +} > + > +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm) > +{ > +} > + > #ifdef CONFIG_32BIT > void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); > void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); > @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, > { .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ > { .base = CSR_STOPEI, .count = 1, .func = 
kvm_riscv_vcpu_aia_rmw_topei }, > > -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > -{ > - return 1; > -} > - > -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > -{ > -} > - > -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > -{ > - return 0; > -} > - > -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > -{ > -} > - > -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, > - u32 hart_index, > - u32 guest_index, u32 iid) > -{ > - return 0; > -} > - > -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm, > - struct kvm_msi *msi) > -{ > - return 0; > -} > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu); > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu); > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu); > > -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm, > - unsigned int irq, bool level) > -{ > - return 0; > -} > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > + u32 guest_index, u32 iid); > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi); > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level); > > -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) > -{ > -} > - > -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > -{ > -} > +void kvm_riscv_aia_init_vm(struct kvm *kvm); > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm); > > int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, > void __iomem **hgei_va, phys_addr_t *hgei_pa); > diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h > index 332d4a274891..047c8fc5bd71 100644 > --- a/arch/riscv/include/uapi/asm/kvm.h > +++ b/arch/riscv/include/uapi/asm/kvm.h > @@ -204,6 +204,51 @@ enum KVM_RISCV_SBI_EXT_ID { > #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ > KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) > > +/* 
Device Control API: RISC-V AIA */ > +#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 > +#define KVM_DEV_RISCV_APLIC_SIZE 0x4000 > +#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000 > +#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000 > +#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000 > + > +#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0 > +#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0 > +#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1 > +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2 > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3 > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4 > +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5 > +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6 > + > +/* > + * Modes of RISC-V AIA device: > + * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC > + * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files > + * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever > + * available otherwise fallback to trap-n-emulation > + */ > +#define KVM_DEV_RISCV_AIA_MODE_EMUL 0 > +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1 > +#define KVM_DEV_RISCV_AIA_MODE_AUTO 2 > + > +#define KVM_DEV_RISCV_AIA_IDS_MIN 63 > +#define KVM_DEV_RISCV_AIA_IDS_MAX 2048 > +#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024 > +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8 > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24 > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56 > +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16 > +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 8 > + > +#define KVM_DEV_RISCV_AIA_GRP_ADDR 1 > +#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0 > +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu)) > +#define KVM_DEV_RISCV_AIA_ADDR_MAX \ > + (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS) > + > +#define KVM_DEV_RISCV_AIA_GRP_CTRL 2 > +#define KVM_DEV_RISCV_AIA_CTRL_INIT 0 > + > /* One single KVM irqchip, ie. 
the AIA */ > #define KVM_NR_IRQCHIPS 1 > > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > index 8031b8912a0d..dd69ebe098bd 100644 > --- a/arch/riscv/kvm/Makefile > +++ b/arch/riscv/kvm/Makefile > @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o > kvm-y += vcpu_timer.o > kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o > kvm-y += aia.o > +kvm-y += aia_device.o > diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c > index 18c442c15ff2..585a3b42c52c 100644 > --- a/arch/riscv/kvm/aia.c > +++ b/arch/riscv/kvm/aia.c > @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void) > if (rc) > return rc; > > + /* Register device operations */ > + rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops, > + KVM_DEV_TYPE_RISCV_AIA); > + if (rc) { > + aia_hgei_exit(); > + return rc; > + } > + > /* Enable KVM AIA support */ > static_branch_enable(&kvm_riscv_aia_available); > > @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void) > if (!kvm_riscv_aia_available()) > return; > > + /* Unregister device operations */ > + kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA); > + > /* Cleanup the HGEI state */ > aia_hgei_exit(); > } > diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c > new file mode 100644 > index 000000000000..a151fb357887 > --- /dev/null > +++ b/arch/riscv/kvm/aia_device.c > @@ -0,0 +1,622 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Copyright (C) 2021 Western Digital Corporation or its affiliates. > + * Copyright (C) 2022 Ventana Micro Systems Inc. 
> + * > + * Authors: > + * Anup Patel <apatel@ventanamicro.com> > + */ > + > +#include <linux/bits.h> > +#include <linux/kvm_host.h> > +#include <linux/uaccess.h> > +#include <asm/kvm_aia_imsic.h> > + > +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) > +{ > + struct kvm_vcpu *tmp_vcpu; > + > + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { > + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); > + mutex_unlock(&tmp_vcpu->mutex); > + } > +} > + > +static void unlock_all_vcpus(struct kvm *kvm) > +{ > + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); > +} > + > +static bool lock_all_vcpus(struct kvm *kvm) > +{ > + struct kvm_vcpu *tmp_vcpu; > + unsigned long c; > + > + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { > + if (!mutex_trylock(&tmp_vcpu->mutex)) { > + unlock_vcpus(kvm, c - 1); > + return false; > + } > + } > + > + return true; > +} > + > +static int aia_create(struct kvm_device *dev, u32 type) > +{ > + int ret; > + unsigned long i; > + struct kvm *kvm = dev->kvm; > + struct kvm_vcpu *vcpu; > + > + if (irqchip_in_kernel(kvm)) > + return -EEXIST; > + > + ret = -EBUSY; > + if (!lock_all_vcpus(kvm)) > + return ret; > + > + kvm_for_each_vcpu(i, vcpu, kvm) { > + if (vcpu->arch.ran_atleast_once) > + goto out_unlock; > + } > + ret = 0; > + > + kvm->arch.aia.in_kernel = true; > + > +out_unlock: > + unlock_all_vcpus(kvm); > + return ret; > +} > + > +static void aia_destroy(struct kvm_device *dev) > +{ > + kfree(dev); > +} > + > +static int aia_config(struct kvm *kvm, unsigned long type, > + u32 *nr, bool write) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + /* Writes can only be done before irqchip is initialized */ > + if (write && kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + switch (type) { > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > + if (write) { > + switch (*nr) { > + case KVM_DEV_RISCV_AIA_MODE_EMUL: > + break; > + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: > + case KVM_DEV_RISCV_AIA_MODE_AUTO: > + /* > + * HW Acceleration and Auto modes 
only > + * supported on host with non-zero guest > + * external interrupts (i.e. non-zero > + * VS-level IMSIC pages). > + */ > + if (!kvm_riscv_aia_nr_hgei) > + return -EINVAL; > + break; > + default: > + return -EINVAL; > + }; > + aia->mode = *nr; > + } else > + *nr = aia->mode; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > + if (write) { > + if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) || > + (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) || > + (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) || The number of MSI IDs must be one less than a multiple of 64, i.e. the low six bits of *nr must all be set. As written, this check rejects every such value (including the minimum of 63). This line should be ((*nr & KVM_DEV_RISCV_AIA_IDS_MIN) != KVM_DEV_RISCV_AIA_IDS_MIN) > + (kvm_riscv_aia_max_ids <= *nr)) > + return -EINVAL; > + aia->nr_ids = *nr; > + } else > + *nr = aia->nr_ids; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > + if (write) { > + if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) || > + (*nr >= kvm_riscv_aia_max_ids)) > + return -EINVAL; > + aia->nr_sources = *nr; > + } else > + *nr = aia->nr_sources; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX) > + return -EINVAL; > + aia->nr_group_bits = *nr; > + } else > + *nr = aia->nr_group_bits; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > + if (write) { > + if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) || > + (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX)) > + return -EINVAL; > + aia->nr_group_shift = *nr; > + } else > + *nr = aia->nr_group_shift; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX) > + return -EINVAL; > + aia->nr_hart_bits = *nr; > + } else > + *nr = aia->nr_hart_bits; > + break; > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > + if (write) { > + if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX) > + return -EINVAL; > + aia->nr_guest_bits = *nr; > + } else > + *nr = aia->nr_guest_bits; > + break; > + default: > + return -ENXIO; > + }; > + > + return 0; > +} > + > +static int aia_aplic_addr(struct kvm *kvm, u64 
*addr, bool write) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + if (write) { > + /* Writes can only be done before irqchip is initialized */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1)) > + return -EINVAL; > + > + aia->aplic_addr = *addr; > + } else > + *addr = aia->aplic_addr; > + > + return 0; > +} > + > +static int aia_imsic_addr(struct kvm *kvm, u64 *addr, > + unsigned long vcpu_idx, bool write) > +{ > + struct kvm_vcpu *vcpu; > + struct kvm_vcpu_aia *vcpu_aia; > + > + vcpu = kvm_get_vcpu(kvm, vcpu_idx); > + if (!vcpu) > + return -EINVAL; > + vcpu_aia = &vcpu->arch.aia_context; > + > + if (write) { > + /* Writes can only be done before irqchip is initialized */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1)) > + return -EINVAL; > + } > + > + mutex_lock(&vcpu->mutex); > + if (write) > + vcpu_aia->imsic_addr = *addr; > + else > + *addr = vcpu_aia->imsic_addr; > + mutex_unlock(&vcpu->mutex); > + > + return 0; > +} > + > +static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) > +{ > + u32 h, l; > + gpa_t mask = 0; > + > + h = aia->nr_hart_bits + aia->nr_guest_bits + > + IMSIC_MMIO_PAGE_SHIFT - 1; > + mask = GENMASK_ULL(h, 0); > + > + if (aia->nr_group_bits) { > + h = aia->nr_group_bits + aia->nr_group_shift - 1; > + l = aia->nr_group_shift; > + mask |= GENMASK_ULL(h, l); > + } > + > + return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT; > +} > + > +static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) > +{ > + u32 hart, group = 0; > + > + hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & > + GENMASK_ULL(aia->nr_hart_bits - 1, 0); > + if (aia->nr_group_bits) > + group = (addr >> aia->nr_group_shift) & > + GENMASK_ULL(aia->nr_group_bits - 1, 0); > + > + return (group << aia->nr_hart_bits) | hart; > +} > + > +static int aia_init(struct kvm *kvm) > +{ > + int ret, i; > + unsigned long idx; > + struct 
kvm_vcpu *vcpu; > + struct kvm_vcpu_aia *vaia; > + struct kvm_aia *aia = &kvm->arch.aia; > + gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR; > + > + /* Irqchip can be initialized only once */ > + if (kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* We might be in the middle of creating a VCPU? */ > + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) > + return -EBUSY; > + > + /* Number of sources should be less than or equals number of IDs */ > + if (aia->nr_ids < aia->nr_sources) > + return -EINVAL; > + > + /* APLIC base is required for non-zero number of sources */ > + if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR) > + return -EINVAL; > + > + /* Initialize APLIC */ > + ret = kvm_riscv_aia_aplic_init(kvm); > + if (ret) > + return ret; > + > + /* Iterate over each VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + vaia = &vcpu->arch.aia_context; > + > + /* IMSIC base is required */ > + if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) { > + ret = -EINVAL; > + goto fail_cleanup_imsics; > + } > + > + /* All IMSICs should have matching base PPN */ > + if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR) > + base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr); > + if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) { > + ret = -EINVAL; > + goto fail_cleanup_imsics; > + } > + > + /* Update HART index of the IMSIC based on IMSIC base */ > + vaia->hart_index = aia_imsic_hart_index(aia, > + vaia->imsic_addr); > + > + /* Initialize IMSIC for this VCPU */ > + ret = kvm_riscv_vcpu_aia_imsic_init(vcpu); > + if (ret) > + goto fail_cleanup_imsics; > + } > + > + /* Set the initialized flag */ > + kvm->arch.aia.initialized = true; > + > + return 0; > + > +fail_cleanup_imsics: > + for (i = idx - 1; i >= 0; i--) { > + vcpu = kvm_get_vcpu(kvm, i); > + if (!vcpu) > + continue; > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > + } > + kvm_riscv_aia_aplic_cleanup(kvm); > + return ret; > +} > + > +static int aia_set_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr) > +{ > + u32 nr; > + u64 addr; > + int nr_vcpus, r = -ENXIO; > + unsigned long type = (unsigned long)attr->attr; > + void __user *uaddr = (void __user *)(long)attr->addr; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > + return -EFAULT; > + > + mutex_lock(&dev->kvm->lock); > + r = aia_config(dev->kvm, type, &nr, true); > + mutex_unlock(&dev->kvm->lock); > + > + break; > + > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > + return -EFAULT; > + > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > + mutex_lock(&dev->kvm->lock); > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + r = aia_aplic_addr(dev->kvm, &addr, true); > + else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + r = aia_imsic_addr(dev->kvm, &addr, > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true); > + mutex_unlock(&dev->kvm->lock); > + > + break; > + > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > + switch (type) { > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > + mutex_lock(&dev->kvm->lock); > + r = aia_init(dev->kvm); > + mutex_unlock(&dev->kvm->lock); > + break; > + } > + > + break; > + } > + > + return r; > +} > + > +static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + u32 nr; > + u64 addr; > + int nr_vcpus, r = -ENXIO; > + void __user *uaddr = (void __user *)(long)attr->addr; > + unsigned long type = (unsigned long)attr->attr; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > + return -EFAULT; > + > + mutex_lock(&dev->kvm->lock); > + r = aia_config(dev->kvm, type, &nr, false); > + mutex_unlock(&dev->kvm->lock); > + if (r) > + return r; > + > + if (copy_to_user(uaddr, &nr, sizeof(nr))) > + return -EFAULT; > + > + break; > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > + return -EFAULT; > + > + nr_vcpus = 
atomic_read(&dev->kvm->online_vcpus); > + mutex_lock(&dev->kvm->lock); > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + r = aia_aplic_addr(dev->kvm, &addr, false); > + else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + r = aia_imsic_addr(dev->kvm, &addr, > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false); > + mutex_unlock(&dev->kvm->lock); > + if (r) > + return r; > + > + if (copy_to_user(uaddr, &addr, sizeof(addr))) > + return -EFAULT; > + > + break; > + } > + > + return r; > +} > + > +static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > +{ > + int nr_vcpus; > + > + switch (attr->group) { > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > + switch (attr->attr) { > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > + return 0; > + } > + break; > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > + if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC) > + return 0; > + else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > + return 0; > + break; > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > + switch (attr->attr) { > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > + return 0; > + } > + break; > + } > + > + return -ENXIO; > +} > + > +struct kvm_device_ops kvm_riscv_aia_device_ops = { > + .name = "kvm-riscv-aia", > + .create = aia_create, > + .destroy = aia_destroy, > + .set_attr = aia_set_attr, > + .get_attr = aia_get_attr, > + .has_attr = aia_has_attr, > +}; > + > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return 1; > + > + /* Update the IMSIC HW state before entering guest mode */ > + return kvm_riscv_vcpu_aia_imsic_update(vcpu); > +} > + > 
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > +{ > + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; > + struct kvm_vcpu_aia_csr *reset_csr = > + &vcpu->arch.aia_context.guest_reset_csr; > + > + if (!kvm_riscv_aia_available()) > + return; > + memcpy(csr, reset_csr, sizeof(*csr)); > + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return; > + > + /* Reset the IMSIC context */ > + kvm_riscv_vcpu_aia_imsic_reset(vcpu); > +} > + > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > +{ > + struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context; > + > + if (!kvm_riscv_aia_available()) > + return 0; > + > + /* > + * We don't do any memory allocations over here because these > + * will be done after AIA device is initialized by the user-space. > + * > + * Refer, aia_init() implementation for more details. > + */ > + > + /* Initialize default values in AIA vcpu context */ > + vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > + vaia->hart_index = vcpu->vcpu_idx; > + > + return 0; > +} > + > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > + return; > + > + /* Cleanup IMSIC context */ > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > +} > + > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > + u32 guest_index, u32 iid) > +{ > + unsigned long idx; > + struct kvm_vcpu *vcpu; > + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Inject MSI to matching VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + if (vcpu->arch.aia_context.hart_index == hart_index) > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, > + guest_index, > + 0, iid); > + } > + > + return 0; > +} > + > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi) > +{ > + gpa_t tppn, ippn; > + unsigned long idx; 
> + struct kvm_vcpu *vcpu; > + u32 g, toff, iid = msi->data; > + struct kvm_aia *aia = &kvm->arch.aia; > + gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo; > + > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Convert target address to target PPN */ > + tppn = target >> IMSIC_MMIO_PAGE_SHIFT; > + > + /* Extract and clear Guest ID from target PPN */ > + g = tppn & (BIT(aia->nr_guest_bits) - 1); > + tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1)); > + > + /* Inject MSI to matching VCPU */ > + kvm_for_each_vcpu(idx, vcpu, kvm) { > + ippn = vcpu->arch.aia_context.imsic_addr >> > + IMSIC_MMIO_PAGE_SHIFT; > + if (ippn == tppn) { > + toff = target & (IMSIC_MMIO_PAGE_SZ - 1); > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g, > + toff, iid); > + } > + } > + > + return 0; > +} > + > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return -EBUSY; > + > + /* Inject interrupt level change in APLIC */ > + return kvm_riscv_aia_aplic_inject(kvm, irq, level); > +} > + > +void kvm_riscv_aia_init_vm(struct kvm *kvm) > +{ > + struct kvm_aia *aia = &kvm->arch.aia; > + > + if (!kvm_riscv_aia_available()) > + return; > + > + /* > + * We don't do any memory allocations over here because these > + * will be done after AIA device is initialized by the user-space. > + * > + * Refer, aia_init() implementation for more details. > + */ > + > + /* Initialize default values in AIA global context */ > + aia->mode = (kvm_riscv_aia_nr_hgei) ? 
> + KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL; > + aia->nr_ids = kvm_riscv_aia_max_ids - 1; > + aia->nr_sources = 0; > + aia->nr_group_bits = 0; > + aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN; > + aia->nr_hart_bits = 0; > + aia->nr_guest_bits = 0; > + aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > +} > + > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > +{ > + /* Proceed only if AIA was initialized successfully */ > + if (!kvm_riscv_aia_initialized(kvm)) > + return; > + > + /* Cleanup APLIC context */ > + kvm_riscv_aia_aplic_cleanup(kvm); > +} > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index 737318b1c1d9..27ccd07898e1 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1442,6 +1442,8 @@ enum kvm_device_type { > #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE > KVM_DEV_TYPE_ARM_PV_TIME, > #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME > + KVM_DEV_TYPE_RISCV_AIA, > +#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA > KVM_DEV_TYPE_MAX, > }; > > -- > 2.34.1 > Additionally, it may be necessary to report KVM_CAP_IRQCHIP from kvm_vm_ioctl_check_extension() when the in-kernel KVM AIA irqchip is available. Regards, Yong-Xuan
On Thu, Jun 15, 2023 at 11:03 AM Yong-Xuan Wang <yongxuan.wang@sifive.com> wrote: > > Hi Anup, > > On Mon, Jun 12, 2023 at 1:41 PM Anup Patel <apatel@ventanamicro.com> wrote: > > > > We implement KVM device interface for in-kernel AIA irqchip so that > > user-space can use KVM device ioctls to create, configure, and destroy > > in-kernel AIA irqchip. > > > > Signed-off-by: Anup Patel <apatel@ventanamicro.com> > > --- > > arch/riscv/include/asm/kvm_aia.h | 132 +++++-- > > arch/riscv/include/uapi/asm/kvm.h | 45 +++ > > arch/riscv/kvm/Makefile | 1 + > > arch/riscv/kvm/aia.c | 11 + > > arch/riscv/kvm/aia_device.c | 622 ++++++++++++++++++++++++++++++ > > include/uapi/linux/kvm.h | 2 + > > 6 files changed, 771 insertions(+), 42 deletions(-) > > create mode 100644 arch/riscv/kvm/aia_device.c > > > > diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h > > index 3bc0a0e47a15..a1281ebc9b92 100644 > > --- a/arch/riscv/include/asm/kvm_aia.h > > +++ b/arch/riscv/include/asm/kvm_aia.h > > @@ -20,6 +20,33 @@ struct kvm_aia { > > > > /* In-kernel irqchip initialized */ > > bool initialized; > > + > > + /* Virtualization mode (Emulation, HW Accelerated, or Auto) */ > > + u32 mode; > > + > > + /* Number of MSIs */ > > + u32 nr_ids; > > + > > + /* Number of wired IRQs */ > > + u32 nr_sources; > > + > > + /* Number of group bits in IMSIC address */ > > + u32 nr_group_bits; > > + > > + /* Position of group bits in IMSIC address */ > > + u32 nr_group_shift; > > + > > + /* Number of hart bits in IMSIC address */ > > + u32 nr_hart_bits; > > + > > + /* Number of guest bits in IMSIC address */ > > + u32 nr_guest_bits; > > + > > + /* Guest physical address of APLIC */ > > + gpa_t aplic_addr; > > + > > + /* Internal state of APLIC */ > > + void *aplic_state; > > }; > > > > struct kvm_vcpu_aia_csr { > > @@ -38,8 +65,19 @@ struct kvm_vcpu_aia { > > > > /* CPU AIA CSR context upon Guest VCPU reset */ > > struct kvm_vcpu_aia_csr guest_reset_csr; > > + > > + /* Guest 
physical address of IMSIC for this VCPU */ > > + gpa_t imsic_addr; > > + > > + /* HART index of IMSIC extacted from guest physical address */ > > + u32 hart_index; > > + > > + /* Internal state of IMSIC for this VCPU */ > > + void *imsic_state; > > }; > > > > +#define KVM_RISCV_AIA_UNDEF_ADDR (-1) > > + > > #define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) > > > > #define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) > > @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); > > #define kvm_riscv_aia_available() \ > > static_branch_unlikely(&kvm_riscv_aia_available) > > > > +extern struct kvm_device_ops kvm_riscv_aia_device_ops; > > + > > static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) > > { > > } > > > > +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) > > +{ > > + return 1; > > +} > > + > > #define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) > > static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > > unsigned long isel, > > @@ -64,6 +109,41 @@ static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, > > return 0; > > } > > > > +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu) > > +{ > > +} > > + > > +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, > > + u32 guest_index, u32 offset, > > + u32 iid) > > +{ > > + return 0; > > +} > > + > > +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) > > +{ > > + return 0; > > +} > > + > > +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu) > > +{ > > +} > > + > > +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm, > > + u32 source, bool level) > > +{ > > + return 0; > > +} > > + > > +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm) > > +{ > > + return 0; > > +} > > + > > +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm) > > +{ > > +} > > + > > #ifdef CONFIG_32BIT > > void 
kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); > > void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); > > @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, > > { .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ > > { .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei }, > > > > -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > > -{ > > - return 1; > > -} > > - > > -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > > -{ > > -} > > - > > -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > > -{ > > - return 0; > > -} > > - > > -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > > -{ > > -} > > - > > -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, > > - u32 hart_index, > > - u32 guest_index, u32 iid) > > -{ > > - return 0; > > -} > > - > > -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm, > > - struct kvm_msi *msi) > > -{ > > - return 0; > > -} > > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu); > > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu); > > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu); > > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu); > > > > -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm, > > - unsigned int irq, bool level) > > -{ > > - return 0; > > -} > > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > > + u32 guest_index, u32 iid); > > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi); > > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level); > > > > -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) > > -{ > > -} > > - > > -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > > -{ > > -} > > +void kvm_riscv_aia_init_vm(struct kvm *kvm); > > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm); > > > > int 
kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, > > void __iomem **hgei_va, phys_addr_t *hgei_pa); > > diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h > > index 332d4a274891..047c8fc5bd71 100644 > > --- a/arch/riscv/include/uapi/asm/kvm.h > > +++ b/arch/riscv/include/uapi/asm/kvm.h > > @@ -204,6 +204,51 @@ enum KVM_RISCV_SBI_EXT_ID { > > #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ > > KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) > > > > +/* Device Control API: RISC-V AIA */ > > +#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 > > +#define KVM_DEV_RISCV_APLIC_SIZE 0x4000 > > +#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000 > > +#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000 > > +#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000 > > + > > +#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0 > > +#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0 > > +#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1 > > +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2 > > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3 > > +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4 > > +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5 > > +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6 > > + > > +/* > > + * Modes of RISC-V AIA device: > > + * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC > > + * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files > > + * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever > > + * available otherwise fallback to trap-n-emulation > > + */ > > +#define KVM_DEV_RISCV_AIA_MODE_EMUL 0 > > +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1 > > +#define KVM_DEV_RISCV_AIA_MODE_AUTO 2 > > + > > +#define KVM_DEV_RISCV_AIA_IDS_MIN 63 > > +#define KVM_DEV_RISCV_AIA_IDS_MAX 2048 > > +#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024 > > +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8 > > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24 > > +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56 > > +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16 > > +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 
8 > > + > > +#define KVM_DEV_RISCV_AIA_GRP_ADDR 1 > > +#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0 > > +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu)) > > +#define KVM_DEV_RISCV_AIA_ADDR_MAX \ > > + (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS) > > + > > +#define KVM_DEV_RISCV_AIA_GRP_CTRL 2 > > +#define KVM_DEV_RISCV_AIA_CTRL_INIT 0 > > + > > /* One single KVM irqchip, ie. the AIA */ > > #define KVM_NR_IRQCHIPS 1 > > > > diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile > > index 8031b8912a0d..dd69ebe098bd 100644 > > --- a/arch/riscv/kvm/Makefile > > +++ b/arch/riscv/kvm/Makefile > > @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o > > kvm-y += vcpu_timer.o > > kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o > > kvm-y += aia.o > > +kvm-y += aia_device.o > > diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c > > index 18c442c15ff2..585a3b42c52c 100644 > > --- a/arch/riscv/kvm/aia.c > > +++ b/arch/riscv/kvm/aia.c > > @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void) > > if (rc) > > return rc; > > > > + /* Register device operations */ > > + rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops, > > + KVM_DEV_TYPE_RISCV_AIA); > > + if (rc) { > > + aia_hgei_exit(); > > + return rc; > > + } > > + > > /* Enable KVM AIA support */ > > static_branch_enable(&kvm_riscv_aia_available); > > > > @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void) > > if (!kvm_riscv_aia_available()) > > return; > > > > + /* Unregister device operations */ > > + kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA); > > + > > /* Cleanup the HGEI state */ > > aia_hgei_exit(); > > } > > diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c > > new file mode 100644 > > index 000000000000..a151fb357887 > > --- /dev/null > > +++ b/arch/riscv/kvm/aia_device.c > > @@ -0,0 +1,622 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Copyright (C) 2021 Western Digital Corporation or its affiliates. > > + * Copyright (C) 2022 Ventana Micro Systems Inc. 
> > + * > > + * Authors: > > + * Anup Patel <apatel@ventanamicro.com> > > + */ > > + > > +#include <linux/bits.h> > > +#include <linux/kvm_host.h> > > +#include <linux/uaccess.h> > > +#include <asm/kvm_aia_imsic.h> > > + > > +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) > > +{ > > + struct kvm_vcpu *tmp_vcpu; > > + > > + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { > > + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); > > + mutex_unlock(&tmp_vcpu->mutex); > > + } > > +} > > + > > +static void unlock_all_vcpus(struct kvm *kvm) > > +{ > > + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); > > +} > > + > > +static bool lock_all_vcpus(struct kvm *kvm) > > +{ > > + struct kvm_vcpu *tmp_vcpu; > > + unsigned long c; > > + > > + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { > > + if (!mutex_trylock(&tmp_vcpu->mutex)) { > > + unlock_vcpus(kvm, c - 1); > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +static int aia_create(struct kvm_device *dev, u32 type) > > +{ > > + int ret; > > + unsigned long i; > > + struct kvm *kvm = dev->kvm; > > + struct kvm_vcpu *vcpu; > > + > > + if (irqchip_in_kernel(kvm)) > > + return -EEXIST; > > + > > + ret = -EBUSY; > > + if (!lock_all_vcpus(kvm)) > > + return ret; > > + > > + kvm_for_each_vcpu(i, vcpu, kvm) { > > + if (vcpu->arch.ran_atleast_once) > > + goto out_unlock; > > + } > > + ret = 0; > > + > > + kvm->arch.aia.in_kernel = true; > > + > > +out_unlock: > > + unlock_all_vcpus(kvm); > > + return ret; > > +} > > + > > +static void aia_destroy(struct kvm_device *dev) > > +{ > > + kfree(dev); > > +} > > + > > +static int aia_config(struct kvm *kvm, unsigned long type, > > + u32 *nr, bool write) > > +{ > > + struct kvm_aia *aia = &kvm->arch.aia; > > + > > + /* Writes can only be done before irqchip is initialized */ > > + if (write && kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + switch (type) { > > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > > + if (write) { > > + switch (*nr) { > > + 
case KVM_DEV_RISCV_AIA_MODE_EMUL: > > + break; > > + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: > > + case KVM_DEV_RISCV_AIA_MODE_AUTO: > > + /* > > + * HW Acceleration and Auto modes only > > + * supported on host with non-zero guest > > + * external interrupts (i.e. non-zero > > + * VS-level IMSIC pages). > > + */ > > + if (!kvm_riscv_aia_nr_hgei) > > + return -EINVAL; > > + break; > > + default: > > + return -EINVAL; > > + }; > > + aia->mode = *nr; > > + } else > > + *nr = aia->mode; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > > + if (write) { > > + if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) || > > + (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) || > > + (*nr & KVM_DEV_RISCV_AIA_IDS_MIN) || > > The msi number is one less than a multiple of 64. This line should be > ((*nr & KVM_DEV_RISCV_AIA_IDS_MIN) != KVM_DEV_RISCV_AIA_IDS_MIN) Good catch. I will fix it in the next revision. > > > + (kvm_riscv_aia_max_ids <= *nr)) > > + return -EINVAL; > > + aia->nr_ids = *nr; > > + } else > > + *nr = aia->nr_ids; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > > + if (write) { > > + if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) || > > + (*nr >= kvm_riscv_aia_max_ids)) > > + return -EINVAL; > > + aia->nr_sources = *nr; > > + } else > > + *nr = aia->nr_sources; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > > + if (write) { > > + if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX) > > + return -EINVAL; > > + aia->nr_group_bits = *nr; > > + } else > > + *nr = aia->nr_group_bits; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > > + if (write) { > > + if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) || > > + (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX)) > > + return -EINVAL; > > + aia->nr_group_shift = *nr; > > + } else > > + *nr = aia->nr_group_shift; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > > + if (write) { > > + if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX) > > + return -EINVAL; > > + aia->nr_hart_bits = *nr; > > + } else > > + *nr = 
aia->nr_hart_bits; > > + break; > > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > > + if (write) { > > + if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX) > > + return -EINVAL; > > + aia->nr_guest_bits = *nr; > > + } else > > + *nr = aia->nr_guest_bits; > > + break; > > + default: > > + return -ENXIO; > > + }; > > + > > + return 0; > > +} > > + > > +static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write) > > +{ > > + struct kvm_aia *aia = &kvm->arch.aia; > > + > > + if (write) { > > + /* Writes can only be done before irqchip is initialized */ > > + if (kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1)) > > + return -EINVAL; > > + > > + aia->aplic_addr = *addr; > > + } else > > + *addr = aia->aplic_addr; > > + > > + return 0; > > +} > > + > > +static int aia_imsic_addr(struct kvm *kvm, u64 *addr, > > + unsigned long vcpu_idx, bool write) > > +{ > > + struct kvm_vcpu *vcpu; > > + struct kvm_vcpu_aia *vcpu_aia; > > + > > + vcpu = kvm_get_vcpu(kvm, vcpu_idx); > > + if (!vcpu) > > + return -EINVAL; > > + vcpu_aia = &vcpu->arch.aia_context; > > + > > + if (write) { > > + /* Writes can only be done before irqchip is initialized */ > > + if (kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1)) > > + return -EINVAL; > > + } > > + > > + mutex_lock(&vcpu->mutex); > > + if (write) > > + vcpu_aia->imsic_addr = *addr; > > + else > > + *addr = vcpu_aia->imsic_addr; > > + mutex_unlock(&vcpu->mutex); > > + > > + return 0; > > +} > > + > > +static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) > > +{ > > + u32 h, l; > > + gpa_t mask = 0; > > + > > + h = aia->nr_hart_bits + aia->nr_guest_bits + > > + IMSIC_MMIO_PAGE_SHIFT - 1; > > + mask = GENMASK_ULL(h, 0); > > + > > + if (aia->nr_group_bits) { > > + h = aia->nr_group_bits + aia->nr_group_shift - 1; > > + l = aia->nr_group_shift; > > + mask |= GENMASK_ULL(h, l); > > + } > > + > > + return (addr & ~mask) >> 
IMSIC_MMIO_PAGE_SHIFT; > > +} > > + > > +static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) > > +{ > > + u32 hart, group = 0; > > + > > + hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & > > + GENMASK_ULL(aia->nr_hart_bits - 1, 0); > > + if (aia->nr_group_bits) > > + group = (addr >> aia->nr_group_shift) & > > + GENMASK_ULL(aia->nr_group_bits - 1, 0); > > + > > + return (group << aia->nr_hart_bits) | hart; > > +} > > + > > +static int aia_init(struct kvm *kvm) > > +{ > > + int ret, i; > > + unsigned long idx; > > + struct kvm_vcpu *vcpu; > > + struct kvm_vcpu_aia *vaia; > > + struct kvm_aia *aia = &kvm->arch.aia; > > + gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR; > > + > > + /* Irqchip can be initialized only once */ > > + if (kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + /* We might be in the middle of creating a VCPU? */ > > + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) > > + return -EBUSY; > > + > > + /* Number of sources should be less than or equals number of IDs */ > > + if (aia->nr_ids < aia->nr_sources) > > + return -EINVAL; > > + > > + /* APLIC base is required for non-zero number of sources */ > > + if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR) > > + return -EINVAL; > > + > > + /* Initialize APLIC */ > > + ret = kvm_riscv_aia_aplic_init(kvm); > > + if (ret) > > + return ret; > > + > > + /* Iterate over each VCPU */ > > + kvm_for_each_vcpu(idx, vcpu, kvm) { > > + vaia = &vcpu->arch.aia_context; > > + > > + /* IMSIC base is required */ > > + if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) { > > + ret = -EINVAL; > > + goto fail_cleanup_imsics; > > + } > > + > > + /* All IMSICs should have matching base PPN */ > > + if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR) > > + base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr); > > + if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) { > > + ret = -EINVAL; > > + goto fail_cleanup_imsics; > > + } > > + > > + /* Update HART index of the 
IMSIC based on IMSIC base */ > > + vaia->hart_index = aia_imsic_hart_index(aia, > > + vaia->imsic_addr); > > + > > + /* Initialize IMSIC for this VCPU */ > > + ret = kvm_riscv_vcpu_aia_imsic_init(vcpu); > > + if (ret) > > + goto fail_cleanup_imsics; > > + } > > + > > + /* Set the initialized flag */ > > + kvm->arch.aia.initialized = true; > > + > > + return 0; > > + > > +fail_cleanup_imsics: > > + for (i = idx - 1; i >= 0; i--) { > > + vcpu = kvm_get_vcpu(kvm, i); > > + if (!vcpu) > > + continue; > > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > > + } > > + kvm_riscv_aia_aplic_cleanup(kvm); > > + return ret; > > +} > > + > > +static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > > +{ > > + u32 nr; > > + u64 addr; > > + int nr_vcpus, r = -ENXIO; > > + unsigned long type = (unsigned long)attr->attr; > > + void __user *uaddr = (void __user *)(long)attr->addr; > > + > > + switch (attr->group) { > > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > > + return -EFAULT; > > + > > + mutex_lock(&dev->kvm->lock); > > + r = aia_config(dev->kvm, type, &nr, true); > > + mutex_unlock(&dev->kvm->lock); > > + > > + break; > > + > > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > > + return -EFAULT; > > + > > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > > + mutex_lock(&dev->kvm->lock); > > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > > + r = aia_aplic_addr(dev->kvm, &addr, true); > > + else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > > + r = aia_imsic_addr(dev->kvm, &addr, > > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true); > > + mutex_unlock(&dev->kvm->lock); > > + > > + break; > > + > > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > > + switch (type) { > > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > > + mutex_lock(&dev->kvm->lock); > > + r = aia_init(dev->kvm); > > + mutex_unlock(&dev->kvm->lock); > > + break; > > + } > > + > > + break; > > + } > > + > > + return r; > > +} 
> > + > > +static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > > +{ > > + u32 nr; > > + u64 addr; > > + int nr_vcpus, r = -ENXIO; > > + void __user *uaddr = (void __user *)(long)attr->addr; > > + unsigned long type = (unsigned long)attr->attr; > > + > > + switch (attr->group) { > > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > > + if (copy_from_user(&nr, uaddr, sizeof(nr))) > > + return -EFAULT; > > + > > + mutex_lock(&dev->kvm->lock); > > + r = aia_config(dev->kvm, type, &nr, false); > > + mutex_unlock(&dev->kvm->lock); > > + if (r) > > + return r; > > + > > + if (copy_to_user(uaddr, &nr, sizeof(nr))) > > + return -EFAULT; > > + > > + break; > > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > > + if (copy_from_user(&addr, uaddr, sizeof(addr))) > > + return -EFAULT; > > + > > + nr_vcpus = atomic_read(&dev->kvm->online_vcpus); > > + mutex_lock(&dev->kvm->lock); > > + if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC) > > + r = aia_aplic_addr(dev->kvm, &addr, false); > > + else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > > + r = aia_imsic_addr(dev->kvm, &addr, > > + type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false); > > + mutex_unlock(&dev->kvm->lock); > > + if (r) > > + return r; > > + > > + if (copy_to_user(uaddr, &addr, sizeof(addr))) > > + return -EFAULT; > > + > > + break; > > + } > > + > > + return r; > > +} > > + > > +static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) > > +{ > > + int nr_vcpus; > > + > > + switch (attr->group) { > > + case KVM_DEV_RISCV_AIA_GRP_CONFIG: > > + switch (attr->attr) { > > + case KVM_DEV_RISCV_AIA_CONFIG_MODE: > > + case KVM_DEV_RISCV_AIA_CONFIG_IDS: > > + case KVM_DEV_RISCV_AIA_CONFIG_SRCS: > > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS: > > + case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT: > > + case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS: > > + case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS: > > + return 0; > > + } > > + break; > > + case KVM_DEV_RISCV_AIA_GRP_ADDR: > > + nr_vcpus = 
atomic_read(&dev->kvm->online_vcpus); > > + if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC) > > + return 0; > > + else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus)) > > + return 0; > > + break; > > + case KVM_DEV_RISCV_AIA_GRP_CTRL: > > + switch (attr->attr) { > > + case KVM_DEV_RISCV_AIA_CTRL_INIT: > > + return 0; > > + } > > + break; > > + } > > + > > + return -ENXIO; > > +} > > + > > +struct kvm_device_ops kvm_riscv_aia_device_ops = { > > + .name = "kvm-riscv-aia", > > + .create = aia_create, > > + .destroy = aia_destroy, > > + .set_attr = aia_set_attr, > > + .get_attr = aia_get_attr, > > + .has_attr = aia_has_attr, > > +}; > > + > > +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) > > +{ > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > > + return 1; > > + > > + /* Update the IMSIC HW state before entering guest mode */ > > + return kvm_riscv_vcpu_aia_imsic_update(vcpu); > > +} > > + > > +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) > > +{ > > + struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr; > > + struct kvm_vcpu_aia_csr *reset_csr = > > + &vcpu->arch.aia_context.guest_reset_csr; > > + > > + if (!kvm_riscv_aia_available()) > > + return; > > + memcpy(csr, reset_csr, sizeof(*csr)); > > + > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > > + return; > > + > > + /* Reset the IMSIC context */ > > + kvm_riscv_vcpu_aia_imsic_reset(vcpu); > > +} > > + > > +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) > > +{ > > + struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context; > > + > > + if (!kvm_riscv_aia_available()) > > + return 0; > > + > > + /* > > + * We don't do any memory allocations over here because these > > + * will be done after AIA device is initialized by the user-space. > > + * > > + * Refer, aia_init() implementation for more details. 
> > + */ > > + > > + /* Initialize default values in AIA vcpu context */ > > + vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > > + vaia->hart_index = vcpu->vcpu_idx; > > + > > + return 0; > > +} > > + > > +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) > > +{ > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(vcpu->kvm)) > > + return; > > + > > + /* Cleanup IMSIC context */ > > + kvm_riscv_vcpu_aia_imsic_cleanup(vcpu); > > +} > > + > > +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, > > + u32 guest_index, u32 iid) > > +{ > > + unsigned long idx; > > + struct kvm_vcpu *vcpu; > > + > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + /* Inject MSI to matching VCPU */ > > + kvm_for_each_vcpu(idx, vcpu, kvm) { > > + if (vcpu->arch.aia_context.hart_index == hart_index) > > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, > > + guest_index, > > + 0, iid); > > + } > > + > > + return 0; > > +} > > + > > +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi) > > +{ > > + gpa_t tppn, ippn; > > + unsigned long idx; > > + struct kvm_vcpu *vcpu; > > + u32 g, toff, iid = msi->data; > > + struct kvm_aia *aia = &kvm->arch.aia; > > + gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo; > > + > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + /* Convert target address to target PPN */ > > + tppn = target >> IMSIC_MMIO_PAGE_SHIFT; > > + > > + /* Extract and clear Guest ID from target PPN */ > > + g = tppn & (BIT(aia->nr_guest_bits) - 1); > > + tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1)); > > + > > + /* Inject MSI to matching VCPU */ > > + kvm_for_each_vcpu(idx, vcpu, kvm) { > > + ippn = vcpu->arch.aia_context.imsic_addr >> > > + IMSIC_MMIO_PAGE_SHIFT; > > + if (ippn == tppn) { > > + toff = target & 
(IMSIC_MMIO_PAGE_SZ - 1); > > + return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g, > > + toff, iid); > > + } > > + } > > + > > + return 0; > > +} > > + > > +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level) > > +{ > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(kvm)) > > + return -EBUSY; > > + > > + /* Inject interrupt level change in APLIC */ > > + return kvm_riscv_aia_aplic_inject(kvm, irq, level); > > +} > > + > > +void kvm_riscv_aia_init_vm(struct kvm *kvm) > > +{ > > + struct kvm_aia *aia = &kvm->arch.aia; > > + > > + if (!kvm_riscv_aia_available()) > > + return; > > + > > + /* > > + * We don't do any memory allocations over here because these > > + * will be done after AIA device is initialized by the user-space. > > + * > > + * Refer, aia_init() implementation for more details. > > + */ > > + > > + /* Initialize default values in AIA global context */ > > + aia->mode = (kvm_riscv_aia_nr_hgei) ? > > + KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL; > > + aia->nr_ids = kvm_riscv_aia_max_ids - 1; > > + aia->nr_sources = 0; > > + aia->nr_group_bits = 0; > > + aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN; > > + aia->nr_hart_bits = 0; > > + aia->nr_guest_bits = 0; > > + aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR; > > +} > > + > > +void kvm_riscv_aia_destroy_vm(struct kvm *kvm) > > +{ > > + /* Proceed only if AIA was initialized successfully */ > > + if (!kvm_riscv_aia_initialized(kvm)) > > + return; > > + > > + /* Cleanup APLIC context */ > > + kvm_riscv_aia_aplic_cleanup(kvm); > > +} > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index 737318b1c1d9..27ccd07898e1 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -1442,6 +1442,8 @@ enum kvm_device_type { > > #define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE > > KVM_DEV_TYPE_ARM_PV_TIME, > > #define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME > > + 
KVM_DEV_TYPE_RISCV_AIA, > > +#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA > > KVM_DEV_TYPE_MAX, > > }; > > > > -- > > 2.34.1 > > > > Additionally, it might be necessary to consider adding KVM_CAP_IRQCHIP > in the kvm_vm_ioctl_check_extension() function when the KVM AIA chip > is present. > Advertising KVM_CAP_IRQCHIP is not of much use since KVM user space can already discover the presence of AIA device. I will add it anyway. Regards, Anup
diff --git a/arch/riscv/include/asm/kvm_aia.h b/arch/riscv/include/asm/kvm_aia.h index 3bc0a0e47a15..a1281ebc9b92 100644 --- a/arch/riscv/include/asm/kvm_aia.h +++ b/arch/riscv/include/asm/kvm_aia.h @@ -20,6 +20,33 @@ struct kvm_aia { /* In-kernel irqchip initialized */ bool initialized; + + /* Virtualization mode (Emulation, HW Accelerated, or Auto) */ + u32 mode; + + /* Number of MSIs */ + u32 nr_ids; + + /* Number of wired IRQs */ + u32 nr_sources; + + /* Number of group bits in IMSIC address */ + u32 nr_group_bits; + + /* Position of group bits in IMSIC address */ + u32 nr_group_shift; + + /* Number of hart bits in IMSIC address */ + u32 nr_hart_bits; + + /* Number of guest bits in IMSIC address */ + u32 nr_guest_bits; + + /* Guest physical address of APLIC */ + gpa_t aplic_addr; + + /* Internal state of APLIC */ + void *aplic_state; }; struct kvm_vcpu_aia_csr { @@ -38,8 +65,19 @@ struct kvm_vcpu_aia { /* CPU AIA CSR context upon Guest VCPU reset */ struct kvm_vcpu_aia_csr guest_reset_csr; + + /* Guest physical address of IMSIC for this VCPU */ + gpa_t imsic_addr; + + /* HART index of IMSIC extracted from guest physical address */ + u32 hart_index; + + /* Internal state of IMSIC for this VCPU */ + void *imsic_state; }; +#define KVM_RISCV_AIA_UNDEF_ADDR (-1) + #define kvm_riscv_aia_initialized(k) ((k)->arch.aia.initialized) #define irqchip_in_kernel(k) ((k)->arch.aia.in_kernel) @@ -50,10 +88,17 @@ DECLARE_STATIC_KEY_FALSE(kvm_riscv_aia_available); #define kvm_riscv_aia_available() \ static_branch_unlikely(&kvm_riscv_aia_available) +extern struct kvm_device_ops kvm_riscv_aia_device_ops; + static inline void kvm_riscv_vcpu_aia_imsic_release(struct kvm_vcpu *vcpu) { } +static inline int kvm_riscv_vcpu_aia_imsic_update(struct kvm_vcpu *vcpu) +{ + return 1; +} + #define KVM_RISCV_AIA_IMSIC_TOPEI (ISELECT_MASK + 1) static inline int kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, unsigned long isel, @@ -64,6 +109,41 @@ static inline int 
kvm_riscv_vcpu_aia_imsic_rmw(struct kvm_vcpu *vcpu, return 0; } +static inline void kvm_riscv_vcpu_aia_imsic_reset(struct kvm_vcpu *vcpu) +{ +} + +static inline int kvm_riscv_vcpu_aia_imsic_inject(struct kvm_vcpu *vcpu, + u32 guest_index, u32 offset, + u32 iid) +{ + return 0; +} + +static inline int kvm_riscv_vcpu_aia_imsic_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static inline void kvm_riscv_vcpu_aia_imsic_cleanup(struct kvm_vcpu *vcpu) +{ +} + +static inline int kvm_riscv_aia_aplic_inject(struct kvm *kvm, + u32 source, bool level) +{ + return 0; +} + +static inline int kvm_riscv_aia_aplic_init(struct kvm *kvm) +{ + return 0; +} + +static inline void kvm_riscv_aia_aplic_cleanup(struct kvm *kvm) +{ +} + #ifdef CONFIG_32BIT void kvm_riscv_vcpu_aia_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_aia_sync_interrupts(struct kvm_vcpu *vcpu); @@ -99,50 +179,18 @@ int kvm_riscv_vcpu_aia_rmw_ireg(struct kvm_vcpu *vcpu, unsigned int csr_num, { .base = CSR_SIREG, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_ireg }, \ { .base = CSR_STOPEI, .count = 1, .func = kvm_riscv_vcpu_aia_rmw_topei }, -static inline int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu) -{ - return 1; -} - -static inline void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu) -{ -} - -static inline int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu) -{ -} - -static inline int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, - u32 hart_index, - u32 guest_index, u32 iid) -{ - return 0; -} - -static inline int kvm_riscv_aia_inject_msi(struct kvm *kvm, - struct kvm_msi *msi) -{ - return 0; -} +int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu); -static inline int kvm_riscv_aia_inject_irq(struct kvm *kvm, - unsigned int irq, bool level) -{ - 
return 0; -} +int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index, + u32 guest_index, u32 iid); +int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi); +int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level); -static inline void kvm_riscv_aia_init_vm(struct kvm *kvm) -{ -} - -static inline void kvm_riscv_aia_destroy_vm(struct kvm *kvm) -{ -} +void kvm_riscv_aia_init_vm(struct kvm *kvm); +void kvm_riscv_aia_destroy_vm(struct kvm *kvm); int kvm_riscv_aia_alloc_hgei(int cpu, struct kvm_vcpu *owner, void __iomem **hgei_va, phys_addr_t *hgei_pa); diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 332d4a274891..047c8fc5bd71 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -204,6 +204,51 @@ enum KVM_RISCV_SBI_EXT_ID { #define KVM_REG_RISCV_SBI_MULTI_REG_LAST \ KVM_REG_RISCV_SBI_MULTI_REG(KVM_RISCV_SBI_EXT_MAX - 1) +/* Device Control API: RISC-V AIA */ +#define KVM_DEV_RISCV_APLIC_ALIGN 0x1000 +#define KVM_DEV_RISCV_APLIC_SIZE 0x4000 +#define KVM_DEV_RISCV_APLIC_MAX_HARTS 0x4000 +#define KVM_DEV_RISCV_IMSIC_ALIGN 0x1000 +#define KVM_DEV_RISCV_IMSIC_SIZE 0x1000 + +#define KVM_DEV_RISCV_AIA_GRP_CONFIG 0 +#define KVM_DEV_RISCV_AIA_CONFIG_MODE 0 +#define KVM_DEV_RISCV_AIA_CONFIG_IDS 1 +#define KVM_DEV_RISCV_AIA_CONFIG_SRCS 2 +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS 3 +#define KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT 4 +#define KVM_DEV_RISCV_AIA_CONFIG_HART_BITS 5 +#define KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS 6 + +/* + * Modes of RISC-V AIA device: + * 1) EMUL (aka Emulation): Trap-n-emulate IMSIC + * 2) HWACCEL (aka HW Acceleration): Virtualize IMSIC using IMSIC guest files + * 3) AUTO (aka Automatic): Virtualize IMSIC using IMSIC guest files whenever + * available otherwise fallback to trap-n-emulation + */ +#define KVM_DEV_RISCV_AIA_MODE_EMUL 0 +#define KVM_DEV_RISCV_AIA_MODE_HWACCEL 1 +#define KVM_DEV_RISCV_AIA_MODE_AUTO 2 + +#define 
KVM_DEV_RISCV_AIA_IDS_MIN 63 +#define KVM_DEV_RISCV_AIA_IDS_MAX 2048 +#define KVM_DEV_RISCV_AIA_SRCS_MAX 1024 +#define KVM_DEV_RISCV_AIA_GROUP_BITS_MAX 8 +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN 24 +#define KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX 56 +#define KVM_DEV_RISCV_AIA_HART_BITS_MAX 16 +#define KVM_DEV_RISCV_AIA_GUEST_BITS_MAX 8 + +#define KVM_DEV_RISCV_AIA_GRP_ADDR 1 +#define KVM_DEV_RISCV_AIA_ADDR_APLIC 0 +#define KVM_DEV_RISCV_AIA_ADDR_IMSIC(__vcpu) (1 + (__vcpu)) +#define KVM_DEV_RISCV_AIA_ADDR_MAX \ + (1 + KVM_DEV_RISCV_APLIC_MAX_HARTS) + +#define KVM_DEV_RISCV_AIA_GRP_CTRL 2 +#define KVM_DEV_RISCV_AIA_CTRL_INIT 0 + /* One single KVM irqchip, ie. the AIA */ #define KVM_NR_IRQCHIPS 1 diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index 8031b8912a0d..dd69ebe098bd 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -27,3 +27,4 @@ kvm-y += vcpu_sbi_hsm.o kvm-y += vcpu_timer.o kvm-$(CONFIG_RISCV_PMU_SBI) += vcpu_pmu.o vcpu_sbi_pmu.o kvm-y += aia.o +kvm-y += aia_device.o diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index 18c442c15ff2..585a3b42c52c 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -631,6 +631,14 @@ int kvm_riscv_aia_init(void) if (rc) return rc; + /* Register device operations */ + rc = kvm_register_device_ops(&kvm_riscv_aia_device_ops, + KVM_DEV_TYPE_RISCV_AIA); + if (rc) { + aia_hgei_exit(); + return rc; + } + /* Enable KVM AIA support */ static_branch_enable(&kvm_riscv_aia_available); @@ -642,6 +650,9 @@ void kvm_riscv_aia_exit(void) if (!kvm_riscv_aia_available()) return; + /* Unregister device operations */ + kvm_unregister_device_ops(KVM_DEV_TYPE_RISCV_AIA); + /* Cleanup the HGEI state */ aia_hgei_exit(); } diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c new file mode 100644 index 000000000000..a151fb357887 --- /dev/null +++ b/arch/riscv/kvm/aia_device.c @@ -0,0 +1,622 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Western Digital 
Corporation or its affiliates.
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ *     Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/kvm_host.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_aia_imsic.h>
+
+/* Unlock the mutexes of the VCPUs with index vcpu_lock_idx down to 0. */
+static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx)
+{
+	struct kvm_vcpu *tmp_vcpu;
+
+	for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+		tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+		mutex_unlock(&tmp_vcpu->mutex);
+	}
+}
+
+/* Unlock the mutex of every online VCPU. */
+static void unlock_all_vcpus(struct kvm *kvm)
+{
+	unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1);
+}
+
+/*
+ * Try to take the mutex of every VCPU without blocking.
+ *
+ * Returns true when all mutexes were acquired. If any trylock fails,
+ * the mutexes taken so far are released again and false is returned.
+ */
+static bool lock_all_vcpus(struct kvm *kvm)
+{
+	struct kvm_vcpu *tmp_vcpu;
+	unsigned long c;
+
+	kvm_for_each_vcpu(c, tmp_vcpu, kvm) {
+		if (!mutex_trylock(&tmp_vcpu->mutex)) {
+			unlock_vcpus(kvm, c - 1);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Device "create" callback: mark the AIA irqchip as in-kernel.
+ *
+ * Returns -EEXIST if an in-kernel irqchip already exists, -EBUSY if
+ * the VCPU mutexes could not all be taken or if any VCPU has already
+ * run at least once, and 0 on success.
+ */
+static int aia_create(struct kvm_device *dev, u32 type)
+{
+	int ret;
+	unsigned long i;
+	struct kvm *kvm = dev->kvm;
+	struct kvm_vcpu *vcpu;
+
+	if (irqchip_in_kernel(kvm))
+		return -EEXIST;
+
+	ret = -EBUSY;
+	if (!lock_all_vcpus(kvm))
+		return ret;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		if (vcpu->arch.ran_atleast_once)
+			goto out_unlock;
+	}
+	ret = 0;
+
+	kvm->arch.aia.in_kernel = true;
+
+out_unlock:
+	unlock_all_vcpus(kvm);
+	return ret;
+}
+
+/* Device "destroy" callback: free the kvm_device structure. */
+static void aia_destroy(struct kvm_device *dev)
+{
+	kfree(dev);
+}
+
+/*
+ * Read or write one KVM_DEV_RISCV_AIA_GRP_CONFIG attribute.
+ *
+ * @type:  KVM_DEV_RISCV_AIA_CONFIG_* attribute selector
+ * @nr:    value to store when @write is true, otherwise the location
+ *         that receives the current value
+ * @write: true to update the attribute, false to read it
+ *
+ * Returns 0 on success, -EBUSY when writing after the irqchip has been
+ * initialized, -EINVAL when the written value is rejected, and -ENXIO
+ * for an unknown @type.
+ */
+static int aia_config(struct kvm *kvm, unsigned long type,
+		      u32 *nr, bool write)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	/* Writes can only be done before irqchip is initialized */
+	if (write && kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	switch (type) {
+	case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+		if (write) {
+			switch (*nr) {
+			case KVM_DEV_RISCV_AIA_MODE_EMUL:
+				break;
+			case KVM_DEV_RISCV_AIA_MODE_HWACCEL:
+			case KVM_DEV_RISCV_AIA_MODE_AUTO:
+				/*
+				 * HW Acceleration and Auto modes only
+				 * supported on host with non-zero guest
+				 * external interrupts (i.e. non-zero
+				 * VS-level IMSIC pages).
+				 */
+				if (!kvm_riscv_aia_nr_hgei)
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+			aia->mode = *nr;
+		} else
+			*nr = aia->mode;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+		if (write) {
+			/*
+			 * A valid number of IDs must have every bit of
+			 * KVM_DEV_RISCV_AIA_IDS_MIN set. Testing for any
+			 * bit being set would reject the minimum itself
+			 * as well as the default (max_ids - 1).
+			 */
+			if ((*nr < KVM_DEV_RISCV_AIA_IDS_MIN) ||
+			    (*nr >= KVM_DEV_RISCV_AIA_IDS_MAX) ||
+			    ((*nr & KVM_DEV_RISCV_AIA_IDS_MIN) !=
+			     KVM_DEV_RISCV_AIA_IDS_MIN) ||
+			    (kvm_riscv_aia_max_ids <= *nr))
+				return -EINVAL;
+			aia->nr_ids = *nr;
+		} else
+			*nr = aia->nr_ids;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+		if (write) {
+			if ((*nr >= KVM_DEV_RISCV_AIA_SRCS_MAX) ||
+			    (*nr >= kvm_riscv_aia_max_ids))
+				return -EINVAL;
+			aia->nr_sources = *nr;
+		} else
+			*nr = aia->nr_sources;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_GROUP_BITS_MAX)
+				return -EINVAL;
+			aia->nr_group_bits = *nr;
+		} else
+			*nr = aia->nr_group_bits;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+		if (write) {
+			if ((*nr < KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN) ||
+			    (*nr >= KVM_DEV_RISCV_AIA_GROUP_SHIFT_MAX))
+				return -EINVAL;
+			aia->nr_group_shift = *nr;
+		} else
+			*nr = aia->nr_group_shift;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_HART_BITS_MAX)
+				return -EINVAL;
+			aia->nr_hart_bits = *nr;
+		} else
+			*nr = aia->nr_hart_bits;
+		break;
+	case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+		if (write) {
+			if (*nr >= KVM_DEV_RISCV_AIA_GUEST_BITS_MAX)
+				return -EINVAL;
+			aia->nr_guest_bits = *nr;
+		} else
+			*nr = aia->nr_guest_bits;
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+/*
+ * Read or write the guest physical address of the APLIC.
+ *
+ * Returns 0 on success, -EBUSY when writing after the irqchip has been
+ * initialized, and -EINVAL when the written address is not aligned to
+ * KVM_DEV_RISCV_APLIC_ALIGN.
+ */
+static int aia_aplic_addr(struct kvm *kvm, u64 *addr, bool write)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	if (write) {
+		/* Writes can only be done before irqchip is initialized */
+		if (kvm_riscv_aia_initialized(kvm))
+			return -EBUSY;
+
+		if (*addr & (KVM_DEV_RISCV_APLIC_ALIGN - 1))
+			return -EINVAL;
+
+		aia->aplic_addr = *addr;
+	} else
+		*addr = aia->aplic_addr;
+
+	return 0;
+}
+
+/*
+ * Read or write the guest physical address of the IMSIC belonging to
+ * the VCPU with index @vcpu_idx. The access to the per-VCPU field is
+ * done with that VCPU's mutex held.
+ *
+ * Returns 0 on success, -EINVAL when no such VCPU exists or the
+ * written address is not aligned to KVM_DEV_RISCV_IMSIC_ALIGN, and
+ * -EBUSY when writing after the irqchip has been initialized.
+ */
+static int aia_imsic_addr(struct kvm *kvm, u64 *addr,
+			  unsigned long vcpu_idx, bool write)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_aia *vcpu_aia;
+
+	vcpu = kvm_get_vcpu(kvm, vcpu_idx);
+	if (!vcpu)
+		return -EINVAL;
+	vcpu_aia = &vcpu->arch.aia_context;
+
+	if (write) {
+		/* Writes can only be done before irqchip is initialized */
+		if (kvm_riscv_aia_initialized(kvm))
+			return -EBUSY;
+
+		if (*addr & (KVM_DEV_RISCV_IMSIC_ALIGN - 1))
+			return -EINVAL;
+	}
+
+	mutex_lock(&vcpu->mutex);
+	if (write)
+		vcpu_aia->imsic_addr = *addr;
+	else
+		*addr = vcpu_aia->imsic_addr;
+	mutex_unlock(&vcpu->mutex);
+
+	return 0;
+}
+
+/*
+ * Return the page number of @addr with the page-offset, guest-index
+ * and hart-index bits (and the group-index bits, if any) cleared.
+ * aia_init() requires this value to be identical for all VCPUs.
+ */
+static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr)
+{
+	u32 h, l;
+	gpa_t mask = 0;
+
+	h = aia->nr_hart_bits + aia->nr_guest_bits +
+	    IMSIC_MMIO_PAGE_SHIFT - 1;
+	mask = GENMASK_ULL(h, 0);
+
+	if (aia->nr_group_bits) {
+		h = aia->nr_group_bits + aia->nr_group_shift - 1;
+		l = aia->nr_group_shift;
+		mask |= GENMASK_ULL(h, l);
+	}
+
+	return (addr & ~mask) >> IMSIC_MMIO_PAGE_SHIFT;
+}
+
+/*
+ * Extract the HART index encoded in IMSIC address @addr: the group
+ * index field placed above the hart index field.
+ */
+static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr)
+{
+	u32 hart = 0, group = 0;
+
+	/*
+	 * GENMASK_ULL(n - 1, 0) is only meaningful for n > 0. With zero
+	 * hart bits (the default set by kvm_riscv_aia_init_vm()) the
+	 * hart field is empty, so keep it at 0 instead of building a
+	 * mask from an underflowed bit position.
+	 */
+	if (aia->nr_hart_bits)
+		hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) &
+		       GENMASK_ULL(aia->nr_hart_bits - 1, 0);
+	if (aia->nr_group_bits)
+		group = (addr >> aia->nr_group_shift) &
+			GENMASK_ULL(aia->nr_group_bits - 1, 0);
+
+	return (group << aia->nr_hart_bits) | hart;
+}
+
+/*
+ * Handle KVM_DEV_RISCV_AIA_CTRL_INIT: validate the configuration,
+ * initialize the APLIC and the IMSIC of every VCPU, then mark the
+ * irqchip as initialized.
+ *
+ * Returns 0 on success, -EBUSY if already initialized or a VCPU is
+ * still being created, -EINVAL for an inconsistent configuration, or
+ * the error returned by the APLIC/IMSIC init helpers. On failure the
+ * IMSICs initialized so far and the APLIC are cleaned up again.
+ */
+static int aia_init(struct kvm *kvm)
+{
+	int ret, i;
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+	struct kvm_vcpu_aia *vaia;
+	struct kvm_aia *aia = &kvm->arch.aia;
+	gpa_t base_ppn = KVM_RISCV_AIA_UNDEF_ADDR;
+
+	/* Irqchip can be initialized only once */
+	if (kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* We might be in the middle of creating a VCPU? */
+	if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus))
+		return -EBUSY;
+
+	/* Number of sources should be less than or equal to number of IDs */
+	if (aia->nr_ids < aia->nr_sources)
+		return -EINVAL;
+
+	/* APLIC base is required for non-zero number of sources */
+	if (aia->nr_sources && aia->aplic_addr == KVM_RISCV_AIA_UNDEF_ADDR)
+		return -EINVAL;
+
+	/* Initialize APLIC */
+	ret = kvm_riscv_aia_aplic_init(kvm);
+	if (ret)
+		return ret;
+
+	/* Iterate over each VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		vaia = &vcpu->arch.aia_context;
+
+		/* IMSIC base is required */
+		if (vaia->imsic_addr == KVM_RISCV_AIA_UNDEF_ADDR) {
+			ret = -EINVAL;
+			goto fail_cleanup_imsics;
+		}
+
+		/* All IMSICs should have matching base PPN */
+		if (base_ppn == KVM_RISCV_AIA_UNDEF_ADDR)
+			base_ppn = aia_imsic_ppn(aia, vaia->imsic_addr);
+		if (base_ppn != aia_imsic_ppn(aia, vaia->imsic_addr)) {
+			ret = -EINVAL;
+			goto fail_cleanup_imsics;
+		}
+
+		/* Update HART index of the IMSIC based on IMSIC base */
+		vaia->hart_index = aia_imsic_hart_index(aia,
+							vaia->imsic_addr);
+
+		/* Initialize IMSIC for this VCPU */
+		ret = kvm_riscv_vcpu_aia_imsic_init(vcpu);
+		if (ret)
+			goto fail_cleanup_imsics;
+	}
+
+	/* Set the initialized flag */
+	kvm->arch.aia.initialized = true;
+
+	return 0;
+
+fail_cleanup_imsics:
+	/* Undo only the VCPUs that were processed before the failing one */
+	for (i = idx - 1; i >= 0; i--) {
+		vcpu = kvm_get_vcpu(kvm, i);
+		if (!vcpu)
+			continue;
+		kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+	}
+	kvm_riscv_aia_aplic_cleanup(kvm);
+	return ret;
+}
+
+/*
+ * Device "set_attr" callback. Copies the value from user space and
+ * dispatches on the attribute group with kvm->lock held.
+ *
+ * Returns -EFAULT if the user copy fails, -ENXIO for an unknown
+ * group/attribute, otherwise the result of the group handler.
+ */
+static int aia_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	u32 nr;
+	u64 addr;
+	int nr_vcpus, r = -ENXIO;
+	unsigned long type = (unsigned long)attr->attr;
+	void __user *uaddr = (void __user *)(long)attr->addr;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = aia_config(dev->kvm, type, &nr, true);
+		mutex_unlock(&dev->kvm->lock);
+
+		break;
+
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		mutex_lock(&dev->kvm->lock);
+		if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			r = aia_aplic_addr(dev->kvm, &addr, true);
+		else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			r = aia_imsic_addr(dev->kvm, &addr,
+			    type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), true);
+		mutex_unlock(&dev->kvm->lock);
+
+		break;
+
+	case KVM_DEV_RISCV_AIA_GRP_CTRL:
+		switch (type) {
+		case KVM_DEV_RISCV_AIA_CTRL_INIT:
+			mutex_lock(&dev->kvm->lock);
+			r = aia_init(dev->kvm);
+			mutex_unlock(&dev->kvm->lock);
+			break;
+		}
+
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * Device "get_attr" callback. Reads the requested attribute with
+ * kvm->lock held and copies the value back to user space.
+ *
+ * Returns -EFAULT if a user copy fails, -ENXIO for an unknown
+ * group/attribute, otherwise the result of the group handler.
+ */
+static int aia_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	u32 nr;
+	u64 addr;
+	int nr_vcpus, r = -ENXIO;
+	void __user *uaddr = (void __user *)(long)attr->addr;
+	unsigned long type = (unsigned long)attr->attr;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		if (copy_from_user(&nr, uaddr, sizeof(nr)))
+			return -EFAULT;
+
+		mutex_lock(&dev->kvm->lock);
+		r = aia_config(dev->kvm, type, &nr, false);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &nr, sizeof(nr)))
+			return -EFAULT;
+
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		if (copy_from_user(&addr, uaddr, sizeof(addr)))
+			return -EFAULT;
+
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		mutex_lock(&dev->kvm->lock);
+		if (type == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			r = aia_aplic_addr(dev->kvm, &addr, false);
+		else if (type < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			r = aia_imsic_addr(dev->kvm, &addr,
+			    type - KVM_DEV_RISCV_AIA_ADDR_IMSIC(0), false);
+		mutex_unlock(&dev->kvm->lock);
+		if (r)
+			return r;
+
+		if (copy_to_user(uaddr, &addr, sizeof(addr)))
+			return -EFAULT;
+
+		break;
+	}
+
+	return r;
+}
+
+/*
+ * Device "has_attr" callback: return 0 when the group/attribute pair
+ * is one handled by this device, -ENXIO otherwise.
+ */
+static int aia_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+	int nr_vcpus;
+
+	switch (attr->group) {
+	case KVM_DEV_RISCV_AIA_GRP_CONFIG:
+		switch (attr->attr) {
+		case KVM_DEV_RISCV_AIA_CONFIG_MODE:
+		case KVM_DEV_RISCV_AIA_CONFIG_IDS:
+		case KVM_DEV_RISCV_AIA_CONFIG_SRCS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT:
+		case KVM_DEV_RISCV_AIA_CONFIG_HART_BITS:
+		case KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS:
+			return 0;
+		}
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_ADDR:
+		nr_vcpus = atomic_read(&dev->kvm->online_vcpus);
+		if (attr->attr == KVM_DEV_RISCV_AIA_ADDR_APLIC)
+			return 0;
+		else if (attr->attr < KVM_DEV_RISCV_AIA_ADDR_IMSIC(nr_vcpus))
+			return 0;
+		break;
+	case KVM_DEV_RISCV_AIA_GRP_CTRL:
+		switch (attr->attr) {
+		case KVM_DEV_RISCV_AIA_CTRL_INIT:
+			return 0;
+		}
+		break;
+	}
+
+	return -ENXIO;
+}
+
+/* KVM device operations for the in-kernel AIA irqchip */
+struct kvm_device_ops kvm_riscv_aia_device_ops = {
+	.name = "kvm-riscv-aia",
+	.create = aia_create,
+	.destroy = aia_destroy,
+	.set_attr = aia_set_attr,
+	.get_attr = aia_get_attr,
+	.has_attr = aia_has_attr,
+};
+
+/*
+ * Returns 1 when the in-kernel AIA is not initialized, otherwise the
+ * result of kvm_riscv_vcpu_aia_imsic_update().
+ */
+int kvm_riscv_vcpu_aia_update(struct kvm_vcpu *vcpu)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return 1;
+
+	/* Update the IMSIC HW state before entering guest mode */
+	return kvm_riscv_vcpu_aia_imsic_update(vcpu);
+}
+
+/*
+ * Restore the guest AIA CSR context from its reset copy and, if the
+ * in-kernel AIA is initialized, reset the VCPU's IMSIC context too.
+ */
+void kvm_riscv_vcpu_aia_reset(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
+	struct kvm_vcpu_aia_csr *reset_csr =
+				&vcpu->arch.aia_context.guest_reset_csr;
+
+	if (!kvm_riscv_aia_available())
+		return;
+	memcpy(csr, reset_csr, sizeof(*csr));
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return;
+
+	/* Reset the IMSIC context */
+	kvm_riscv_vcpu_aia_imsic_reset(vcpu);
+}
+
+/* Set the per-VCPU AIA defaults; always returns 0. */
+int kvm_riscv_vcpu_aia_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_aia *vaia = &vcpu->arch.aia_context;
+
+	if (!kvm_riscv_aia_available())
+		return 0;
+
+	/*
+	 * We don't do any memory allocations over here because these
+	 * will be done after AIA device is initialized by the user-space.
+	 *
+	 * Refer, aia_init() implementation for more details.
+	 */
+
+	/* Initialize default values in AIA vcpu context */
+	vaia->imsic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+	vaia->hart_index = vcpu->vcpu_idx;
+
+	return 0;
+}
+
+/* Release the VCPU's IMSIC context if the in-kernel AIA is initialized. */
+void kvm_riscv_vcpu_aia_deinit(struct kvm_vcpu *vcpu)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(vcpu->kvm))
+		return;
+
+	/* Cleanup IMSIC context */
+	kvm_riscv_vcpu_aia_imsic_cleanup(vcpu);
+}
+
+/*
+ * Inject MSI @iid into the IMSIC of the VCPU whose hart_index equals
+ * @hart_index.
+ *
+ * Returns -EBUSY when the in-kernel AIA is not initialized, the result
+ * of kvm_riscv_vcpu_aia_imsic_inject() for the matching VCPU, or 0
+ * when no VCPU matches.
+ */
+int kvm_riscv_aia_inject_msi_by_id(struct kvm *kvm, u32 hart_index,
+				   u32 guest_index, u32 iid)
+{
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Inject MSI to matching VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		if (vcpu->arch.aia_context.hart_index == hart_index)
+			return kvm_riscv_vcpu_aia_imsic_inject(vcpu,
+							       guest_index,
+							       0, iid);
+	}
+
+	return 0;
+}
+
+/*
+ * Inject the MSI described by @msi into the IMSIC of the VCPU whose
+ * IMSIC page matches the MSI target address (ignoring the guest index
+ * bits, which are passed on separately).
+ *
+ * Returns -EBUSY when the in-kernel AIA is not initialized, the result
+ * of kvm_riscv_vcpu_aia_imsic_inject() for the matching VCPU, or 0
+ * when no VCPU matches.
+ */
+int kvm_riscv_aia_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
+{
+	gpa_t tppn, ippn;
+	unsigned long idx;
+	struct kvm_vcpu *vcpu;
+	u32 g, toff, iid = msi->data;
+	struct kvm_aia *aia = &kvm->arch.aia;
+	gpa_t target = (((gpa_t)msi->address_hi) << 32) | msi->address_lo;
+
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Convert target address to target PPN */
+	tppn = target >> IMSIC_MMIO_PAGE_SHIFT;
+
+	/* Extract and clear Guest ID from target PPN */
+	g = tppn & (BIT(aia->nr_guest_bits) - 1);
+	tppn &= ~((gpa_t)(BIT(aia->nr_guest_bits) - 1));
+
+	/* Inject MSI to matching VCPU */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		ippn = vcpu->arch.aia_context.imsic_addr >>
+					IMSIC_MMIO_PAGE_SHIFT;
+		if (ippn == tppn) {
+			toff = target & (IMSIC_MMIO_PAGE_SZ - 1);
+			return kvm_riscv_vcpu_aia_imsic_inject(vcpu, g,
+							       toff, iid);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Forward a wired interrupt level change to the APLIC.
+ *
+ * Returns -EBUSY when the in-kernel AIA is not initialized, otherwise
+ * the result of kvm_riscv_aia_aplic_inject().
+ */
+int kvm_riscv_aia_inject_irq(struct kvm *kvm, unsigned int irq, bool level)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return -EBUSY;
+
+	/* Inject interrupt level change in APLIC */
+	return kvm_riscv_aia_aplic_inject(kvm, irq, level);
+}
+
+/* Set the per-VM AIA configuration defaults. */
+void kvm_riscv_aia_init_vm(struct kvm *kvm)
+{
+	struct kvm_aia *aia = &kvm->arch.aia;
+
+	if (!kvm_riscv_aia_available())
+		return;
+
+	/*
+	 * We don't do any memory allocations over here because these
+	 * will be done after AIA device is initialized by the user-space.
+	 *
+	 * Refer, aia_init() implementation for more details.
+	 */
+
+	/* Initialize default values in AIA global context */
+	aia->mode = (kvm_riscv_aia_nr_hgei) ?
+		KVM_DEV_RISCV_AIA_MODE_AUTO : KVM_DEV_RISCV_AIA_MODE_EMUL;
+	aia->nr_ids = kvm_riscv_aia_max_ids - 1;
+	aia->nr_sources = 0;
+	aia->nr_group_bits = 0;
+	aia->nr_group_shift = KVM_DEV_RISCV_AIA_GROUP_SHIFT_MIN;
+	aia->nr_hart_bits = 0;
+	aia->nr_guest_bits = 0;
+	aia->aplic_addr = KVM_RISCV_AIA_UNDEF_ADDR;
+}
+
+/* Release the APLIC context if the in-kernel AIA is initialized. */
+void kvm_riscv_aia_destroy_vm(struct kvm *kvm)
+{
+	/* Proceed only if AIA was initialized successfully */
+	if (!kvm_riscv_aia_initialized(kvm))
+		return;
+
+	/* Cleanup APLIC context */
+	kvm_riscv_aia_aplic_cleanup(kvm);
+}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 737318b1c1d9..27ccd07898e1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1442,6 +1442,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_XIVE		KVM_DEV_TYPE_XIVE
 	KVM_DEV_TYPE_ARM_PV_TIME,
 #define KVM_DEV_TYPE_ARM_PV_TIME	KVM_DEV_TYPE_ARM_PV_TIME
+	KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA		KVM_DEV_TYPE_RISCV_AIA
 	KVM_DEV_TYPE_MAX,
 };
We implement a KVM device interface for the in-kernel AIA irqchip so that user-space can use KVM device ioctls to create, configure, and destroy the in-kernel AIA irqchip. Signed-off-by: Anup Patel <apatel@ventanamicro.com> --- arch/riscv/include/asm/kvm_aia.h | 132 +++++-- arch/riscv/include/uapi/asm/kvm.h | 45 +++ arch/riscv/kvm/Makefile | 1 + arch/riscv/kvm/aia.c | 11 + arch/riscv/kvm/aia_device.c | 622 ++++++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 2 + 6 files changed, 771 insertions(+), 42 deletions(-) create mode 100644 arch/riscv/kvm/aia_device.c