Message ID | 20240213094114.3961683-1-oliver.upton@linux.dev (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Improvements to LPI injection | expand |
On Tue, 13 Feb 2024 09:41:14 +0000, Oliver Upton <oliver.upton@linux.dev> wrote: > > A prerequisite of testing LPI injection performance is of course > instantiating an ITS for the guest. Add a small library for creating an > ITS and interacting with it *from userspace*. > > Yep, you read that right. KVM unintentionally allows userspace to send > commands to the virtual ITS via the command queue. Besides adding test > coverage for an elusive UAPI, interacting with the ITS in userspace > simplifies the handling of commands that need to allocate memory, like a > MAPD command with an ITT. I don't mean to derail the party, but I really think we should plug this hole. Either that, or we make it an official interface for state restore. And don't we all love to have multiple interfaces to do the same thing? > > Signed-off-by: Oliver Upton <oliver.upton@linux.dev> > --- > .../selftests/kvm/include/aarch64/gic.h | 7 +- > .../selftests/kvm/include/aarch64/vgic.h | 20 ++ > .../testing/selftests/kvm/lib/aarch64/vgic.c | 241 ++++++++++++++++++ > 3 files changed, 267 insertions(+), 1 deletion(-) > > diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h > index 16d944486e9c..abb41d67880c 100644 > --- a/tools/testing/selftests/kvm/include/aarch64/gic.h > +++ b/tools/testing/selftests/kvm/include/aarch64/gic.h > @@ -11,7 +11,12 @@ enum gic_type { > GIC_TYPE_MAX, > }; > > -#define GICD_BASE_GPA 0x8000000ULL > +/* > + * Note that the redistributor frames are at the end, as the range scales > + * with the number of vCPUs in the VM. > + */ > +#define GITS_BASE_GPA 0x8000000ULL > +#define GICD_BASE_GPA (GITS_BASE_GPA + SZ_128K) > #define GICR_BASE_GPA (GICD_BASE_GPA + SZ_64K) > > /* The GIC is identity-mapped into the guest at the time of setup. 
*/ > diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h > index ce19aa0a8360..d45b2902439d 100644 > --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h > +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h > @@ -32,4 +32,24 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); > > #define KVM_IRQCHIP_NUM_PINS (1020 - 32) > > +struct vgic_its { > + int its_fd; > + void *cmdq_hva; > + size_t cmdq_size; > +}; > + > +struct vgic_its *vgic_its_setup(struct kvm_vm *vm, > + vm_paddr_t coll_tbl, size_t coll_tbl_sz, > + vm_paddr_t device_tbl, size_t device_tbl_sz, > + vm_paddr_t cmdq, size_t cmdq_size); > +void vgic_its_destroy(struct vgic_its *its); > + > +void vgic_its_send_mapd_cmd(struct vgic_its *its, u32 device_id, > + vm_paddr_t itt_base, size_t itt_size, bool valid); > +void vgic_its_send_mapc_cmd(struct vgic_its *its, struct kvm_vcpu *vcpu, > + u32 collection_id, bool valid); > +void vgic_its_send_mapti_cmd(struct vgic_its *its, u32 device_id, > + u32 event_id, u32 collection_id, u32 intid); > +void vgic_its_send_invall_cmd(struct vgic_its *its, u32 collection_id); > + > #endif // SELFTEST_KVM_VGIC_H > diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c > index ac55b6c2e915..fc7b4fbe6453 100644 > --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c > +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c > @@ -12,6 +12,7 @@ > #include "vgic.h" > #include "gic.h" > #include "gic_v3.h" > +#include "processor.h" > > /* > * vGIC-v3 default host setup > @@ -166,3 +167,243 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) > { > vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); > } > + > +static u64 vgic_its_read_reg(int its_fd, unsigned long offset) > +{ > + u64 attr; > + > + kvm_device_attr_get(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, > + offset, &attr); > + return attr; > +} > + 
> +static void vgic_its_write_reg(int its_fd, unsigned long offset, u64 val) > +{ > + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, > + offset, &val); > +} > + > +static unsigned long vgic_its_find_baser(int its_fd, unsigned int type) > +{ > + int i; > + > + for (i = 0; i < GITS_BASER_NR_REGS; i++) { > + u64 baser; > + unsigned long offset = GITS_BASER + (i * sizeof(baser)); > + > + baser = vgic_its_read_reg(its_fd, offset); > + if (GITS_BASER_TYPE(baser) == type) > + return offset; > + } > + > + TEST_FAIL("Couldn't find an ITS BASER of type %u", type); > + return -1; > +} > + > +static void vgic_its_install_table(int its_fd, unsigned int type, vm_paddr_t base, > + size_t size) > +{ > + unsigned long offset = vgic_its_find_baser(its_fd, type); > + u64 baser; > + > + baser = ((size / SZ_64K) - 1) | > + GITS_BASER_PAGE_SIZE_64K | > + GITS_BASER_InnerShareable | > + base | > + GITS_BASER_RaWaWb | > + GITS_BASER_VALID; > + > + vgic_its_write_reg(its_fd, offset, baser); > +} > + > +static void vgic_its_install_cmdq(int its_fd, vm_paddr_t base, size_t size) > +{ > + u64 cbaser; > + > + cbaser = ((size / SZ_4K) - 1) | > + GITS_CBASER_InnerShareable | > + base | > + GITS_CBASER_RaWaWb | > + GITS_CBASER_VALID; > + > + vgic_its_write_reg(its_fd, GITS_CBASER, cbaser); > +} > + > +struct vgic_its *vgic_its_setup(struct kvm_vm *vm, > + vm_paddr_t coll_tbl, size_t coll_tbl_sz, > + vm_paddr_t device_tbl, size_t device_tbl_sz, > + vm_paddr_t cmdq, size_t cmdq_size) > +{ > + int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS); > + struct vgic_its *its = malloc(sizeof(struct vgic_its)); > + u64 attr, ctlr; > + > + attr = GITS_BASE_GPA; > + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, > + KVM_VGIC_ITS_ADDR_TYPE, &attr); > + > + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, > + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); > + > + vgic_its_install_table(its_fd, GITS_BASER_TYPE_COLLECTION, coll_tbl, > + coll_tbl_sz); > + vgic_its_install_table(its_fd, 
GITS_BASER_TYPE_DEVICE, device_tbl, > + device_tbl_sz); > + > + vgic_its_install_cmdq(its_fd, cmdq, cmdq_size); > + > + ctlr = vgic_its_read_reg(its_fd, GITS_CTLR); > + ctlr |= GITS_CTLR_ENABLE; > + vgic_its_write_reg(its_fd, GITS_CTLR, ctlr); > + > + *its = (struct vgic_its) { > + .its_fd = its_fd, > + .cmdq_hva = addr_gpa2hva(vm, cmdq), > + .cmdq_size = cmdq_size, > + }; > + > + return its; > +} > + > +void vgic_its_destroy(struct vgic_its *its) > +{ > + close(its->its_fd); > + free(its); > +} > + > +struct its_cmd_block { > + union { > + u64 raw_cmd[4]; > + __le64 raw_cmd_le[4]; > + }; > +}; > + > +static inline void its_fixup_cmd(struct its_cmd_block *cmd) > +{ > + /* Let's fixup BE commands */ > + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); > + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); > + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); > + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); > +} > + > +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) > +{ > + u64 mask = GENMASK_ULL(h, l); > + *raw_cmd &= ~mask; > + *raw_cmd |= (val << l) & mask; > +} > + > +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) > +{ > + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); > +} > + > +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) > +{ > + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); > +} > + > +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) > +{ > + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); > +} > + > +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) > +{ > + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); > +} > + > +static void its_encode_size(struct its_cmd_block *cmd, u8 size) > +{ > + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); > +} > + > +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) > +{ > + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); > +} > + > +static void its_encode_valid(struct its_cmd_block *cmd, 
int valid) > +{ > + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); > +} > + > +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) > +{ > + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); > +} > + > +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) > +{ > + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); > +} > + > +static void vgic_its_send_cmd(struct vgic_its *its, struct its_cmd_block *cmd) > +{ > + u64 cwriter = vgic_its_read_reg(its->its_fd, GITS_CWRITER); > + struct its_cmd_block *dst = its->cmdq_hva + cwriter; > + u64 next; > + > + its_fixup_cmd(cmd); > + > + WRITE_ONCE(*dst, *cmd); > + dsb(ishst); > + > + next = (cwriter + sizeof(*cmd)) % its->cmdq_size; > + vgic_its_write_reg(its->its_fd, GITS_CWRITER, next); > + > + TEST_ASSERT(vgic_its_read_reg(its->its_fd, GITS_CREADR) == next, > + "ITS didn't process command at offset: %lu\n", cwriter); > +} > + > +void vgic_its_send_mapd_cmd(struct vgic_its *its, u32 device_id, > + vm_paddr_t itt_base, size_t itt_size, bool valid) > +{ > + struct its_cmd_block cmd = {}; > + > + its_encode_cmd(&cmd, GITS_CMD_MAPD); > + its_encode_devid(&cmd, device_id); > + its_encode_size(&cmd, ilog2(itt_size) - 1); > + its_encode_itt(&cmd, itt_base); > + its_encode_valid(&cmd, valid); > + > + vgic_its_send_cmd(its, &cmd); > +} > + > +void vgic_its_send_mapc_cmd(struct vgic_its *its, struct kvm_vcpu *vcpu, > + u32 collection_id, bool valid) > +{ > + struct its_cmd_block cmd = {}; > + > + its_encode_cmd(&cmd, GITS_CMD_MAPC); > + its_encode_collection(&cmd, collection_id); > + its_encode_target(&cmd, vcpu->id); > + its_encode_valid(&cmd, valid); > + > + vgic_its_send_cmd(its, &cmd); > +} > + > +void vgic_its_send_mapti_cmd(struct vgic_its *its, u32 device_id, > + u32 event_id, u32 collection_id, u32 intid) > +{ > + struct its_cmd_block cmd = {}; > + > + its_encode_cmd(&cmd, GITS_CMD_MAPTI); > + its_encode_devid(&cmd, device_id); > + its_encode_event_id(&cmd, event_id); > + 
its_encode_phys_id(&cmd, intid); > + its_encode_collection(&cmd, collection_id); > + > + vgic_its_send_cmd(its, &cmd); > +} > + > +void vgic_its_send_invall_cmd(struct vgic_its *its, u32 collection_id) > +{ > + struct its_cmd_block cmd = {}; > + > + its_encode_cmd(&cmd, GITS_CMD_INVALL); > + its_encode_collection(&cmd, collection_id); > + > + vgic_its_send_cmd(its, &cmd); > +} Holy crap, that's a whole ITS driver in loserspace. *mindblown*. M.
On Wed, Feb 14, 2024 at 05:32:25PM +0000, Marc Zyngier wrote: > On Tue, 13 Feb 2024 09:41:14 +0000, > Oliver Upton <oliver.upton@linux.dev> wrote: > > > > A prerequisite of testing LPI injection performance is of course > > instantiating an ITS for the guest. Add a small library for creating an > > ITS and interacting with it *from userspace*. > > > > Yep, you read that right. KVM unintentionally allows userspace to send > > commands to the virtual ITS via the command queue. Besides adding test > > coverage for an elusive UAPI, interacting with the ITS in userspace > > simplifies the handling of commands that need to allocate memory, like a > > MAPD command with an ITT. > > I don't mean to derail the party, but I really think we should plug > this hole. Either that, or we make it an official interface for state > restore. And don't we all love to have multiple interfaces to do the > same thing? Ok, I've thought about it a bit more and I'm fully convinced we need to shut the door on this stupidity. We expect CREADR == CWRITER at the time userspace saves the ITS registers, but we have a *hideous* ordering issue on the restore path. If the order of restore from userspace is CBASER, CWRITER, CREADR then we **wind up replaying the entire command queue**. While insane, I'm pretty sure it is legal for the guest to write garbage after the read pointer has moved past a particular command index. Fsck!!! So, how about we do this: - Provide a uaccess hook for CWRITER that changes the write-pointer without processing any commands - Assert an invariant that at any time CWRITER or CREADR are read from userspace that CREADR == CWRITER. Fail the ioctl and scream if that isn't the case, so that way we never need to worry about processing 'in-flight' commands at the destination.
On Wed, 14 Feb 2024 19:00:00 +0000, Oliver Upton <oliver.upton@linux.dev> wrote: > > On Wed, Feb 14, 2024 at 05:32:25PM +0000, Marc Zyngier wrote: > > On Tue, 13 Feb 2024 09:41:14 +0000, > > Oliver Upton <oliver.upton@linux.dev> wrote: > > > > > > A prerequisite of testing LPI injection performance is of course > > > instantiating an ITS for the guest. Add a small library for creating an > > > ITS and interacting with it *from userspace*. > > > > > > Yep, you read that right. KVM unintentionally allows userspace to send > > > commands to the virtual ITS via the command queue. Besides adding test > > > coverage for an elusive UAPI, interacting with the ITS in userspace > > > simplifies the handling of commands that need to allocate memory, like a > > > MAPD command with an ITT. > > > > I don't mean to derail the party, but I really think we should plug > > this hole. Either that, or we make it an official interface for state > > restore. And don't we all love to have multiple interfaces to do the > > same thing? > > Ok, I've thought about it a bit more and I'm fully convinced we need to > shut the door on this stupidity. > > We expect CREADR == CWRITER at the time userspace saves the ITS > registers, but we have a *hideous* ordering issue on the restore path. > > If the order of restore from userspace is CBASER, CWRITER, CREADR then > we **wind up replaying the entire command queue**. While insane, I'm > pretty sure it is legal for the guest to write garbage after the read > pointer has moved past a particular command index. > > Fsck!!! This is documented Documentation/virt/kvm/devices/arm-vgic-its.rst to some extent, and it is allowed for the guest to crap itself on behalf of userspace if the ordering isn't respected. > So, how about we do this: > > - Provide a uaccess hook for CWRITER that changes the write-pointer > without processing any commands > > - Assert an invariant that at any time CWRITER or CREADR are read from > userspace that CREADR == CWRITER. 
Fail the ioctl and scream if that > isn't the case, so that way we never need to worry about processing > 'in-flight' commands at the destination. Are we guaranteed that we cannot ever see CWRITER != CREADR at VM dumping time? I'm not convinced that we cannot preempt the vcpu thread at the right spot, especially given that you can have an arbitrarily large batch of commands to execute. Just add a page-fault to the mix, and a signal pending. Pronto, you see a guest exit and you should be able to start dumping things without the ITS having processed much. I haven't tried, but that doesn't seem totally unlikely. M.
On Wed, Feb 14, 2024 at 08:09:52PM +0000, Marc Zyngier wrote: > > If the order of restore from userspace is CBASER, CWRITER, CREADR then > > we **wind up replaying the entire command queue**. While insane, I'm > > pretty sure it is legal for the guest to write garbage after the read > > pointer has moved past a particular command index. > > > > Fsck!!! > > This is documented Documentation/virt/kvm/devices/arm-vgic-its.rst to > some extent, and it is allowed for the guest to crap itself on behalf > of userspace if the ordering isn't respected. Ah, fair, I missed the documentation here. If we require userspace to write CTLR last then we _should_ be fine, but damn is this a tricky set of expectations. > > So, how about we do this: > > > > - Provide a uaccess hook for CWRITER that changes the write-pointer > > without processing any commands > > > > - Assert an invariant that at any time CWRITER or CREADR are read from > > userspace that CREADR == CWRITER. Fail the ioctl and scream if that > > isn't the case, so that way we never need to worry about processing > > 'in-flight' commands at the destination. > > Are we guaranteed that we cannot ever see CWRITER != CREADR at VM > dumping time? I'm not convinced that we cannot preempt the vcpu thread > at the right spot, specially given that you can have an arbitrary > large batch of commands to execute. > > Just add a page-fault to the mix, and a signal pending. Pronto, you > see a guest exit and you should be able to start dumping things > without the ITS having processed much. I haven't tried, but that > doesn't seem totally unlikely. Well, we would need to run all userspace reads and writes through the cmd_lock in this case, which is what we already do for the CREADR uaccess hook. To me the 'racy' queue accessors only make sense for guest accesses, since the driver is expecting to poll for completion in that case. Otherwise we decide the existing rules for restoring the ITS are fine and I get to keep my funky driver :)
On Wed, Feb 14, 2024 at 12:55:11PM -0800, Oliver Upton wrote: > On Wed, Feb 14, 2024 at 08:09:52PM +0000, Marc Zyngier wrote: > > > If the order of restore from userspace is CBASER, CWRITER, CREADR then > > > we **wind up replaying the entire command queue**. While insane, I'm > > > pretty sure it is legal for the guest to write garbage after the read > > > pointer has moved past a particular command index. > > > > > > Fsck!!! > > > > This is documented Documentation/virt/kvm/devices/arm-vgic-its.rst to > > some extent, and it is allowed for the guest to crap itself on behalf > > of userspace if the ordering isn't respected. > > Ah, fair, I missed the documentation here. If we require userspace to > write CTLR last then we _should_ be fine, but damn is this a tricky set > of expectations. > > > > So, how about we do this: > > > > > > - Provide a uaccess hook for CWRITER that changes the write-pointer > > > without processing any commands > > > > > > - Assert an invariant that at any time CWRITER or CREADR are read from > > > userspace that CREADR == CWRITER. Fail the ioctl and scream if that > > > isn't the case, so that way we never need to worry about processing > > > 'in-flight' commands at the destination. > > > > Are we guaranteed that we cannot ever see CWRITER != CREADR at VM > > dumping time? I'm not convinced that we cannot preempt the vcpu thread > > at the right spot, specially given that you can have an arbitrary > > large batch of commands to execute. > > > > Just add a page-fault to the mix, and a signal pending. Pronto, you > > see a guest exit and you should be able to start dumping things > > without the ITS having processed much. I haven't tried, but that > > doesn't seem totally unlikely. > > Well, we would need to run all userspace reads and writes through the > cmd_lock in this case, which is what we already do for the CREADR > uaccess hook. 
To me the 'racy' queue accessors only make sense for guest > accesses, since the driver is expecting to poll for completion in that > case. My proposed invariant cannot be maintained, of course, since userspace can do whatever it pleases on the cmdq pointers. > Otherwise we decide the existing rules for restoring the ITS are fine > and I get to keep my funky driver :) > > -- > Thanks, > Oliver >
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h index 16d944486e9c..abb41d67880c 100644 --- a/tools/testing/selftests/kvm/include/aarch64/gic.h +++ b/tools/testing/selftests/kvm/include/aarch64/gic.h @@ -11,7 +11,12 @@ enum gic_type { GIC_TYPE_MAX, }; -#define GICD_BASE_GPA 0x8000000ULL +/* + * Note that the redistributor frames are at the end, as the range scales + * with the number of vCPUs in the VM. + */ +#define GITS_BASE_GPA 0x8000000ULL +#define GICD_BASE_GPA (GITS_BASE_GPA + SZ_128K) #define GICR_BASE_GPA (GICD_BASE_GPA + SZ_64K) /* The GIC is identity-mapped into the guest at the time of setup. */ diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h index ce19aa0a8360..d45b2902439d 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h @@ -32,4 +32,24 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +struct vgic_its { + int its_fd; + void *cmdq_hva; + size_t cmdq_size; +}; + +struct vgic_its *vgic_its_setup(struct kvm_vm *vm, + vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size); +void vgic_its_destroy(struct vgic_its *its); + +void vgic_its_send_mapd_cmd(struct vgic_its *its, u32 device_id, + vm_paddr_t itt_base, size_t itt_size, bool valid); +void vgic_its_send_mapc_cmd(struct vgic_its *its, struct kvm_vcpu *vcpu, + u32 collection_id, bool valid); +void vgic_its_send_mapti_cmd(struct vgic_its *its, u32 device_id, + u32 event_id, u32 collection_id, u32 intid); +void vgic_its_send_invall_cmd(struct vgic_its *its, u32 collection_id); + #endif // SELFTEST_KVM_VGIC_H diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index ac55b6c2e915..fc7b4fbe6453 100644 --- 
a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -12,6 +12,7 @@ #include "vgic.h" #include "gic.h" #include "gic_v3.h" +#include "processor.h" /* * vGIC-v3 default host setup @@ -166,3 +167,243 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } + +static u64 vgic_its_read_reg(int its_fd, unsigned long offset) +{ + u64 attr; + + kvm_device_attr_get(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, + offset, &attr); + return attr; +} + +static void vgic_its_write_reg(int its_fd, unsigned long offset, u64 val) +{ + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ITS_REGS, + offset, &val); +} + +static unsigned long vgic_its_find_baser(int its_fd, unsigned int type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + u64 baser; + unsigned long offset = GITS_BASER + (i * sizeof(baser)); + + baser = vgic_its_read_reg(its_fd, offset); + if (GITS_BASER_TYPE(baser) == type) + return offset; + } + + TEST_FAIL("Couldn't find an ITS BASER of type %u", type); + return -1; +} + +static void vgic_its_install_table(int its_fd, unsigned int type, vm_paddr_t base, + size_t size) +{ + unsigned long offset = vgic_its_find_baser(its_fd, type); + u64 baser; + + baser = ((size / SZ_64K) - 1) | + GITS_BASER_PAGE_SIZE_64K | + GITS_BASER_InnerShareable | + base | + GITS_BASER_RaWaWb | + GITS_BASER_VALID; + + vgic_its_write_reg(its_fd, offset, baser); +} + +static void vgic_its_install_cmdq(int its_fd, vm_paddr_t base, size_t size) +{ + u64 cbaser; + + cbaser = ((size / SZ_4K) - 1) | + GITS_CBASER_InnerShareable | + base | + GITS_CBASER_RaWaWb | + GITS_CBASER_VALID; + + vgic_its_write_reg(its_fd, GITS_CBASER, cbaser); +} + +struct vgic_its *vgic_its_setup(struct kvm_vm *vm, + vm_paddr_t coll_tbl, size_t coll_tbl_sz, + vm_paddr_t device_tbl, size_t device_tbl_sz, + vm_paddr_t cmdq, size_t cmdq_size) +{ + int its_fd = kvm_create_device(vm, 
KVM_DEV_TYPE_ARM_VGIC_ITS); + struct vgic_its *its = malloc(sizeof(struct vgic_its)); + u64 attr, ctlr; + + attr = GITS_BASE_GPA; + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &attr); + + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + vgic_its_install_table(its_fd, GITS_BASER_TYPE_COLLECTION, coll_tbl, + coll_tbl_sz); + vgic_its_install_table(its_fd, GITS_BASER_TYPE_DEVICE, device_tbl, + device_tbl_sz); + + vgic_its_install_cmdq(its_fd, cmdq, cmdq_size); + + ctlr = vgic_its_read_reg(its_fd, GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + vgic_its_write_reg(its_fd, GITS_CTLR, ctlr); + + *its = (struct vgic_its) { + .its_fd = its_fd, + .cmdq_hva = addr_gpa2hva(vm, cmdq), + .cmdq_size = cmdq_size, + }; + + return its; +} + +void vgic_its_destroy(struct vgic_its *its) +{ + close(its->its_fd); + free(its); +} + +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void 
its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +static void vgic_its_send_cmd(struct vgic_its *its, struct its_cmd_block *cmd) +{ + u64 cwriter = vgic_its_read_reg(its->its_fd, GITS_CWRITER); + struct its_cmd_block *dst = its->cmdq_hva + cwriter; + u64 next; + + its_fixup_cmd(cmd); + + WRITE_ONCE(*dst, *cmd); + dsb(ishst); + + next = (cwriter + sizeof(*cmd)) % its->cmdq_size; + vgic_its_write_reg(its->its_fd, GITS_CWRITER, next); + + TEST_ASSERT(vgic_its_read_reg(its->its_fd, GITS_CREADR) == next, + "ITS didn't process command at offset: %lu\n", cwriter); +} + +void vgic_its_send_mapd_cmd(struct vgic_its *its, u32 device_id, + vm_paddr_t itt_base, size_t itt_size, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPD); + its_encode_devid(&cmd, device_id); + its_encode_size(&cmd, ilog2(itt_size) - 1); + its_encode_itt(&cmd, itt_base); + its_encode_valid(&cmd, valid); + + vgic_its_send_cmd(its, &cmd); +} + +void vgic_its_send_mapc_cmd(struct vgic_its *its, struct kvm_vcpu *vcpu, + u32 collection_id, bool valid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPC); + its_encode_collection(&cmd, collection_id); + its_encode_target(&cmd, vcpu->id); + its_encode_valid(&cmd, valid); + + vgic_its_send_cmd(its, &cmd); +} + +void vgic_its_send_mapti_cmd(struct vgic_its *its, u32 device_id, + u32 event_id, u32 collection_id, u32 
intid) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_MAPTI); + its_encode_devid(&cmd, device_id); + its_encode_event_id(&cmd, event_id); + its_encode_phys_id(&cmd, intid); + its_encode_collection(&cmd, collection_id); + + vgic_its_send_cmd(its, &cmd); +} + +void vgic_its_send_invall_cmd(struct vgic_its *its, u32 collection_id) +{ + struct its_cmd_block cmd = {}; + + its_encode_cmd(&cmd, GITS_CMD_INVALL); + its_encode_collection(&cmd, collection_id); + + vgic_its_send_cmd(its, &cmd); +}
A prerequisite of testing LPI injection performance is of course instantiating an ITS for the guest. Add a small library for creating an ITS and interacting with it *from userspace*. Yep, you read that right. KVM unintentionally allows userspace to send commands to the virtual ITS via the command queue. Besides adding test coverage for an elusive UAPI, interacting with the ITS in userspace simplifies the handling of commands that need to allocate memory, like a MAPD command with an ITT. Signed-off-by: Oliver Upton <oliver.upton@linux.dev> --- .../selftests/kvm/include/aarch64/gic.h | 7 +- .../selftests/kvm/include/aarch64/vgic.h | 20 ++ .../testing/selftests/kvm/lib/aarch64/vgic.c | 241 ++++++++++++++++++ 3 files changed, 267 insertions(+), 1 deletion(-)