@@ -367,6 +367,11 @@ struct kvm_arm_counter_offset {
#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
#define KVM_DEV_ARM_ITS_CTRL_RESET 4
+/*
+ * Flags for KVM_DEV_ARM_ITS_{SAVE,RESTORE}_TABLES
+ */
+#define KVM_DEV_ARM_ITS_ITT_UBUF (1ULL << 0)
+
#define KVM_DEV_ARM_VGIC_NR_IRQS_SHIFT 12
#define KVM_DEV_ARM_VGIC_NR_IRQS_MASK \
((1 << KVM_DEV_ARM_VGIC_NR_IRQS_SHIFT) - 1)
@@ -2225,6 +2225,62 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz,
return 1;
}
+/*
+ * vgic_its_ubuf_append_entry - append one 64-bit entry to the userspace ITT
+ * buffer described by its->itt_ubuf.
+ * @its:   ITS whose itt_ubuf cursor (slot_next/slot_max) tracks the buffer
+ * @entry: entry value in CPU byte order; it is stored little-endian
+ *
+ * Returns 0 on success, -ENOMEM when no free slot is left in the buffer and
+ * -EFAULT when the copy to userspace fails. slot_next only advances on
+ * success.
+ */
+static int vgic_its_ubuf_append_entry(struct vgic_its *its, u64 entry)
+{
+	/*
+	 * Keep the converted value in a __le64 rather than recycling the u64
+	 * parameter, so that sparse can type-check the endianness handling.
+	 */
+	__le64 le_entry = cpu_to_le64(entry);
+	size_t slot = its->itt_ubuf.slot_next;
+
+	/* slot_max is the index of the last usable slot, hence '>' not '>='. */
+	if (slot > its->itt_ubuf.slot_max)
+		return -ENOMEM;
+
+	if (copy_to_user(&its->itt_ubuf.ubuf[slot], &le_entry, sizeof(le_entry)))
+		return -EFAULT;
+
+	its->itt_ubuf.slot_next = slot + 1;
+	return 0;
+}
+
+/*
+ * vgic_its_save_itt_ubuf - serialize the ITT of @device into the userspace
+ * buffer attached to @its.
+ *
+ * The blob emitted for one device is: a start marker, one entry per ITE in
+ * list order, then an end marker. Returns 0 on success, -EACCES when an ITE
+ * is backed by a HW interrupt and GICv4.1 is not available, or the error
+ * reported by vgic_its_ubuf_append_entry().
+ */
+static int vgic_its_save_itt_ubuf(struct vgic_its *its, struct its_device *device)
+{
+	/* Both markers carry the device ID in the top 16 bits of the entry. */
+	const u64 dev_field = (u64)device->device_id << KVM_ITS_ITE_NEXT_SHIFT;
+	struct its_ite *ite;
+	u64 word;
+	int err;
+
+	/*
+	 * Start marker. This reuses the ITE layout of the ITS Table ABI REV0:
+	 * valid LPI INTIDs start at 8192, so smaller values in the pINTID
+	 * field are free to encode other entry types such as the ITT
+	 * start/end markers.
+	 */
+	word = dev_field |
+	       ((u64)KVM_ITS_ITT_START_MARKER << KVM_ITS_ITE_PINTID_SHIFT);
+	err = vgic_its_ubuf_append_entry(its, word);
+	if (err)
+		return err;
+
+	list_for_each_entry(ite, &device->itt_head, ite_list) {
+		/*
+		 * An LPI with the HW bit set is driven by GICv4. Without
+		 * GICv4.1 its state is not directly accessible, so the whole
+		 * save operation is failed.
+		 */
+		if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1)
+			return -EACCES;
+
+		/* event ID | pINTID | collection ID */
+		word = (u64)ite->event_id << KVM_ITS_ITE_NEXT_SHIFT;
+		word |= (u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT;
+		word |= ite->collection->collection_id;
+
+		err = vgic_its_ubuf_append_entry(its, word);
+		if (err)
+			return err;
+	}
+
+	/* End marker closes the ITT of this device. */
+	word = dev_field |
+	       ((u64)KVM_ITS_ITT_END_MARKER << KVM_ITS_ITE_PINTID_SHIFT);
+	return vgic_its_ubuf_append_entry(its, word);
+}
+
/**
* vgic_its_save_ite - Save an interrupt translation entry at @gpa
*/
@@ -2327,6 +2383,9 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
list_sort(NULL, &device->itt_head, vgic_its_ite_cmp);
+ if (its->itt_ubuf.must_be_used)
+ return vgic_its_save_itt_ubuf(its, device);
+
list_for_each_entry(ite, &device->itt_head, ite_list) {
gpa_t gpa = base + ite->event_id * ite_esz;
@@ -2494,10 +2553,12 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
if (IS_ERR(dev))
return PTR_ERR(dev);
- ret = vgic_its_restore_itt(its, dev);
- if (ret) {
- vgic_its_free_device(its->dev->kvm, its, dev, false);
- return ret;
+ if (!its->itt_ubuf.must_be_used) {
+ ret = vgic_its_restore_itt(its, dev);
+ if (ret) {
+ vgic_its_free_device(its->dev->kvm, its, dev, false);
+ return ret;
+ }
}
return offset;
@@ -2776,6 +2837,112 @@ static int vgic_its_save_tables_v0(struct vgic_its *its)
return vgic_its_save_collection_table(its);
}
+/*
+ * vgic_its_ubuf_pop_entry - read the next 64-bit entry from the userspace ITT
+ * buffer described by its->itt_ubuf.
+ * @its:   ITS whose itt_ubuf cursor (slot_next/slot_max) tracks the buffer
+ * @entry: output; receives the entry converted to CPU byte order
+ *
+ * Returns 0 on success, -EINVAL when @entry is NULL, -ENOMEM when the buffer
+ * is exhausted and -EFAULT when the copy from userspace fails. On failure
+ * neither *entry nor slot_next is modified.
+ */
+static int vgic_its_ubuf_pop_entry(struct vgic_its *its, u64 *entry)
+{
+	/*
+	 * Read into a typed little-endian temporary: this keeps sparse happy
+	 * and avoids leaving a partially written *entry behind if the copy
+	 * from userspace fails.
+	 */
+	__le64 le_entry;
+	size_t slot = its->itt_ubuf.slot_next;
+
+	if (!entry)
+		return -EINVAL;
+
+	/* slot_max is the index of the last usable slot, hence '>' not '>='. */
+	if (slot > its->itt_ubuf.slot_max)
+		return -ENOMEM;
+
+	if (copy_from_user(&le_entry, &its->itt_ubuf.ubuf[slot], sizeof(le_entry)))
+		return -EFAULT;
+
+	its->itt_ubuf.slot_next = slot + 1;
+	*entry = le64_to_cpu(le_entry);
+
+	return 0;
+}
+
+/*
+ * vgic_its_restore_itt_ubuf - restore the ITT of @device from the userspace
+ * buffer attached to @its.
+ *
+ * Consumes, starting at the current buffer cursor: a start marker whose
+ * device ID must match @device, any number of ITEs, and an end marker whose
+ * device ID must match as well.
+ *
+ * Returns 0 on success, -EBADF when the first entry is not a start marker,
+ * -ENODEV when a marker names another device, -ENOMEM when the buffer ends
+ * before the end marker is seen, or the error reported by
+ * vgic_its_ubuf_pop_entry() / vgic_its_restore_ite().
+ */
+static int vgic_its_restore_itt_ubuf(struct vgic_its *its, struct its_device *device)
+{
+	u64 entry, device_id, type, event_id;
+	__le64 le_ite;
+	int ret;
+
+	/* Confirm there is a start marker matching the device ID */
+	ret = vgic_its_ubuf_pop_entry(its, &entry);
+	if (ret)
+		return ret;
+
+	/*
+	 * The markers reuse the ITE layout of the ITS Table ABI REV0: the
+	 * device ID sits in the top 16 bits ('next' field) and the entry type
+	 * sits in the pINTID field.
+	 */
+	device_id = entry >> KVM_ITS_ITE_NEXT_SHIFT;
+	type = (entry & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
+
+	if (type != KVM_ITS_ITT_START_MARKER) {
+		printk(KERN_WARNING "Failed to restore vGIC interrupt translation entry: did not find start marker (device_id=%u)\n",
+		       device->device_id);
+		return -EBADF;
+	}
+	if (device_id != device->device_id) {
+		printk(KERN_WARNING "Failed to restore vGIC interrupt translation entry: found start marker for device_id=%llu instead of device_id=%u\n",
+		       device_id, device->device_id);
+		return -ENODEV;
+	}
+
+	while (its->itt_ubuf.slot_next <= its->itt_ubuf.slot_max) {
+		ret = vgic_its_ubuf_pop_entry(its, &entry);
+		if (ret)
+			return ret;
+
+		/*
+		 * Is this an ITE or is it an end marker?
+		 */
+		type = (entry & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT;
+		if (type == KVM_ITS_ITT_END_MARKER) {
+			device_id = entry >> KVM_ITS_ITE_NEXT_SHIFT;
+			if (device_id != device->device_id)
+				return -ENODEV;
+			return 0;
+		}
+
+		event_id = entry >> KVM_ITS_ITE_NEXT_SHIFT;
+		/*
+		 * Clear the 'next' field (here it held the event ID) so the
+		 * entry looks like a regular REV0 ITE with next == 0.
+		 *
+		 * NOTE(review): vgic_its_restore_ite() appears to be shared
+		 * with the scan_its_table() path and to apply le64_to_cpu()
+		 * to the entry itself, so hand it the little-endian form
+		 * rather than the CPU-order value returned by the pop helper.
+		 * Confirm against its definition; this is a no-op on
+		 * little-endian hosts.
+		 */
+		le_ite = cpu_to_le64(entry & ~KVM_ITS_ITE_NEXT_MASK);
+		ret = vgic_its_restore_ite(its, event_id, &le_ite, device);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* The buffer ran out before an end marker was found. */
+	return -ENOMEM;
+}
+
+/*
+ * vgic_its_restore_itt_all_ubuf - restore the ITTs of every device known to
+ * @its from the userspace buffer.
+ *
+ * Returns 0 on success (including when the device list is empty). On the
+ * first failing device, that device is freed and the error from
+ * vgic_its_restore_itt_ubuf() is returned.
+ */
+static int vgic_its_restore_itt_all_ubuf(struct vgic_its *its)
+{
+	struct its_device *dev;
+	int ret;
+
+	/*
+	 * The list is sorted in vgic_its_save_device_tables() before
+	 * serialization, therefore we expect the ITTs to be sorted in the blob.
+	 */
+	list_sort(NULL, &its->device_list, vgic_its_device_cmp);
+
+	list_for_each_entry(dev, &its->device_list, dev_list) {
+		ret = vgic_its_restore_itt_ubuf(its, dev);
+		if (ret) {
+			/*
+			 * Clean up inside the loop: here 'dev' is guaranteed
+			 * to be a real device and 'ret' to be initialized,
+			 * which is not the case after the loop when the
+			 * device list is empty.
+			 */
+			vgic_its_free_device(its->dev->kvm, its, dev, false);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
/**
* vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM
* to internal data structs according to V0 ABI
@@ -2789,7 +2956,14 @@ static int vgic_its_restore_tables_v0(struct vgic_its *its)
if (ret)
return ret;
- return vgic_its_restore_device_tables(its);
+ ret = vgic_its_restore_device_tables(its);
+ if (ret)
+ return ret;
+
+ if (its->itt_ubuf.must_be_used)
+ return vgic_its_restore_itt_all_ubuf(its);
+
+ return 0;
}
static int vgic_its_commit_v0(struct vgic_its *its)
@@ -2860,7 +3034,11 @@ static int vgic_its_ctlr(struct kvm_device *dev,
struct kvm *kvm = dev->kvm;
const struct vgic_its_abi *abi = vgic_its_get_abi(its);
int ret = 0;
- u64 attrval = attr->attr;
+ /*
+ * The low 32 bits are used for the attribute, whereas the high 32 bits
+ * have a special meaning for KVM_DEV_ARM_ITS_{SAVE,RESTORE}_TABLES
+ */
+ u64 attrval = attr->attr & 0xffffffff;
bool need_itslock = true;
switch (attrval) {
@@ -2886,6 +3064,29 @@ static int vgic_its_ctlr(struct kvm_device *dev,
return -EBUSY;
}
+ if (attrval == KVM_DEV_ARM_ITS_SAVE_TABLES || attrval == KVM_DEV_ARM_ITS_RESTORE_TABLES) {
+ if (attr->flags & KVM_DEV_ARM_ITS_ITT_UBUF) {
+ u32 buf_size = attr->attr >> 32;
+ u32 num_slots = buf_size / sizeof(u64);
+ if (num_slots == 0)
+ return -ENOSPC;
+
+ its->itt_ubuf.must_be_used = true;
+ its->itt_ubuf.ubuf = (u64 *)attr->addr;
+ its->itt_ubuf.slot_next = 0;
+ its->itt_ubuf.slot_max = num_slots - 1;
+
+ if (attrval == KVM_DEV_ARM_ITS_SAVE_TABLES) {
+ /* Zero out the first entry */
+ u64 invalid = 0;
+ if (copy_to_user(&its->itt_ubuf.ubuf[0], &invalid, sizeof(invalid)))
+ return -EFAULT;
+ }
+ } else {
+ its->itt_ubuf.must_be_used = false;
+ }
+ }
+
switch (attrval) {
case KVM_DEV_ARM_ITS_CTRL_RESET:
vgic_its_reset(kvm, its);
@@ -76,6 +76,7 @@ static inline bool vgic_irq_is_lpi(u32 const intid)
#define KVM_ITS_CTE_RDBASE_SHIFT 16
#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0)
#define KVM_ITS_ITE_NEXT_SHIFT 48
+#define KVM_ITS_ITE_NEXT_MASK GENMASK_ULL(63, 48)
#define KVM_ITS_ITE_PINTID_SHIFT 16
#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16)
#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0)
@@ -90,6 +91,9 @@ static inline bool vgic_irq_is_lpi(u32 const intid)
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16)
+#define KVM_ITS_ITT_START_MARKER 1
+#define KVM_ITS_ITT_END_MARKER 2
+
#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0)
#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12)
#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12
@@ -190,6 +190,17 @@ struct vgic_its {
struct list_head device_list;
struct list_head collection_list;
struct list_head inval_dte_list;
+
+ /*
+ * Userspace buffer to be used by KVM_DEV_ARM_ITS_{SAVE,RESTORE}_TABLES
+ * optionally for saving/restoring the ITTs of all device tables.
+ */
+ struct {
+ bool must_be_used;
+ u64 __user *ubuf;
+ size_t slot_max;
+ size_t slot_next;
+ } itt_ubuf;
};
struct vgic_state_iter;
When running a protected VM on top of pKVM or another lowvisor, the EL1 host kernel cannot access guest memory in order to save/restore the ITTs for the KVM_DEV_ARM_ITS_SAVE_TABLES and KVM_DEV_ARM_ITS_RESTORE_TABLES operations. Introduce a new KVM_DEV_ARM_ITS_ITT_UBUF flag that, when set, instructs the vITS to serialize the ITTs into a buffer provided by userspace or restore them from it. The struct kvm_device_attr passed to KVM_DEV_ARM_ITS_{SAVE,RESTORE}_TABLES has a currently unused 'addr' field. Use that field to pass the buffer address. Also use the upper 32 bits of 'attr' from the same struct for the buffer size. The format of the blob stored in the buffer is the following. There is a 64-bit ITT start marker which embeds the device ID owning the ITT. The start marker is followed by 64-bit ITEs stored using the existing ITS Table ABI REV0 with the 'next' field being replaced by an 'event_id' field which stores the event ID rather than an offset. An end marker indicates the end of the ITT and is followed by the start marker for the ITT of the next device. This is an RFC patch, the ABI is not documented yet. Signed-off-by: Ilias Stamatis <ilstam@amazon.com> --- arch/arm64/include/uapi/asm/kvm.h | 5 + arch/arm64/kvm/vgic/vgic-its.c | 213 +++++++++++++++++++++++++++++- arch/arm64/kvm/vgic/vgic.h | 4 + include/kvm/arm_vgic.h | 11 ++ 4 files changed, 227 insertions(+), 6 deletions(-)