@@ -801,6 +801,7 @@ struct kvm {
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
spinlock_t ring_lock;
struct list_head coalesced_zones;
+ struct list_head coalesced_buffers;
#endif
struct mutex irq_lock;
@@ -1573,4 +1573,6 @@ struct kvm_pre_fault_memory {
__u64 padding[5];
};
+#define KVM_CREATE_COALESCED_MMIO_BUFFER _IO(KVMIO, 0xd6)
+
#endif /* __LINUX_KVM_H */
@@ -4,6 +4,7 @@
*
* Copyright (c) 2008 Bull S.A.S.
* Copyright 2009 Red Hat, Inc. and/or its affiliates.
+ * Copyright 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
@@ -14,6 +15,7 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
#include <linux/kvm.h>
+#include <linux/anon_inodes.h>
#include "coalesced_mmio.h"
@@ -40,17 +42,14 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
return 1;
}
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
+static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_ring *ring, u32 last)
{
- struct kvm_coalesced_mmio_ring *ring;
-
/* Are we able to batch it ? */
/* last is the first free entry
* check if we don't meet the first used entry
* there is always one unused entry in the buffer
*/
- ring = dev->kvm->coalesced_mmio_ring;
if ((last + 1) % KVM_COALESCED_MMIO_MAX == READ_ONCE(ring->first)) {
/* full */
return 0;
@@ -65,17 +64,28 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+ spinlock_t *lock = dev->buffer_dev ?
+ &dev->buffer_dev->ring_lock :
+ &dev->kvm->ring_lock;
__u32 insert;
if (!coalesced_mmio_in_range(dev, addr, len))
return -EOPNOTSUPP;
- spin_lock(&dev->kvm->ring_lock);
+ spin_lock(lock);
+
+ if (dev->buffer_dev) {
+ ring = dev->buffer_dev->ring;
+ if (!ring) {
+ spin_unlock(lock);
+ return -EOPNOTSUPP;
+ }
+ }
insert = READ_ONCE(ring->last);
- if (!coalesced_mmio_has_room(dev, insert) ||
+ if (!coalesced_mmio_has_room(ring, insert) ||
insert >= KVM_COALESCED_MMIO_MAX) {
- spin_unlock(&dev->kvm->ring_lock);
+ spin_unlock(lock);
return -EOPNOTSUPP;
}
@@ -87,7 +97,7 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
ring->coalesced_mmio[insert].pio = dev->zone.pio;
smp_wmb();
ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
- spin_unlock(&dev->kvm->ring_lock);
+ spin_unlock(lock);
return 0;
}
@@ -122,6 +132,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
*/
spin_lock_init(&kvm->ring_lock);
INIT_LIST_HEAD(&kvm->coalesced_zones);
+ INIT_LIST_HEAD(&kvm->coalesced_buffers);
return 0;
}
@@ -132,11 +143,125 @@ void kvm_coalesced_mmio_free(struct kvm *kvm)
free_page((unsigned long)kvm->coalesced_mmio_ring);
}
+static void coalesced_mmio_buffer_vma_close(struct vm_area_struct *vma)
+{
+ struct kvm_coalesced_mmio_buffer_dev *dev = vma->vm_private_data;
+
+ spin_lock(&dev->ring_lock);
+
+ vfree(dev->ring);
+ dev->ring = NULL;
+
+ spin_unlock(&dev->ring_lock);
+}
+
+static const struct vm_operations_struct coalesced_mmio_buffer_vm_ops = {
+ .close = coalesced_mmio_buffer_vma_close,
+};
+
+static int coalesced_mmio_buffer_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kvm_coalesced_mmio_buffer_dev *dev = file->private_data;
+ unsigned long pfn;
+ int ret = 0;
+
+ spin_lock(&dev->ring_lock);
+
+ if (dev->ring) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
+ dev->ring = vmalloc_user(PAGE_SIZE);
+ if (!dev->ring) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ pfn = vmalloc_to_pfn(dev->ring);
+
+ if (remap_pfn_range(vma, vma->vm_start, pfn, PAGE_SIZE,
+ vma->vm_page_prot)) {
+ vfree(dev->ring);
+ dev->ring = NULL;
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
+ vma->vm_ops = &coalesced_mmio_buffer_vm_ops;
+ vma->vm_private_data = dev;
+
+out_unlock:
+ spin_unlock(&dev->ring_lock);
+
+ return ret;
+}
+
+static int coalesced_mmio_buffer_release(struct inode *inode, struct file *file)
+{
+ struct kvm_coalesced_mmio_buffer_dev *buffer_dev = file->private_data;
+ struct kvm_coalesced_mmio_dev *mmio_dev, *tmp;
+ struct kvm *kvm = buffer_dev->kvm;
+
+ /* Deregister all zones associated with this ring buffer */
+ mutex_lock(&kvm->slots_lock);
+
+ list_for_each_entry_safe(mmio_dev, tmp, &kvm->coalesced_zones, list) {
+ if (mmio_dev->buffer_dev == buffer_dev) {
+ if (kvm_io_bus_unregister_dev(kvm,
+ mmio_dev->zone.pio ? KVM_PIO_BUS : KVM_MMIO_BUS,
+ &mmio_dev->dev))
+ break;
+ }
+ }
+
+ list_del(&buffer_dev->list);
+ kfree(buffer_dev);
+
+ mutex_unlock(&kvm->slots_lock);
+
+ return 0;
+}
+
+static const struct file_operations coalesced_mmio_buffer_ops = {
+ .mmap = coalesced_mmio_buffer_mmap,
+ .release = coalesced_mmio_buffer_release,
+};
+
+int kvm_vm_ioctl_create_coalesced_mmio_buffer(struct kvm *kvm)
+{
+ int ret;
+ struct kvm_coalesced_mmio_buffer_dev *dev;
+
+ dev = kzalloc(sizeof(struct kvm_coalesced_mmio_buffer_dev),
+ GFP_KERNEL_ACCOUNT);
+ if (!dev)
+ return -ENOMEM;
+
+ dev->kvm = kvm;
+ spin_lock_init(&dev->ring_lock);
+
+ ret = anon_inode_getfd("coalesced_mmio_buf", &coalesced_mmio_buffer_ops,
+ dev, O_RDWR | O_CLOEXEC);
+ if (ret < 0) {
+ kfree(dev);
+ return ret;
+ }
+
+ mutex_lock(&kvm->slots_lock);
+ list_add_tail(&dev->list, &kvm->coalesced_buffers);
+ mutex_unlock(&kvm->slots_lock);
+
+ return ret;
+}
+
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
int ret;
struct kvm_coalesced_mmio_dev *dev;
+ struct kvm_coalesced_mmio_buffer_dev *buffer_dev = NULL;
if (zone->pio != 1 && zone->pio != 0)
return -EINVAL;
@@ -149,6 +274,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
dev->kvm = kvm;
dev->zone = *zone;
+ dev->buffer_dev = buffer_dev;
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm,
@@ -20,6 +20,14 @@ struct kvm_coalesced_mmio_dev {
struct kvm_io_device dev;
struct kvm *kvm;
struct kvm_coalesced_mmio_zone zone;
+ struct kvm_coalesced_mmio_buffer_dev *buffer_dev;
+};
+
+struct kvm_coalesced_mmio_buffer_dev {
+ struct list_head list;
+ struct kvm *kvm;
+ spinlock_t ring_lock;
+ struct kvm_coalesced_mmio_ring *ring;
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
@@ -28,6 +36,7 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
+int kvm_vm_ioctl_create_coalesced_mmio_buffer(struct kvm *kvm);
#else
@@ -5246,6 +5246,10 @@ static long kvm_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
break;
}
+ case KVM_CREATE_COALESCED_MMIO_BUFFER: {
+ r = kvm_vm_ioctl_create_coalesced_mmio_buffer(kvm);
+ break;
+ }
#endif
case KVM_IRQFD: {
struct kvm_irqfd data;
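
For context, below is a minimal, hypothetical userspace sketch of how a VMM might exercise the new ioctl, assuming the fd-backed ring is laid out and consumed exactly like the existing KVM_COALESCED_MMIO_PAGE_OFFSET ring (struct kvm_coalesced_mmio_ring from the uapi header). setup_coalesced_buffer() and drain_ring() are illustrative names, not part of the patch, and the step that associates a registered coalesced zone with a particular buffer is not shown in this section, so it is omitted here.

#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int setup_coalesced_buffer(int vm_fd,
				  struct kvm_coalesced_mmio_ring **ring_out)
{
	int buf_fd;
	void *ring;

	/* New ioctl from this patch: returns an fd backed by an anon inode. */
	buf_fd = ioctl(vm_fd, KVM_CREATE_COALESCED_MMIO_BUFFER);
	if (buf_fd < 0)
		return -1;

	/* mmap() is what triggers the kernel-side allocation of the one-page ring. */
	ring = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, buf_fd, 0);
	if (ring == MAP_FAILED)
		return -1;

	*ring_out = ring;
	return buf_fd;
}

static void drain_ring(struct kvm_coalesced_mmio_ring *ring)
{
	/* Number of entries that fit in the single page backing the ring. */
	unsigned int max = (4096 - sizeof(*ring)) / sizeof(ring->coalesced_mmio[0]);

	/* Consume entries the same way as the legacy per-VM ring: first..last. */
	while (ring->first != ring->last) {
		struct kvm_coalesced_mmio *e = &ring->coalesced_mmio[ring->first];

		printf("coalesced %s at 0x%llx, len %u\n",
		       e->pio ? "pio" : "mmio",
		       (unsigned long long)e->phys_addr, e->len);

		/* Publish the new read index only after the entry is consumed. */
		ring->first = (ring->first + 1) % max;
	}
}

Memory barriers are omitted for brevity; a real consumer would order the read of each entry against advancing ring->first, mirroring what existing VMMs do for the legacy ring and matching the smp_wmb() the kernel issues before updating ring->last.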