diff mbox series

[v3,1/2] KVM: guest_memfd: add generic population via write

Message ID 20250303130838.28812-2-kalyazin@amazon.com (mailing list archive)
State New
Headers show
Series KVM: guest_memfd: use write for population | expand

Commit Message

Nikita Kalyazin March 3, 2025, 1:08 p.m. UTC
The write syscall populates guest_memfd with user-supplied data in a
generic way, i.e. no vendor-specific preparation is performed.  This is
intended for non-CoCo setups where guest memory is not
hardware-encrypted.

The following behaviour is implemented:
 - only page-aligned count and offset are allowed
 - if the memory is already allocated, the call will successfully
   populate it
 - if the memory is not allocated, the call will both allocate and
   populate it
 - if a page is already populated, the call will not repopulate it: the
   write stops at that page, and fails with ENOSPC if no page was written

Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
 virt/kvm/guest_memfd.c | 94 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 30b47ff0e6d2..f93fe5835173 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -417,12 +417,97 @@  static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
 
 	return 0;
 }
-#else
-#define kvm_gmem_mmap NULL
+
+/*
+ * Populate guest_memfd pages from a userspace buffer, one page at a time.
+ * Both *offset and count must be page-aligned and the range must lie within
+ * the file size.  A page that is already uptodate is not overwritten; the
+ * write stops there with -ENOSPC.
+ * Returns the number of bytes written if at least one page was populated,
+ * otherwise a negative errno (or 0 when count is 0).  *offset is advanced
+ * past every populated page.
+ */
+static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	struct inode *inode = file_inode(file);
+	pgoff_t start, end, index;
+	ssize_t ret = 0;
+
+	if (!PAGE_ALIGNED(*offset) || !PAGE_ALIGNED(count) || !buf)
+		return -EINVAL;
+
+	if (*offset + count > i_size_read(inode))
+		return -EINVAL;
+
+	start = *offset >> PAGE_SHIFT;
+	end = (*offset + count) >> PAGE_SHIFT;
+
+	filemap_invalidate_lock_shared(file->f_mapping);
+
+	for (index = start; index < end; ) {
+		size_t buf_offset = (index - start) << PAGE_SHIFT;
+		unsigned long uncopied;
+		struct folio *folio;
+		void *vaddr;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		folio = kvm_gmem_get_folio(inode, index);
+		if (IS_ERR(folio)) {
+			/* Propagate the helper's errno instead of -EFAULT. */
+			ret = PTR_ERR(folio);
+			break;
+		}
+
+		/* Reject poisoned folios and (unsupported) huge pages. */
+		if (folio_test_hwpoison(folio) ||
+		    WARN_ON_ONCE(folio_test_large(folio)))
+			ret = -EFAULT;
+		else if (folio_test_uptodate(folio))
+			ret = -ENOSPC;	/* already populated, keep contents */
+		folio_unlock(folio);
+		if (ret) {
+			folio_put(folio);
+			break;
+		}
+
+		/* NOTE(review): unlocked copy; concurrent writers may race. */
+		vaddr = kmap_local_folio(folio, 0);
+		uncopied = copy_from_user(vaddr, buf + buf_offset, PAGE_SIZE);
+		kunmap_local(vaddr);
+		if (uncopied) {
+			ret = -EFAULT;
+			folio_put(folio);
+			break;
+		}
+
+		kvm_gmem_mark_prepared(folio);
+
+		/* Read the next index before dropping our folio reference. */
+		index = folio_next_index(folio);
+		folio_put(folio);
+		*offset += PAGE_SIZE;
+	}
+
+	filemap_invalidate_unlock_shared(file->f_mapping);
+
+	/* Partial progress takes precedence over the error that ended it. */
+	if (ret && start == (*offset >> PAGE_SHIFT))
+		return ret;
+	return *offset - (start << PAGE_SHIFT);
+}
 #endif /* CONFIG_KVM_GMEM_SHARED_MEM */
 
 static struct file_operations kvm_gmem_fops = {
-	.mmap		= kvm_gmem_mmap,
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+	.mmap           = kvm_gmem_mmap,
+	.llseek         = default_llseek,
+	.write          = kvm_kmem_gmem_write,
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
@@ -538,6 +623,9 @@  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	}
 
 	file->f_flags |= O_LARGEFILE;
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+	file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
 
 	inode = file->f_inode;
 	WARN_ON(file->f_mapping != inode->i_mapping);