diff mbox series

[v19,014/130] KVM: Add KVM vcpu ioctl to pre-populate guest memory

Message ID 8b7380f1b02f8e3995f18bebb085e43165d5d682.1708933498.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series [v19,001/130] x86/virt/tdx: Rename _offset to _member for TD_SYSINFO_MAP() macro | expand

Commit Message

Isaku Yamahata Feb. 26, 2024, 8:25 a.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

Add a new vcpu ioctl, KVM_MEMORY_MAPPING, to the KVM common code.  It iterates
over the requested memory range and calls an arch-specific function.  Add stub
functions as weak symbols.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v19:
- newly added
---
 include/linux/kvm_host.h |  4 +++
 include/uapi/linux/kvm.h | 10 ++++++
 virt/kvm/kvm_main.c      | 67 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 81 insertions(+)

Comments

Yin Fengwei March 7, 2024, 7:01 a.m. UTC | #1
On 2/26/24 16:25, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Add new ioctl KVM_MEMORY_MAPPING in the kvm common code. It iterates on the
> memory range and call arch specific function.  Add stub function as weak
> symbol.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v19:
> - newly added
> ---
>  include/linux/kvm_host.h |  4 +++
>  include/uapi/linux/kvm.h | 10 ++++++
>  virt/kvm/kvm_main.c      | 67 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 81 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 0520cd8d03cc..eeaf4e73317c 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2389,4 +2389,8 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
>  }
>  #endif /* CONFIG_KVM_PRIVATE_MEM */
>  
> +void kvm_arch_vcpu_pre_memory_mapping(struct kvm_vcpu *vcpu);
> +int kvm_arch_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
> +				 struct kvm_memory_mapping *mapping);
> +
>  #endif
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index c3308536482b..5e2b28934aa9 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1155,6 +1155,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_MEMORY_ATTRIBUTES 233
>  #define KVM_CAP_GUEST_MEMFD 234
>  #define KVM_CAP_VM_TYPES 235
> +#define KVM_CAP_MEMORY_MAPPING 236
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -2227,4 +2228,13 @@ struct kvm_create_guest_memfd {
>  	__u64 reserved[6];
>  };
>  
> +#define KVM_MEMORY_MAPPING	_IOWR(KVMIO, 0xd5, struct kvm_memory_mapping)
> +
> +struct kvm_memory_mapping {
> +	__u64 base_gfn;
> +	__u64 nr_pages;
> +	__u64 flags;
> +	__u64 source;
> +};
> +
>  #endif /* __LINUX_KVM_H */
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0349e1f241d1..2f0a8e28795e 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -4409,6 +4409,62 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
>  	return fd;
>  }
>  
> +__weak void kvm_arch_vcpu_pre_memory_mapping(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> +__weak int kvm_arch_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
> +					struct kvm_memory_mapping *mapping)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static int kvm_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
> +				   struct kvm_memory_mapping *mapping)
> +{
> +	bool added = false;
> +	int idx, r = 0;
> +
> +	/* flags isn't used yet. */
> +	if (mapping->flags)
> +		return -EINVAL;
> +
> +	/* Sanity check */
> +	if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
> +	    !mapping->nr_pages ||
> +	    mapping->nr_pages & GENMASK_ULL(63, 63 - PAGE_SHIFT) ||
> +	    mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
I suppose the !mapping->nr_pages check can be dropped, since the overflow check on the line above already rejects nr_pages == 0.
> +		return -EINVAL;
> +
> +	vcpu_load(vcpu);
> +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> +	kvm_arch_vcpu_pre_memory_mapping(vcpu);
> +
> +	while (mapping->nr_pages) {
> +		if (signal_pending(current)) {
> +			r = -ERESTARTSYS;
> +			break;
> +		}
> +
> +		if (need_resched())
> +			cond_resched();
> +
> +		r = kvm_arch_vcpu_memory_mapping(vcpu, mapping);
> +		if (r)
> +			break;
> +
> +		added = true;
> +	}
> +
> +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +	vcpu_put(vcpu);
> +
> +	if (added && mapping->nr_pages > 0)
> +		r = -EAGAIN;
> +
> +	return r;
> +}
> +
>  static long kvm_vcpu_ioctl(struct file *filp,
>  			   unsigned int ioctl, unsigned long arg)
>  {
> @@ -4610,6 +4666,17 @@ static long kvm_vcpu_ioctl(struct file *filp,
>  		r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
>  		break;
>  	}
> +	case KVM_MEMORY_MAPPING: {
> +		struct kvm_memory_mapping mapping;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&mapping, argp, sizeof(mapping)))
> +			break;
> +		r = kvm_vcpu_memory_mapping(vcpu, &mapping);
The return value r should be checked before calling copy_to_user().


Regards
Yin, Fengwei

> +		if (copy_to_user(argp, &mapping, sizeof(mapping)))
> +			r = -EFAULT;
> +		break;
> +	}
>  	default:
>  		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
>  	}
Isaku Yamahata March 8, 2024, 9:01 p.m. UTC | #2
On Thu, Mar 07, 2024 at 03:01:11PM +0800,
Yin Fengwei <fengwei.yin@intel.com> wrote:

> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 0349e1f241d1..2f0a8e28795e 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -4409,6 +4409,62 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
> >  	return fd;
> >  }
> >  
> > +__weak void kvm_arch_vcpu_pre_memory_mapping(struct kvm_vcpu *vcpu)
> > +{
> > +}
> > +
> > +__weak int kvm_arch_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
> > +					struct kvm_memory_mapping *mapping)
> > +{
> > +	return -EOPNOTSUPP;
> > +}
> > +
> > +static int kvm_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
> > +				   struct kvm_memory_mapping *mapping)
> > +{
> > +	bool added = false;
> > +	int idx, r = 0;
> > +
> > +	/* flags isn't used yet. */
> > +	if (mapping->flags)
> > +		return -EINVAL;
> > +
> > +	/* Sanity check */
> > +	if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
> > +	    !mapping->nr_pages ||
> > +	    mapping->nr_pages & GENMASK_ULL(63, 63 - PAGE_SHIFT) ||
> > +	    mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
> I suppose !mapping->nr_pages can be deleted as this line can cover it.
> > +		return -EINVAL;
> > +
> > +	vcpu_load(vcpu);
> > +	idx = srcu_read_lock(&vcpu->kvm->srcu);
> > +	kvm_arch_vcpu_pre_memory_mapping(vcpu);
> > +
> > +	while (mapping->nr_pages) {
> > +		if (signal_pending(current)) {
> > +			r = -ERESTARTSYS;
> > +			break;
> > +		}
> > +
> > +		if (need_resched())
> > +			cond_resched();
> > +
> > +		r = kvm_arch_vcpu_memory_mapping(vcpu, mapping);
> > +		if (r)
> > +			break;
> > +
> > +		added = true;
> > +	}
> > +
> > +	srcu_read_unlock(&vcpu->kvm->srcu, idx);
> > +	vcpu_put(vcpu);
> > +
> > +	if (added && mapping->nr_pages > 0)
> > +		r = -EAGAIN;
> > +
> > +	return r;
> > +}
> > +
> >  static long kvm_vcpu_ioctl(struct file *filp,
> >  			   unsigned int ioctl, unsigned long arg)
> >  {
> > @@ -4610,6 +4666,17 @@ static long kvm_vcpu_ioctl(struct file *filp,
> >  		r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
> >  		break;
> >  	}
> > +	case KVM_MEMORY_MAPPING: {
> > +		struct kvm_memory_mapping mapping;
> > +
> > +		r = -EFAULT;
> > +		if (copy_from_user(&mapping, argp, sizeof(mapping)))
> > +			break;
> > +		r = kvm_vcpu_memory_mapping(vcpu, &mapping);
> return value r should be checked before copy_to_user

That's intentional: the struct is copied back so that userspace can tell whether
the mapping was partially or fully processed, regardless of whether an error occurred.

> 
> 
> Regards
> Yin, Fengwei
> 
> > +		if (copy_to_user(argp, &mapping, sizeof(mapping)))
> > +			r = -EFAULT;
> > +		break;
> > +	}
> >  	default:
> >  		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
> >  	}
>
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0520cd8d03cc..eeaf4e73317c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2389,4 +2389,8 @@  static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 }
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
+void kvm_arch_vcpu_pre_memory_mapping(struct kvm_vcpu *vcpu);
+int kvm_arch_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
+				 struct kvm_memory_mapping *mapping);
+
 #endif
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c3308536482b..5e2b28934aa9 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1155,6 +1155,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_MEMORY_ATTRIBUTES 233
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_MEMORY_MAPPING 236
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -2227,4 +2228,13 @@  struct kvm_create_guest_memfd {
 	__u64 reserved[6];
 };
 
+#define KVM_MEMORY_MAPPING	_IOWR(KVMIO, 0xd5, struct kvm_memory_mapping)
+
+struct kvm_memory_mapping {	/* argument of KVM_MEMORY_MAPPING; copied in and back out */
+	__u64 base_gfn;	/* start of the range; base_gfn + nr_pages must not wrap */
+	__u64 nr_pages;	/* must be non-zero with bits 63..(63 - PAGE_SHIFT) clear */
+	__u64 flags;	/* no flags are defined yet; must be 0 */
+	__u64 source;	/* must be PAGE_SIZE aligned; meaning is left to the arch hook -- TODO confirm */
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0349e1f241d1..2f0a8e28795e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4409,6 +4409,62 @@  static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
 	return fd;
 }
 
+__weak void kvm_arch_vcpu_pre_memory_mapping(struct kvm_vcpu *vcpu)
+{	/* Weak default: does nothing. Called once before the mapping loop starts. */
+}
+
+__weak int kvm_arch_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
+					struct kvm_memory_mapping *mapping)
+{
+	return -EOPNOTSUPP;	/* weak default: always fails with "not supported" */
+}
+/* Validate @mapping, then call the arch hook until nr_pages is 0, it fails, or a signal is pending. */
+static int kvm_vcpu_memory_mapping(struct kvm_vcpu *vcpu,
+				   struct kvm_memory_mapping *mapping)
+{
+	bool added = false;	/* set once an arch hook call has returned 0 */
+	int idx, r = 0;
+
+	/* No flags are defined yet, so reject any non-zero value. */
+	if (mapping->flags)
+		return -EINVAL;
+
+	/* Reject an unaligned source, a zero or too-large nr_pages, and a range that wraps. */
+	if (!IS_ALIGNED(mapping->source, PAGE_SIZE) ||
+	    !mapping->nr_pages ||
+	    mapping->nr_pages & GENMASK_ULL(63, 63 - PAGE_SHIFT) ||
+	    mapping->base_gfn + mapping->nr_pages <= mapping->base_gfn)
+		return -EINVAL;
+
+	vcpu_load(vcpu);
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	kvm_arch_vcpu_pre_memory_mapping(vcpu);
+
+	while (mapping->nr_pages) {	/* not modified here; presumably the arch hook shrinks it -- TODO confirm */
+		if (signal_pending(current)) {
+			r = -ERESTARTSYS;
+			break;
+		}
+
+		if (need_resched())
+			cond_resched();
+
+		r = kvm_arch_vcpu_memory_mapping(vcpu, mapping);
+		if (r)
+			break;
+
+		added = true;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+	vcpu_put(vcpu);
+
+	if (added && mapping->nr_pages > 0)	/* loop stopped early after partial progress */
+		r = -EAGAIN;	/* replaces the error that ended the loop */
+
+	return r;	/* 0 when done, -EAGAIN after partial progress, else the error that stopped the loop */
+}
+
 static long kvm_vcpu_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -4610,6 +4666,17 @@  static long kvm_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_get_stats_fd(vcpu);
 		break;
 	}
+	case KVM_MEMORY_MAPPING: {
+		struct kvm_memory_mapping mapping;
+
+		r = -EFAULT;
+		if (copy_from_user(&mapping, argp, sizeof(mapping)))
+			break;
+		r = kvm_vcpu_memory_mapping(vcpu, &mapping);
+		if (copy_to_user(argp, &mapping, sizeof(mapping)))
+			r = -EFAULT;
+		break;
+	}
 	default:
 		r = kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 	}