@@ -923,6 +923,7 @@ struct kvm_s390_pv {
u64 guest_len;
unsigned long stor_base;
void *stor_var;
+ bool dumping;
};
struct kvm_arch{
@@ -2271,6 +2271,68 @@ static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
}
}
+static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
+ struct kvm_s390_pv_dmp dmp)
+{
+ int r = -EINVAL;
+ void __user *result_buff = (void __user *)dmp.buff_addr;
+
+ switch (dmp.subcmd) {
+ case KVM_PV_DUMP_INIT: {
+ if (kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * Block SIE entry as concurrent dump UVCs could lead
+ * to validities.
+ */
+ kvm_s390_vcpu_block_all(kvm);
+
+ r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+ UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
+ cmd->rc, cmd->rrc);
+ if (!r) {
+ kvm->arch.pv.dumping = true;
+ } else {
+ kvm_s390_vcpu_unblock_all(kvm);
+ r = -EINVAL;
+ }
+ break;
+ }
+ case KVM_PV_DUMP_CONFIG_STOR_STATE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ /*
+ * gaddr is an output parameter since we might stop
+ * early. As dmp will be copied back in our caller, we
+ * don't need to do it ourselves.
+ */
+ r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ case KVM_PV_DUMP_COMPLETE: {
+ if (!kvm->arch.pv.dumping)
+ break;
+
+ r = -EINVAL;
+ if (dmp.buff_len < uv_info.conf_dump_finalize_len)
+ break;
+
+ r = kvm_s390_pv_dump_complete(kvm, result_buff,
+ &cmd->rc, &cmd->rrc);
+ break;
+ }
+ default:
+ r = -ENOTTY;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
{
int r = 0;
@@ -2447,6 +2509,28 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
r = 0;
break;
}
+ case KVM_PV_DUMP: {
+ struct kvm_s390_pv_dmp dmp;
+
+ r = -EINVAL;
+ if (!kvm_s390_pv_is_protected(kvm))
+ break;
+
+ r = -EFAULT;
+ if (copy_from_user(&dmp, argp, sizeof(dmp)))
+ break;
+
+ r = kvm_s390_pv_dmp(kvm, cmd, dmp);
+ if (r)
+ break;
+
+ if (copy_to_user(argp, &dmp, sizeof(dmp))) {
+ r = -EFAULT;
+ break;
+ }
+
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -4564,6 +4648,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
int rc;
+ /*
+ * Running a VM while dumping always has the potential to
+ * produce inconsistent dump data. But for PV vcpus a SIE
+ * entry while dumping could also lead to a fatal validity
+ * intercept which we absolutely want to avoid.
+ */
+ if (vcpu->kvm->arch.pv.dumping)
+ return -EINVAL;
+
if (kvm_run->immediate_exit)
return -EINTR;
@@ -250,6 +250,10 @@ int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
int kvm_s390_pv_unpack(struct kvm *kvm, unsigned long addr, unsigned long size,
unsigned long tweak, u16 *rc, u16 *rrc);
int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state);
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc);
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc);
static inline u64 kvm_s390_pv_get_handle(struct kvm *kvm)
{
@@ -7,6 +7,7 @@
*/
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/minmax.h>
#include <linux/pagemap.h>
#include <linux/sched/signal.h>
#include <asm/gmap.h>
@@ -298,3 +299,184 @@ int kvm_s390_pv_set_cpu_state(struct kvm_vcpu *vcpu, u8 state)
return -EINVAL;
return 0;
}
+
+/* Size of the cache for the storage state dump data. 1MB for now */
+#define DUMP_BUFF_LEN HPAGE_SIZE
+
+/**
+ * kvm_s390_pv_dump_stor_state
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @gaddr: Starting absolute guest address for which the storage state
+ * is requested.
+ * @buff_user_len: Length of the buff_user buffer
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Stores buff_len bytes of tweak component values to buff_user
+ * starting with the 1MB block specified by the absolute guest address
+ * (gaddr). The gaddr pointer will be updated with the last address
+ * for which data was written when returning to userspace. buff_user
+ * might be written to even if an error rc is returned. For instance
+ * if we encounter a fault after writing the first page of data.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the cache fails
+ * -EINVAL if gaddr is not aligned to 1MB
+ * -EINVAL if buff_user_len is not aligned to uv_info.conf_dump_storage_state_len
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_stor_state(struct kvm *kvm, void __user *buff_user,
+ u64 *gaddr, u64 buff_user_len, u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_stor_state uvcb = {
+ .header.cmd = UVC_CMD_DUMP_CONF_STOR_STATE,
+ .header.len = sizeof(uvcb),
+ .config_handle = kvm->arch.pv.handle,
+ .gaddr = *gaddr,
+ .dump_area_origin = 0,
+ };
+ const u64 increment_len = uv_info.conf_dump_storage_state_len;
+ size_t buff_kvm_size;
+ size_t size_done = 0;
+ u8 *buff_kvm = NULL;
+ int cc, ret;
+
+ ret = -EINVAL;
+ /* UV call processes 1MB guest storage chunks at a time */
+ if (!IS_ALIGNED(*gaddr, HPAGE_SIZE))
+ goto out;
+
+ /*
+ * We provide the storage state for 1MB chunks of guest
+ * storage. The buffer will need to be aligned to
+ * conf_dump_storage_state_len so we don't end on a partial
+ * chunk.
+ */
+ if (!buff_user_len ||
+ !IS_ALIGNED(buff_user_len, increment_len))
+ goto out;
+
+ /*
+ * Allocate a buffer from which we will later copy to the user
+ * process. We don't want userspace to dictate our buffer size
+ * so we limit it to DUMP_BUFF_LEN.
+ */
+ ret = -ENOMEM;
+ buff_kvm_size = min_t(u64, buff_user_len, DUMP_BUFF_LEN);
+ buff_kvm = vzalloc(buff_kvm_size);
+ if (!buff_kvm)
+ goto out;
+
+ ret = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ /* We will loop until the user buffer is filled or an error occurs */
+ do {
+ /* Get 1MB worth of guest storage state data */
+ cc = uv_call_sched(0, (u64)&uvcb);
+
+ /* All or nothing */
+ if (cc) {
+ ret = -EINVAL;
+ break;
+ }
+
+ size_done += increment_len;
+ uvcb.dump_area_origin += increment_len;
+ buff_user_len -= increment_len;
+ uvcb.gaddr += HPAGE_SIZE;
+
+ /* KVM Buffer full, time to copy to the process */
+ if (!buff_user_len || size_done == DUMP_BUFF_LEN) {
+ if (copy_to_user(buff_user, buff_kvm, size_done)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ buff_user += size_done;
+ size_done = 0;
+ uvcb.dump_area_origin = (u64)buff_kvm;
+ }
+ } while (buff_user_len);
+
+ /* Report back where we ended dumping */
+ *gaddr = uvcb.gaddr;
+
+ /* Lets only log errors, we don't want to spam */
+out:
+ if (ret)
+ KVM_UV_EVENT(kvm, 3,
+ "PROTVIRT DUMP STORAGE STATE: addr %llx ret %d, uvcb rc %x rrc %x",
+ uvcb.gaddr, ret, uvcb.header.rc, uvcb.header.rrc);
+ *rc = uvcb.header.rc;
+ *rrc = uvcb.header.rrc;
+ vfree(buff_kvm);
+
+ return ret;
+}
+
+/**
+ * kvm_s390_pv_dump_complete
+ *
+ * @kvm: pointer to the guest's KVM struct
+ * @buff_user: Userspace pointer where we will write the results to
+ * @rc: Pointer to where the uvcb return code is stored
+ * @rrc: Pointer to where the uvcb return reason code is stored
+ *
+ * Completes the dumping operation and writes the completion data to
+ * user space.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return:
+ * 0 on success
+ * -ENOMEM if allocating the completion buffer fails
+ * -EINVAL if the UV call fails, rc and rrc will be set in this case
+ * -EFAULT if copying the result to buff_user failed
+ */
+int kvm_s390_pv_dump_complete(struct kvm *kvm, void __user *buff_user,
+ u16 *rc, u16 *rrc)
+{
+ struct uv_cb_dump_complete complete = {
+ .header.len = sizeof(complete),
+ .header.cmd = UVC_CMD_DUMP_COMPLETE,
+ .config_handle = kvm_s390_pv_get_handle(kvm),
+ };
+ u64 *compl_data;
+ int ret;
+
+ /* Allocate dump area */
+ compl_data = vzalloc(uv_info.conf_dump_finalize_len);
+ if (!compl_data)
+ return -ENOMEM;
+ complete.dump_area_origin = (u64)compl_data;
+
+ ret = uv_call_sched(0, (u64)&complete);
+ *rc = complete.header.rc;
+ *rrc = complete.header.rrc;
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP COMPLETE: rc %x rrc %x",
+ complete.header.rc, complete.header.rrc);
+
+ if (!ret) {
+ /*
+ * kvm_s390_pv_dealloc_vm() will also (mem)set
+ * this to false on a reboot or other destroy
+ * operation for this vm.
+ */
+ kvm->arch.pv.dumping = false;
+ kvm_s390_vcpu_unblock_all(kvm);
+ ret = copy_to_user(buff_user, compl_data, uv_info.conf_dump_finalize_len);
+ if (ret)
+ ret = -EFAULT;
+ }
+ vfree(compl_data);
+ /* If the UVC returned an error, translate it to -EINVAL */
+ if (ret > 0)
+ ret = -EINVAL;
+ return ret;
+}
@@ -1660,6 +1660,20 @@ struct kvm_s390_pv_unp {
__u64 tweak;
};
+enum pv_cmd_dmp_id {
+ KVM_PV_DUMP_INIT,
+ KVM_PV_DUMP_CONFIG_STOR_STATE,
+ KVM_PV_DUMP_COMPLETE,
+};
+
+struct kvm_s390_pv_dmp {
+ __u64 subcmd;
+ __u64 buff_addr;
+ __u64 buff_len;
+ __u64 gaddr; /* For dump storage state */
+ __u64 reserved[4];
+};
+
enum pv_cmd_info_id {
KVM_PV_INFO_VM,
KVM_PV_INFO_DUMP,
@@ -1703,6 +1717,7 @@ enum pv_cmd_id {
KVM_PV_PREP_RESET,
KVM_PV_UNSHARE_ALL,
KVM_PV_INFO,
+ KVM_PV_DUMP,
};
struct kvm_pv_cmd {