[v4,1/2] LoongArch: KVM: Add steal time support in kvm side

Message ID 20240524073812.731032-2-maobibo@loongson.cn (mailing list archive)
State New, archived
Series LoongArch: Add steal time support

Commit Message

Bibo Mao May 24, 2024, 7:38 a.m. UTC
The steal time feature is added here on the KVM side: a VM can query
the features provided by the KVM hypervisor, and the feature bit
KVM_FEATURE_STEAL_TIME is added here. As on x86, the steal time
structure is stored in guest memory, and a hypercall function
KVM_HCALL_FUNC_NOTIFY is added for the guest to notify KVM to enable
the feature.

One vCPU attr ioctl command, KVM_LOONGARCH_VCPU_PVTIME_CTRL, is added
to save and restore the base address of the steal time structure when
a VM is migrated.

Signed-off-by: Bibo Mao <maobibo@loongson.cn>
---
 arch/loongarch/include/asm/kvm_host.h  |   7 ++
 arch/loongarch/include/asm/kvm_para.h  |  10 ++
 arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
 arch/loongarch/include/asm/loongarch.h |   1 +
 arch/loongarch/include/uapi/asm/kvm.h  |   4 +
 arch/loongarch/kvm/Kconfig             |   1 +
 arch/loongarch/kvm/exit.c              |  38 +++++++-
 arch/loongarch/kvm/vcpu.c              | 124 +++++++++++++++++++++++++
 8 files changed, 187 insertions(+), 2 deletions(-)
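
As an aside for readers: the guest side of this series (patch 2/2) is
expected to probe the feature and register a per-vCPU structure roughly
as sketched below. read_cpucfg() and the constants come from the diff
under review; kvm_hypercall2() and per_cpu_steal_time_pa() are
hypothetical helpers used only for illustration.

/*
 * Sketch of guest-side enablement, not part of this patch.
 * kvm_hypercall2() is assumed to put the function number in a0 and the
 * two arguments in a1/a2, matching what kvm_save_notify() reads below.
 */
static int pv_enable_steal_time(void)
{
	unsigned long addr;

	/* Probe the feature bits advertised via CPUCFG_KVM_FEATURE */
	if (!(read_cpucfg(CPUCFG_KVM_FEATURE) & KVM_FEATURE_STEAL_TIME))
		return -ENODEV;

	/* Physical address of this CPU's kvm_steal_time, 64-byte aligned */
	addr = per_cpu_steal_time_pa();		/* hypothetical helper */
	addr |= KVM_STEAL_PHYS_VALID;

	return kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY,
			      KVM_FEATURE_STEAL_TIME, addr);
}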

Comments

Huacai Chen July 6, 2024, 3 a.m. UTC | #1
Hi, Bibo,

On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
>
> The steal time feature is added here on the KVM side: a VM can query
> the features provided by the KVM hypervisor, and the feature bit
> KVM_FEATURE_STEAL_TIME is added here. As on x86, the steal time
> structure is stored in guest memory, and a hypercall function
> KVM_HCALL_FUNC_NOTIFY is added for the guest to notify KVM to enable
> the feature.
>
> One vCPU attr ioctl command, KVM_LOONGARCH_VCPU_PVTIME_CTRL, is added
> to save and restore the base address of the steal time structure when
> a VM is migrated.
>
> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> ---
>  arch/loongarch/include/asm/kvm_host.h  |   7 ++
>  arch/loongarch/include/asm/kvm_para.h  |  10 ++
>  arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
>  arch/loongarch/include/asm/loongarch.h |   1 +
>  arch/loongarch/include/uapi/asm/kvm.h  |   4 +
>  arch/loongarch/kvm/Kconfig             |   1 +
>  arch/loongarch/kvm/exit.c              |  38 +++++++-
>  arch/loongarch/kvm/vcpu.c              | 124 +++++++++++++++++++++++++
>  8 files changed, 187 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> index c87b6ea0ec47..2eb2f7572023 100644
> --- a/arch/loongarch/include/asm/kvm_host.h
> +++ b/arch/loongarch/include/asm/kvm_host.h
> @@ -30,6 +30,7 @@
>  #define KVM_PRIVATE_MEM_SLOTS          0
>
>  #define KVM_HALT_POLL_NS_DEFAULT       500000
> +#define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>
>  #define KVM_GUESTDBG_SW_BP_MASK                \
>         (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> @@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
>         struct kvm_mp_state mp_state;
>         /* cpucfg */
>         u32 cpucfg[KVM_MAX_CPUCFG_REGS];
> +       /* paravirt steal time */
> +       struct {
> +               u64 guest_addr;
> +               u64 last_steal;
> +               struct gfn_to_hva_cache cache;
> +       } st;
>  };
>
>  static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> index 4ba2312e5f8c..a9ba8185d4af 100644
> --- a/arch/loongarch/include/asm/kvm_para.h
> +++ b/arch/loongarch/include/asm/kvm_para.h
> @@ -14,6 +14,7 @@
>
>  #define KVM_HCALL_SERVICE              HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
>  #define  KVM_HCALL_FUNC_IPI            1
> +#define  KVM_HCALL_FUNC_NOTIFY         2
>
>  #define KVM_HCALL_SWDBG                        HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
>
> @@ -24,6 +25,15 @@
>  #define KVM_HCALL_INVALID_CODE         -1UL
>  #define KVM_HCALL_INVALID_PARAMETER    -2UL
>
> +#define KVM_STEAL_PHYS_VALID           BIT_ULL(0)
> +#define KVM_STEAL_PHYS_MASK            GENMASK_ULL(63, 6)
> +struct kvm_steal_time {
> +       __u64 steal;
> +       __u32 version;
> +       __u32 flags;
I found that x86 has a preempted field here; in our internal repo the
LoongArch version also has this field. Moreover,
kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
seem to be needed.

> +       __u32 pad[12];
> +};
> +
>  /*
>   * Hypercall interface for KVM hypervisor
>   *
> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> index 590a92cb5416..d7e51300a89f 100644
> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> @@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v
>         vcpu->arch.gprs[num] = val;
>  }
>
> +static inline bool kvm_pvtime_supported(void)
> +{
> +       return !!sched_info_on();
> +}
>  #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> index eb09adda54b7..7a4633ef284b 100644
> --- a/arch/loongarch/include/asm/loongarch.h
> +++ b/arch/loongarch/include/asm/loongarch.h
> @@ -169,6 +169,7 @@
>  #define  KVM_SIGNATURE                 "KVM\0"
>  #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
>  #define  KVM_FEATURE_IPI               BIT(1)
> +#define  KVM_FEATURE_STEAL_TIME                BIT(2)
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
> index f9abef382317..ddc5cab0ffd0 100644
> --- a/arch/loongarch/include/uapi/asm/kvm.h
> +++ b/arch/loongarch/include/uapi/asm/kvm.h
> @@ -81,7 +81,11 @@ struct kvm_fpu {
>  #define LOONGARCH_REG_64(TYPE, REG)    (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
>  #define KVM_IOC_CSRID(REG)             LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
>  #define KVM_IOC_CPUCFG(REG)            LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
> +
> +/* Device Control API on vcpu fd */
>  #define KVM_LOONGARCH_VCPU_CPUCFG      0
> +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
> +#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
>
>  struct kvm_debug_exit_arch {
>  };
> diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
> index c4ef2b4d9797..248744b4d086 100644
> --- a/arch/loongarch/kvm/Kconfig
> +++ b/arch/loongarch/kvm/Kconfig
> @@ -29,6 +29,7 @@ config KVM
>         select KVM_MMIO
>         select HAVE_KVM_READONLY_MEM
>         select KVM_XFER_TO_GUEST_WORK
> +       select SCHED_INFO
>         help
>           Support hosting virtualized guest machines using
>           hardware virtualization extensions. You will need
> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> index c86e099af5ca..e2abd97fb13f 100644
> --- a/arch/loongarch/kvm/exit.c
> +++ b/arch/loongarch/kvm/exit.c
> @@ -24,7 +24,7 @@
>  static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>  {
>         int rd, rj;
> -       unsigned int index;
> +       unsigned int index, ret;
>
>         if (inst.reg2_format.opcode != cpucfg_op)
>                 return EMULATE_FAIL;
> @@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>                 vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>                 break;
>         case CPUCFG_KVM_FEATURE:
> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
> +               ret = KVM_FEATURE_IPI;
> +               if (sched_info_on())
What about replacing it with your helper function kvm_pvtime_supported()?

Huacai

> +                       ret |= KVM_FEATURE_STEAL_TIME;
> +               vcpu->arch.gprs[rd] = ret;
>                 break;
>         default:
>                 vcpu->arch.gprs[rd] = 0;
> @@ -687,6 +690,34 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
>         return RESUME_GUEST;
>  }
>
> +static long kvm_save_notify(struct kvm_vcpu *vcpu)
> +{
> +       unsigned long id, data;
> +
> +       id   = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
> +       data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
> +       switch (id) {
> +       case KVM_FEATURE_STEAL_TIME:
> +               if (!kvm_pvtime_supported())
> +                       return KVM_HCALL_INVALID_CODE;
> +
> +               if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
> +                       return KVM_HCALL_INVALID_PARAMETER;
> +
> +               vcpu->arch.st.guest_addr = data;
> +               if (!(data & KVM_STEAL_PHYS_VALID))
> +                       break;
> +
> +               vcpu->arch.st.last_steal = current->sched_info.run_delay;
> +               kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> +               break;
> +       default:
> +               break;
> +       };
> +
> +       return 0;
> +};
> +
>  /*
>   * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
>   * @vcpu:      Virtual CPU context.
> @@ -758,6 +789,9 @@ static void kvm_handle_service(struct kvm_vcpu *vcpu)
>                 kvm_send_pv_ipi(vcpu);
>                 ret = KVM_HCALL_SUCCESS;
>                 break;
> +       case KVM_HCALL_FUNC_NOTIFY:
> +               ret = kvm_save_notify(vcpu);
> +               break;
>         default:
>                 ret = KVM_HCALL_INVALID_CODE;
>                 break;
> diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
> index 9e8030d45129..382796f1d3e6 100644
> --- a/arch/loongarch/kvm/vcpu.c
> +++ b/arch/loongarch/kvm/vcpu.c
> @@ -31,6 +31,117 @@ const struct kvm_stats_header kvm_vcpu_stats_header = {
>                        sizeof(kvm_vcpu_stats_desc),
>  };
>
> +static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
> +{
> +       struct kvm_steal_time __user *st;
> +       struct gfn_to_hva_cache *ghc;
> +       struct kvm_memslots *slots;
> +       gpa_t gpa;
> +       u64 steal;
> +       u32 version;
> +
> +       ghc = &vcpu->arch.st.cache;
> +       gpa = vcpu->arch.st.guest_addr;
> +       if (!(gpa & KVM_STEAL_PHYS_VALID))
> +               return;
> +
> +       gpa &= KVM_STEAL_PHYS_MASK;
> +       slots = kvm_memslots(vcpu->kvm);
> +       if (slots->generation != ghc->generation || gpa != ghc->gpa) {
> +               if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa,
> +                                       sizeof(*st))) {
> +                       ghc->gpa = INVALID_GPA;
> +                       return;
> +               }
> +       }
> +
> +       st = (struct kvm_steal_time __user *)ghc->hva;
> +       unsafe_get_user(version, &st->version, out);
> +       if (version & 1)
> +               version += 1;
> +       version += 1;
> +       unsafe_put_user(version, &st->version, out);
> +       smp_wmb();
> +
> +       unsafe_get_user(steal, &st->steal, out);
> +       steal += current->sched_info.run_delay -
> +               vcpu->arch.st.last_steal;
> +       vcpu->arch.st.last_steal = current->sched_info.run_delay;
> +       unsafe_put_user(steal, &st->steal, out);
> +
> +       smp_wmb();
> +       version += 1;
> +       unsafe_put_user(version, &st->version, out);
> +out:
> +       mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
> +}
> +
> +static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
> +                                       struct kvm_device_attr *attr)
> +{
> +       if (!kvm_pvtime_supported() ||
> +                       attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> +               return -ENXIO;
> +
> +       return 0;
> +}
> +
> +static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
> +                                       struct kvm_device_attr *attr)
> +{
> +       u64 __user *user = (u64 __user *)attr->addr;
> +       u64 gpa;
> +
> +       if (!kvm_pvtime_supported() ||
> +                       attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> +               return -ENXIO;
> +
> +       gpa = vcpu->arch.st.guest_addr;
> +       if (put_user(gpa, user))
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
> +static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
> +                                       struct kvm_device_attr *attr)
> +{
> +       u64 __user *user = (u64 __user *)attr->addr;
> +       struct kvm *kvm = vcpu->kvm;
> +       u64 gpa;
> +       int ret = 0;
> +       int idx;
> +
> +       if (!kvm_pvtime_supported() ||
> +                       attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
> +               return -ENXIO;
> +
> +       if (get_user(gpa, user))
> +               return -EFAULT;
> +
> +       if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
> +               return -EINVAL;
> +
> +       if (!(gpa & KVM_STEAL_PHYS_VALID)) {
> +               vcpu->arch.st.guest_addr = gpa;
> +               return 0;
> +       }
> +
> +       /* Check the address is in a valid memslot */
> +       idx = srcu_read_lock(&kvm->srcu);
> +       if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT)))
> +               ret = -EINVAL;
> +       srcu_read_unlock(&kvm->srcu, idx);
> +
> +       if (!ret) {
> +               vcpu->arch.st.guest_addr = gpa;
> +               vcpu->arch.st.last_steal = current->sched_info.run_delay;
> +               kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
> +       }
> +
> +       return ret;
> +}
> +
>  /*
>   * kvm_check_requests - check and handle pending vCPU requests
>   *
> @@ -48,6 +159,9 @@ static int kvm_check_requests(struct kvm_vcpu *vcpu)
>         if (kvm_dirty_ring_check_request(vcpu))
>                 return RESUME_HOST;
>
> +       if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
> +               kvm_update_stolen_time(vcpu);
> +
>         return RESUME_GUEST;
>  }
>
> @@ -671,6 +785,9 @@ static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
>         case KVM_LOONGARCH_VCPU_CPUCFG:
>                 ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
>                 break;
> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> +               ret = kvm_loongarch_pvtime_has_attr(vcpu, attr);
> +               break;
>         default:
>                 break;
>         }
> @@ -703,6 +820,9 @@ static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
>         case KVM_LOONGARCH_VCPU_CPUCFG:
>                 ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
>                 break;
> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> +               ret = kvm_loongarch_pvtime_get_attr(vcpu, attr);
> +               break;
>         default:
>                 break;
>         }
> @@ -725,6 +845,9 @@ static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
>         case KVM_LOONGARCH_VCPU_CPUCFG:
>                 ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
>                 break;
> +       case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
> +               ret = kvm_loongarch_pvtime_set_attr(vcpu, attr);
> +               break;
>         default:
>                 break;
>         }
> @@ -1084,6 +1207,7 @@ static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>
>         /* Control guest page CCA attribute */
>         change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
> +       kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
>
>         /* Don't bother restoring registers multiple times unless necessary */
>         if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
> --
> 2.39.3
>
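
The version handling in kvm_update_stolen_time() above is a
seqcount-style protocol: the host leaves version odd while the record is
being rewritten and even once it is consistent, with smp_wmb() ordering
the stores. A guest reader would be expected to retry roughly like this
(a sketch, not code from this series):

static u64 read_steal_time(struct kvm_steal_time *st)
{
	u32 version;
	u64 steal;

	do {
		version = READ_ONCE(st->version);
		smp_rmb();	/* pairs with the host's smp_wmb() */
		steal = READ_ONCE(st->steal);
		smp_rmb();
		/* Retry while an update is in flight or raced with one */
	} while ((version & 1) || version != READ_ONCE(st->version));

	return steal;
}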
Bibo Mao July 6, 2024, 6:59 a.m. UTC | #2
Huacai,

On 2024/7/6 11:00 AM, Huacai Chen wrote:
> Hi, Bibo,
> 
> On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>
>> [...]
>> +#define KVM_STEAL_PHYS_VALID           BIT_ULL(0)
>> +#define KVM_STEAL_PHYS_MASK            GENMASK_ULL(63, 6)
>> +struct kvm_steal_time {
>> +       __u64 steal;
>> +       __u32 version;
>> +       __u32 flags;
> I found that x86 has a preempted field here; in our internal repo the
> LoongArch version also has this field. Moreover,
> kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
> seem to be needed.
By my understanding, the macro vcpu_is_preempted() is used together with
pv spinlock, and pv spinlock depends on pv steal time. So I think the
preempted flag is not part of pv steal time; it is part of pv spinlock.

We are going to add the preempted field when pv spinlock is added.
>> [...]
>>          case CPUCFG_KVM_FEATURE:
>> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
>> +               ret = KVM_FEATURE_IPI;
>> +               if (sched_info_on())
> What about replacing it with your helper function kvm_pvtime_supported()?
Sure, I will replace it with the helper function kvm_pvtime_supported().

Regards
Bibo Mao
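
(For completeness: userspace, e.g. QEMU, would drive the new vcpu
attribute through the generic KVM_GET_DEVICE_ATTR/KVM_SET_DEVICE_ATTR
ioctls on the vcpu fd to save and restore the steal-time GPA across
migration. A minimal sketch, assuming an already-open vcpu_fd:)

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdint.h>

/* Read the current steal-time GPA on the source machine. */
static int pvtime_save(int vcpu_fd, uint64_t *gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(uintptr_t)gpa,
	};

	return ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
}

/* Restore it on the destination; KVM re-arms KVM_REQ_STEAL_UPDATE. */
static int pvtime_restore(int vcpu_fd, uint64_t *gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(uintptr_t)gpa,
	};

	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}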
Huacai Chen July 6, 2024, 9:41 a.m. UTC | #3
On Sat, Jul 6, 2024 at 2:59 PM maobibo <maobibo@loongson.cn> wrote:
>
> Huacai,
>
> On 2024/7/6 11:00 AM, Huacai Chen wrote:
> > Hi, Bibo,
> >
> > On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
> >>
> >> [...]
> >>          case CPUCFG_KVM_FEATURE:
> >> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
> >> +               ret = KVM_FEATURE_IPI;
> >> +               if (sched_info_on())
> > What about replacing it with your helper function kvm_pvtime_supported()?
> Sure, I will replace it with the helper function kvm_pvtime_supported().
If you are sure this is the only issue, then you needn't submit a new version.

Huacai

Bibo Mao July 8, 2024, 1:16 a.m. UTC | #4
On 2024/7/6 5:41 PM, Huacai Chen wrote:
> On Sat, Jul 6, 2024 at 2:59 PM maobibo <maobibo@loongson.cn> wrote:
>>
>> Huacai,
>>
>> On 2024/7/6 11:00 AM, Huacai Chen wrote:
>>> Hi, Bibo,
>>>
>>> On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>
>>>> [...]
>>>> +#define KVM_STEAL_PHYS_VALID           BIT_ULL(0)
>>>> +#define KVM_STEAL_PHYS_MASK            GENMASK_ULL(63, 6)
>>>> +struct kvm_steal_time {
>>>> +       __u64 steal;
>>>> +       __u32 version;
>>>> +       __u32 flags;
>>> I found that x86 has a preempted field here; in our internal repo the
>>> LoongArch version also has this field. Moreover,
>>> kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
>>> seem to be needed.
>> By my understanding, the macro vcpu_is_preempted() is used together with
>> pv spinlock, and pv spinlock depends on pv steal time. So I think the
>> preempted flag is not part of pv steal time; it is part of pv spinlock.
>>
>> We are going to add the preempted field when pv spinlock is added.
>>>> [...]
> If you are sure this is the only issue, then you needn't submit a new version.
OK, thanks.

Searching for the original submission of vcpu_is_preempted(), it can be
located at
https://lore.kernel.org/lkml/1477642287-24104-1-git-send-email-xinhui.pan@linux.vnet.ibm.com/

It is a separate feature; it only depends on pv-spinlock and
pv-stealtime. And there is no capability indicator for the guest kernel;
it is enabled by default.

Regards
Bibo Mao
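
(For reference, the x86 arrangement discussed above keeps the preempted
byte inside struct kvm_steal_time and reads it from vcpu_is_preempted().
Sketched here with x86 semantics, not as part of this LoongArch series:)

#define KVM_VCPU_PREEMPTED	(1 << 0)	/* as defined on x86 */

struct kvm_steal_time_x86 {			/* x86 layout, for comparison */
	__u64 steal;
	__u32 version;
	__u32 flags;
	__u8  preempted;
	__u8  u8_pad[3];
	__u32 pad[11];
};

static bool vcpu_is_preempted_sketch(struct kvm_steal_time_x86 *st)
{
	return READ_ONCE(st->preempted) & KVM_VCPU_PREEMPTED;
}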

Huacai Chen July 8, 2024, 9:47 a.m. UTC | #5
On Mon, Jul 8, 2024 at 9:16 AM maobibo <maobibo@loongson.cn> wrote:
>
>
>
> On 2024/7/6 下午5:41, Huacai Chen wrote:
> > On Sat, Jul 6, 2024 at 2:59 PM maobibo <maobibo@loongson.cn> wrote:
> >>
> >> Huacai,
> >>
> >> On 2024/7/6 上午11:00, Huacai Chen wrote:
> >>> Hi, Bibo,
> >>>
> >>> On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
> >>>>
> >>>> Steal time feature is added here in kvm side, VM can search supported
> >>>> features provided by KVM hypervisor, feature KVM_FEATURE_STEAL_TIME
> >>>> is added here. Like x86, steal time structure is saved in guest memory,
> >>>> one hypercall function KVM_HCALL_FUNC_NOTIFY is added to notify KVM to
> >>>> enable the feature.
> >>>>
> >>>> One cpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to
> >>>> save and restore base address of steal time structure when VM is migrated.
> >>>>
> >>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
> >>>> ---
> >>>>    arch/loongarch/include/asm/kvm_host.h  |   7 ++
> >>>>    arch/loongarch/include/asm/kvm_para.h  |  10 ++
> >>>>    arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
> >>>>    arch/loongarch/include/asm/loongarch.h |   1 +
> >>>>    arch/loongarch/include/uapi/asm/kvm.h  |   4 +
> >>>>    arch/loongarch/kvm/Kconfig             |   1 +
> >>>>    arch/loongarch/kvm/exit.c              |  38 +++++++-
> >>>>    arch/loongarch/kvm/vcpu.c              | 124 +++++++++++++++++++++++++
> >>>>    8 files changed, 187 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
> >>>> index c87b6ea0ec47..2eb2f7572023 100644
> >>>> --- a/arch/loongarch/include/asm/kvm_host.h
> >>>> +++ b/arch/loongarch/include/asm/kvm_host.h
> >>>> @@ -30,6 +30,7 @@
> >>>>    #define KVM_PRIVATE_MEM_SLOTS          0
> >>>>
> >>>>    #define KVM_HALT_POLL_NS_DEFAULT       500000
> >>>> +#define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
> >>>>
> >>>>    #define KVM_GUESTDBG_SW_BP_MASK                \
> >>>>           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
> >>>> @@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
> >>>>           struct kvm_mp_state mp_state;
> >>>>           /* cpucfg */
> >>>>           u32 cpucfg[KVM_MAX_CPUCFG_REGS];
> >>>> +       /* paravirt steal time */
> >>>> +       struct {
> >>>> +               u64 guest_addr;
> >>>> +               u64 last_steal;
> >>>> +               struct gfn_to_hva_cache cache;
> >>>> +       } st;
> >>>>    };
> >>>>
> >>>>    static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
> >>>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
> >>>> index 4ba2312e5f8c..a9ba8185d4af 100644
> >>>> --- a/arch/loongarch/include/asm/kvm_para.h
> >>>> +++ b/arch/loongarch/include/asm/kvm_para.h
> >>>> @@ -14,6 +14,7 @@
> >>>>
> >>>>    #define KVM_HCALL_SERVICE              HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
> >>>>    #define  KVM_HCALL_FUNC_IPI            1
> >>>> +#define  KVM_HCALL_FUNC_NOTIFY         2
> >>>>
> >>>>    #define KVM_HCALL_SWDBG                        HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
> >>>>
> >>>> @@ -24,6 +25,15 @@
> >>>>    #define KVM_HCALL_INVALID_CODE         -1UL
> >>>>    #define KVM_HCALL_INVALID_PARAMETER    -2UL
> >>>>
> >>>> +#define KVM_STEAL_PHYS_VALID           BIT_ULL(0)
> >>>> +#define KVM_STEAL_PHYS_MASK            GENMASK_ULL(63, 6)
> >>>> +struct kvm_steal_time {
> >>>> +       __u64 steal;
> >>>> +       __u32 version;
> >>>> +       __u32 flags;
> >>> I found that x86 has a preempted field here; in our internal repo the
> >>> LoongArch version also has this field. Moreover,
> >>> kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
> >>> seem needed.
> >> By my understanding, the macro vcpu_is_preempted() is used together with pv
> >> spinlock, and pv spinlock depends on pv stealtime. So I think the preempted
> >> flag is not part of pv stealtime; it is part of pv spinlock.
> >>
> >> We are going to add the preempted field when pv spinlock is added.
> >>>
> >>>> +       __u32 pad[12];
> >>>> +};
> >>>> +
> >>>>    /*
> >>>>     * Hypercall interface for KVM hypervisor
> >>>>     *
> >>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
> >>>> index 590a92cb5416..d7e51300a89f 100644
> >>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
> >>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
> >>>> @@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v
> >>>>           vcpu->arch.gprs[num] = val;
> >>>>    }
> >>>>
> >>>> +static inline bool kvm_pvtime_supported(void)
> >>>> +{
> >>>> +       return !!sched_info_on();
> >>>> +}
> >>>>    #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
> >>>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
> >>>> index eb09adda54b7..7a4633ef284b 100644
> >>>> --- a/arch/loongarch/include/asm/loongarch.h
> >>>> +++ b/arch/loongarch/include/asm/loongarch.h
> >>>> @@ -169,6 +169,7 @@
> >>>>    #define  KVM_SIGNATURE                 "KVM\0"
> >>>>    #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
> >>>>    #define  KVM_FEATURE_IPI               BIT(1)
> >>>> +#define  KVM_FEATURE_STEAL_TIME                BIT(2)
> >>>>
> >>>>    #ifndef __ASSEMBLY__
> >>>>
> >>>> diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
> >>>> index f9abef382317..ddc5cab0ffd0 100644
> >>>> --- a/arch/loongarch/include/uapi/asm/kvm.h
> >>>> +++ b/arch/loongarch/include/uapi/asm/kvm.h
> >>>> @@ -81,7 +81,11 @@ struct kvm_fpu {
> >>>>    #define LOONGARCH_REG_64(TYPE, REG)    (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
> >>>>    #define KVM_IOC_CSRID(REG)             LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
> >>>>    #define KVM_IOC_CPUCFG(REG)            LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
> >>>> +
> >>>> +/* Device Control API on vcpu fd */
> >>>>    #define KVM_LOONGARCH_VCPU_CPUCFG      0
> >>>> +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
> >>>> +#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
> >>>>
> >>>>    struct kvm_debug_exit_arch {
> >>>>    };
> >>>> diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
> >>>> index c4ef2b4d9797..248744b4d086 100644
> >>>> --- a/arch/loongarch/kvm/Kconfig
> >>>> +++ b/arch/loongarch/kvm/Kconfig
> >>>> @@ -29,6 +29,7 @@ config KVM
> >>>>           select KVM_MMIO
> >>>>           select HAVE_KVM_READONLY_MEM
> >>>>           select KVM_XFER_TO_GUEST_WORK
> >>>> +       select SCHED_INFO
> >>>>           help
> >>>>             Support hosting virtualized guest machines using
> >>>>             hardware virtualization extensions. You will need
> >>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
> >>>> index c86e099af5ca..e2abd97fb13f 100644
> >>>> --- a/arch/loongarch/kvm/exit.c
> >>>> +++ b/arch/loongarch/kvm/exit.c
> >>>> @@ -24,7 +24,7 @@
> >>>>    static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> >>>>    {
> >>>>           int rd, rj;
> >>>> -       unsigned int index;
> >>>> +       unsigned int index, ret;
> >>>>
> >>>>           if (inst.reg2_format.opcode != cpucfg_op)
> >>>>                   return EMULATE_FAIL;
> >>>> @@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
> >>>>                   vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
> >>>>                   break;
> >>>>           case CPUCFG_KVM_FEATURE:
> >>>> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
> >>>> +               ret = KVM_FEATURE_IPI;
> >>>> +               if (sched_info_on())
> >>> What about replacing it with your helper function kvm_pvtime_supported()?
> >> Sure, will replace it with helper function kvm_pvtime_supported().
> >>> If you are sure this is the only issue, then you needn't submit a new version.
> OK, thanks.
>
> The original submission of vcpu_is_preempted() can be found at
> https://lore.kernel.org/lkml/1477642287-24104-1-git-send-email-xinhui.pan@linux.vnet.ibm.com/
>
> It was a separate series; it only depends on pv-spinlock and
> pv-stealtime. And there is no capability indicator for the guest kernel;
> it is enabled by default.
Series applied with some modifications; you can double-check the
correctness here:
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-kvm

Huacai
Bibo Mao July 9, 2024, 1:39 a.m. UTC | #6
On 2024/7/8 下午5:47, Huacai Chen wrote:
> On Mon, Jul 8, 2024 at 9:16 AM maobibo <maobibo@loongson.cn> wrote:
>>
>>
>>
>> On 2024/7/6 下午5:41, Huacai Chen wrote:
>>> On Sat, Jul 6, 2024 at 2:59 PM maobibo <maobibo@loongson.cn> wrote:
>>>>
>>>> Huacai,
>>>>
>>>> On 2024/7/6 上午11:00, Huacai Chen wrote:
>>>>> Hi, Bibo,
>>>>>
>>>>> On Fri, May 24, 2024 at 3:38 PM Bibo Mao <maobibo@loongson.cn> wrote:
>>>>>>
>>>>>> Steal time feature is added here in kvm side, VM can search supported
>>>>>> features provided by KVM hypervisor, feature KVM_FEATURE_STEAL_TIME
>>>>>> is added here. Like x86, steal time structure is saved in guest memory,
>>>>>> one hypercall function KVM_HCALL_FUNC_NOTIFY is added to notify KVM to
>>>>>> enable the feature.
>>>>>>
>>>>>> One cpu attr ioctl command KVM_LOONGARCH_VCPU_PVTIME_CTRL is added to
>>>>>> save and restore base address of steal time structure when VM is migrated.
>>>>>>
>>>>>> Signed-off-by: Bibo Mao <maobibo@loongson.cn>
>>>>>> ---
>>>>>>     arch/loongarch/include/asm/kvm_host.h  |   7 ++
>>>>>>     arch/loongarch/include/asm/kvm_para.h  |  10 ++
>>>>>>     arch/loongarch/include/asm/kvm_vcpu.h  |   4 +
>>>>>>     arch/loongarch/include/asm/loongarch.h |   1 +
>>>>>>     arch/loongarch/include/uapi/asm/kvm.h  |   4 +
>>>>>>     arch/loongarch/kvm/Kconfig             |   1 +
>>>>>>     arch/loongarch/kvm/exit.c              |  38 +++++++-
>>>>>>     arch/loongarch/kvm/vcpu.c              | 124 +++++++++++++++++++++++++
>>>>>>     8 files changed, 187 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
>>>>>> index c87b6ea0ec47..2eb2f7572023 100644
>>>>>> --- a/arch/loongarch/include/asm/kvm_host.h
>>>>>> +++ b/arch/loongarch/include/asm/kvm_host.h
>>>>>> @@ -30,6 +30,7 @@
>>>>>>     #define KVM_PRIVATE_MEM_SLOTS          0
>>>>>>
>>>>>>     #define KVM_HALT_POLL_NS_DEFAULT       500000
>>>>>> +#define KVM_REQ_STEAL_UPDATE           KVM_ARCH_REQ(1)
>>>>>>
>>>>>>     #define KVM_GUESTDBG_SW_BP_MASK                \
>>>>>>            (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
>>>>>> @@ -201,6 +202,12 @@ struct kvm_vcpu_arch {
>>>>>>            struct kvm_mp_state mp_state;
>>>>>>            /* cpucfg */
>>>>>>            u32 cpucfg[KVM_MAX_CPUCFG_REGS];
>>>>>> +       /* paravirt steal time */
>>>>>> +       struct {
>>>>>> +               u64 guest_addr;
>>>>>> +               u64 last_steal;
>>>>>> +               struct gfn_to_hva_cache cache;
>>>>>> +       } st;
>>>>>>     };
>>>>>>
>>>>>>     static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
>>>>>> diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
>>>>>> index 4ba2312e5f8c..a9ba8185d4af 100644
>>>>>> --- a/arch/loongarch/include/asm/kvm_para.h
>>>>>> +++ b/arch/loongarch/include/asm/kvm_para.h
>>>>>> @@ -14,6 +14,7 @@
>>>>>>
>>>>>>     #define KVM_HCALL_SERVICE              HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
>>>>>>     #define  KVM_HCALL_FUNC_IPI            1
>>>>>> +#define  KVM_HCALL_FUNC_NOTIFY         2
>>>>>>
>>>>>>     #define KVM_HCALL_SWDBG                        HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
>>>>>>
>>>>>> @@ -24,6 +25,15 @@
>>>>>>     #define KVM_HCALL_INVALID_CODE         -1UL
>>>>>>     #define KVM_HCALL_INVALID_PARAMETER    -2UL
>>>>>>
>>>>>> +#define KVM_STEAL_PHYS_VALID           BIT_ULL(0)
>>>>>> +#define KVM_STEAL_PHYS_MASK            GENMASK_ULL(63, 6)
>>>>>> +struct kvm_steal_time {
>>>>>> +       __u64 steal;
>>>>>> +       __u32 version;
>>>>>> +       __u32 flags;
>>>>> I found that x86 has a preempted field here; in our internal repo the
>>>>> LoongArch version also has this field. Moreover,
>>>>> kvm_steal_time_set_preempted() and kvm_steal_time_clear_preempted()
>>>>> seem needed.
>>>> By my understanding, the macro vcpu_is_preempted() is used together with pv
>>>> spinlock, and pv spinlock depends on pv stealtime. So I think the preempted
>>>> flag is not part of pv stealtime; it is part of pv spinlock.
>>>>
>>>> We are going to add the preempted field when pv spinlock is added.
>>>>>
>>>>>> +       __u32 pad[12];
>>>>>> +};
>>>>>> +
>>>>>>     /*
>>>>>>      * Hypercall interface for KVM hypervisor
>>>>>>      *
>>>>>> diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>> index 590a92cb5416..d7e51300a89f 100644
>>>>>> --- a/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>> +++ b/arch/loongarch/include/asm/kvm_vcpu.h
>>>>>> @@ -120,4 +120,8 @@ static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v
>>>>>>            vcpu->arch.gprs[num] = val;
>>>>>>     }
>>>>>>
>>>>>> +static inline bool kvm_pvtime_supported(void)
>>>>>> +{
>>>>>> +       return !!sched_info_on();
>>>>>> +}
>>>>>>     #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
>>>>>> diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
>>>>>> index eb09adda54b7..7a4633ef284b 100644
>>>>>> --- a/arch/loongarch/include/asm/loongarch.h
>>>>>> +++ b/arch/loongarch/include/asm/loongarch.h
>>>>>> @@ -169,6 +169,7 @@
>>>>>>     #define  KVM_SIGNATURE                 "KVM\0"
>>>>>>     #define CPUCFG_KVM_FEATURE             (CPUCFG_KVM_BASE + 4)
>>>>>>     #define  KVM_FEATURE_IPI               BIT(1)
>>>>>> +#define  KVM_FEATURE_STEAL_TIME                BIT(2)
>>>>>>
>>>>>>     #ifndef __ASSEMBLY__
>>>>>>
>>>>>> diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
>>>>>> index f9abef382317..ddc5cab0ffd0 100644
>>>>>> --- a/arch/loongarch/include/uapi/asm/kvm.h
>>>>>> +++ b/arch/loongarch/include/uapi/asm/kvm.h
>>>>>> @@ -81,7 +81,11 @@ struct kvm_fpu {
>>>>>>     #define LOONGARCH_REG_64(TYPE, REG)    (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
>>>>>>     #define KVM_IOC_CSRID(REG)             LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
>>>>>>     #define KVM_IOC_CPUCFG(REG)            LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
>>>>>> +
>>>>>> +/* Device Control API on vcpu fd */
>>>>>>     #define KVM_LOONGARCH_VCPU_CPUCFG      0
>>>>>> +#define KVM_LOONGARCH_VCPU_PVTIME_CTRL 1
>>>>>> +#define  KVM_LOONGARCH_VCPU_PVTIME_GPA 0
>>>>>>
>>>>>>     struct kvm_debug_exit_arch {
>>>>>>     };
>>>>>> diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
>>>>>> index c4ef2b4d9797..248744b4d086 100644
>>>>>> --- a/arch/loongarch/kvm/Kconfig
>>>>>> +++ b/arch/loongarch/kvm/Kconfig
>>>>>> @@ -29,6 +29,7 @@ config KVM
>>>>>>            select KVM_MMIO
>>>>>>            select HAVE_KVM_READONLY_MEM
>>>>>>            select KVM_XFER_TO_GUEST_WORK
>>>>>> +       select SCHED_INFO
>>>>>>            help
>>>>>>              Support hosting virtualized guest machines using
>>>>>>              hardware virtualization extensions. You will need
>>>>>> diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
>>>>>> index c86e099af5ca..e2abd97fb13f 100644
>>>>>> --- a/arch/loongarch/kvm/exit.c
>>>>>> +++ b/arch/loongarch/kvm/exit.c
>>>>>> @@ -24,7 +24,7 @@
>>>>>>     static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>>>>>>     {
>>>>>>            int rd, rj;
>>>>>> -       unsigned int index;
>>>>>> +       unsigned int index, ret;
>>>>>>
>>>>>>            if (inst.reg2_format.opcode != cpucfg_op)
>>>>>>                    return EMULATE_FAIL;
>>>>>> @@ -50,7 +50,10 @@ static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
>>>>>>                    vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
>>>>>>                    break;
>>>>>>            case CPUCFG_KVM_FEATURE:
>>>>>> -               vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
>>>>>> +               ret = KVM_FEATURE_IPI;
>>>>>> +               if (sched_info_on())
>>>>> What about replacing it with your helper function kvm_pvtime_supported()?
>>>> Sure, will replace it with helper function kvm_pvtime_supported().
>>> If you are sure this is the only issue, then you needn't submit a new version.
>> OK, thanks.
>>
>> The original submission of vcpu_is_preempted() can be found at
>> https://lore.kernel.org/lkml/1477642287-24104-1-git-send-email-xinhui.pan@linux.vnet.ibm.com/
>>
>> It was a separate series; it only depends on pv-spinlock and
>> pv-stealtime. And there is no capability indicator for the guest kernel;
>> it is enabled by default.
> Series applied with some modifications; you can double-check the
> correctness here:
> https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/log/?h=loongarch-kvm

Huacai,

I downloaded and tested it. The steal time feature works for me.


Regards
Bibo Mao
diff mbox series

Patch

diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h
index c87b6ea0ec47..2eb2f7572023 100644
--- a/arch/loongarch/include/asm/kvm_host.h
+++ b/arch/loongarch/include/asm/kvm_host.h
@@ -30,6 +30,7 @@ 
 #define KVM_PRIVATE_MEM_SLOTS		0
 
 #define KVM_HALT_POLL_NS_DEFAULT	500000
+#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(1)
 
 #define KVM_GUESTDBG_SW_BP_MASK		\
 	(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)
@@ -201,6 +202,12 @@  struct kvm_vcpu_arch {
 	struct kvm_mp_state mp_state;
 	/* cpucfg */
 	u32 cpucfg[KVM_MAX_CPUCFG_REGS];
+	/* paravirt steal time */
+	struct {
+		u64 guest_addr;
+		u64 last_steal;
+		struct gfn_to_hva_cache cache;
+	} st;
 };
 
 static inline unsigned long readl_sw_gcsr(struct loongarch_csrs *csr, int reg)
diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h
index 4ba2312e5f8c..a9ba8185d4af 100644
--- a/arch/loongarch/include/asm/kvm_para.h
+++ b/arch/loongarch/include/asm/kvm_para.h
@@ -14,6 +14,7 @@ 
 
 #define KVM_HCALL_SERVICE		HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SERVICE)
 #define  KVM_HCALL_FUNC_IPI		1
+#define  KVM_HCALL_FUNC_NOTIFY		2
 
 #define KVM_HCALL_SWDBG			HYPERCALL_ENCODE(HYPERVISOR_KVM, KVM_HCALL_CODE_SWDBG)
 
@@ -24,6 +25,15 @@ 
 #define KVM_HCALL_INVALID_CODE		-1UL
 #define KVM_HCALL_INVALID_PARAMETER	-2UL
 
+#define KVM_STEAL_PHYS_VALID		BIT_ULL(0)
+#define KVM_STEAL_PHYS_MASK		GENMASK_ULL(63, 6)
+struct kvm_steal_time {
+	__u64 steal;
+	__u32 version;
+	__u32 flags;
+	__u32 pad[12];
+};
+
 /*
  * Hypercall interface for KVM hypervisor
  *
diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h
index 590a92cb5416..d7e51300a89f 100644
--- a/arch/loongarch/include/asm/kvm_vcpu.h
+++ b/arch/loongarch/include/asm/kvm_vcpu.h
@@ -120,4 +120,8 @@  static inline void kvm_write_reg(struct kvm_vcpu *vcpu, int num, unsigned long v
 	vcpu->arch.gprs[num] = val;
 }
 
+static inline bool kvm_pvtime_supported(void)
+{
+	return !!sched_info_on();
+}
 #endif /* __ASM_LOONGARCH_KVM_VCPU_H__ */
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index eb09adda54b7..7a4633ef284b 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -169,6 +169,7 @@ 
 #define  KVM_SIGNATURE			"KVM\0"
 #define CPUCFG_KVM_FEATURE		(CPUCFG_KVM_BASE + 4)
 #define  KVM_FEATURE_IPI		BIT(1)
+#define  KVM_FEATURE_STEAL_TIME		BIT(2)
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h
index f9abef382317..ddc5cab0ffd0 100644
--- a/arch/loongarch/include/uapi/asm/kvm.h
+++ b/arch/loongarch/include/uapi/asm/kvm.h
@@ -81,7 +81,11 @@  struct kvm_fpu {
 #define LOONGARCH_REG_64(TYPE, REG)	(TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT))
 #define KVM_IOC_CSRID(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG)
 #define KVM_IOC_CPUCFG(REG)		LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG)
+
+/* Device Control API on vcpu fd */
 #define KVM_LOONGARCH_VCPU_CPUCFG	0
+#define KVM_LOONGARCH_VCPU_PVTIME_CTRL	1
+#define  KVM_LOONGARCH_VCPU_PVTIME_GPA	0
 
 struct kvm_debug_exit_arch {
 };
diff --git a/arch/loongarch/kvm/Kconfig b/arch/loongarch/kvm/Kconfig
index c4ef2b4d9797..248744b4d086 100644
--- a/arch/loongarch/kvm/Kconfig
+++ b/arch/loongarch/kvm/Kconfig
@@ -29,6 +29,7 @@  config KVM
 	select KVM_MMIO
 	select HAVE_KVM_READONLY_MEM
 	select KVM_XFER_TO_GUEST_WORK
+	select SCHED_INFO
 	help
 	  Support hosting virtualized guest machines using
 	  hardware virtualization extensions. You will need
diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c
index c86e099af5ca..e2abd97fb13f 100644
--- a/arch/loongarch/kvm/exit.c
+++ b/arch/loongarch/kvm/exit.c
@@ -24,7 +24,7 @@ 
 static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 {
 	int rd, rj;
-	unsigned int index;
+	unsigned int index, ret;
 
 	if (inst.reg2_format.opcode != cpucfg_op)
 		return EMULATE_FAIL;
@@ -50,7 +50,10 @@  static int kvm_emu_cpucfg(struct kvm_vcpu *vcpu, larch_inst inst)
 		vcpu->arch.gprs[rd] = *(unsigned int *)KVM_SIGNATURE;
 		break;
 	case CPUCFG_KVM_FEATURE:
-		vcpu->arch.gprs[rd] = KVM_FEATURE_IPI;
+		ret = KVM_FEATURE_IPI;
+		if (sched_info_on())
+			ret |= KVM_FEATURE_STEAL_TIME;
+		vcpu->arch.gprs[rd] = ret;
 		break;
 	default:
 		vcpu->arch.gprs[rd] = 0;
@@ -687,6 +690,34 @@  static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu)
 	return RESUME_GUEST;
 }
 
+static long kvm_save_notify(struct kvm_vcpu *vcpu)
+{
+	unsigned long id, data;
+
+	id   = kvm_read_reg(vcpu, LOONGARCH_GPR_A1);
+	data = kvm_read_reg(vcpu, LOONGARCH_GPR_A2);
+	switch (id) {
+	case KVM_FEATURE_STEAL_TIME:
+		if (!kvm_pvtime_supported())
+			return KVM_HCALL_INVALID_CODE;
+
+		if (data & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
+			return KVM_HCALL_INVALID_PARAMETER;
+
+		vcpu->arch.st.guest_addr = data;
+		if (!(data & KVM_STEAL_PHYS_VALID))
+			break;
+
+		vcpu->arch.st.last_steal = current->sched_info.run_delay;
+		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+		break;
+	default:
+		break;
+	};
+
+	return 0;
+};
+
 /*
  * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root.
  * @vcpu:      Virtual CPU context.
@@ -758,6 +789,9 @@  static void kvm_handle_service(struct kvm_vcpu *vcpu)
 		kvm_send_pv_ipi(vcpu);
 		ret = KVM_HCALL_SUCCESS;
 		break;
+	case KVM_HCALL_FUNC_NOTIFY:
+		ret = kvm_save_notify(vcpu);
+		break;
 	default:
 		ret = KVM_HCALL_INVALID_CODE;
 		break;
diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c
index 9e8030d45129..382796f1d3e6 100644
--- a/arch/loongarch/kvm/vcpu.c
+++ b/arch/loongarch/kvm/vcpu.c
@@ -31,6 +31,117 @@  const struct kvm_stats_header kvm_vcpu_stats_header = {
 		       sizeof(kvm_vcpu_stats_desc),
 };
 
+static void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+	struct kvm_steal_time __user *st;
+	struct gfn_to_hva_cache *ghc;
+	struct kvm_memslots *slots;
+	gpa_t gpa;
+	u64 steal;
+	u32 version;
+
+	ghc = &vcpu->arch.st.cache;
+	gpa = vcpu->arch.st.guest_addr;
+	if (!(gpa & KVM_STEAL_PHYS_VALID))
+		return;
+
+	gpa &= KVM_STEAL_PHYS_MASK;
+	slots = kvm_memslots(vcpu->kvm);
+	if (slots->generation != ghc->generation || gpa != ghc->gpa) {
+		if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa,
+					sizeof(*st))) {
+			ghc->gpa = INVALID_GPA;
+			return;
+		}
+	}
+
+	st = (struct kvm_steal_time __user *)ghc->hva;
+	unsafe_get_user(version, &st->version, out);
+	if (version & 1)
+		version += 1;
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
+	smp_wmb();
+
+	unsafe_get_user(steal, &st->steal, out);
+	steal += current->sched_info.run_delay -
+		vcpu->arch.st.last_steal;
+	vcpu->arch.st.last_steal = current->sched_info.run_delay;
+	unsafe_put_user(steal, &st->steal, out);
+
+	smp_wmb();
+	version += 1;
+	unsafe_put_user(version, &st->version, out);
+out:
+	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
+}
+
+static int kvm_loongarch_pvtime_has_attr(struct kvm_vcpu *vcpu,
+					struct kvm_device_attr *attr)
+{
+	if (!kvm_pvtime_supported() ||
+			attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+		return -ENXIO;
+
+	return 0;
+}
+
+static int kvm_loongarch_pvtime_get_attr(struct kvm_vcpu *vcpu,
+					struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	u64 gpa;
+
+	if (!kvm_pvtime_supported() ||
+			attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+		return -ENXIO;
+
+	gpa = vcpu->arch.st.guest_addr;
+	if (put_user(gpa, user))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int kvm_loongarch_pvtime_set_attr(struct kvm_vcpu *vcpu,
+					struct kvm_device_attr *attr)
+{
+	u64 __user *user = (u64 __user *)attr->addr;
+	struct kvm *kvm = vcpu->kvm;
+	u64 gpa;
+	int ret = 0;
+	int idx;
+
+	if (!kvm_pvtime_supported() ||
+			attr->attr != KVM_LOONGARCH_VCPU_PVTIME_GPA)
+		return -ENXIO;
+
+	if (get_user(gpa, user))
+		return -EFAULT;
+
+	if (gpa & ~(KVM_STEAL_PHYS_MASK | KVM_STEAL_PHYS_VALID))
+		return -EINVAL;
+
+	if (!(gpa & KVM_STEAL_PHYS_VALID)) {
+		vcpu->arch.st.guest_addr = gpa;
+		return 0;
+	}
+
+	/* Check the address is in a valid memslot */
+	idx = srcu_read_lock(&kvm->srcu);
+	if (kvm_is_error_hva(gfn_to_hva(kvm, gpa >> PAGE_SHIFT)))
+		ret = -EINVAL;
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	if (!ret) {
+		vcpu->arch.st.guest_addr = gpa;
+		vcpu->arch.st.last_steal = current->sched_info.run_delay;
+		kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
+	}
+
+	return ret;
+}
+
 /*
  * kvm_check_requests - check and handle pending vCPU requests
  *
@@ -48,6 +159,9 @@  static int kvm_check_requests(struct kvm_vcpu *vcpu)
 	if (kvm_dirty_ring_check_request(vcpu))
 		return RESUME_HOST;
 
+	if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
+		kvm_update_stolen_time(vcpu);
+
 	return RESUME_GUEST;
 }
 
@@ -671,6 +785,9 @@  static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_has_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -703,6 +820,9 @@  static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_get_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -725,6 +845,9 @@  static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu,
 	case KVM_LOONGARCH_VCPU_CPUCFG:
 		ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr);
 		break;
+	case KVM_LOONGARCH_VCPU_PVTIME_CTRL:
+		ret = kvm_loongarch_pvtime_set_attr(vcpu, attr);
+		break;
 	default:
 		break;
 	}
@@ -1084,6 +1207,7 @@  static int _kvm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 	/* Control guest page CCA attribute */
 	change_csr_gcfg(CSR_GCFG_MATC_MASK, CSR_GCFG_MATC_ROOT);
+	kvm_make_request(KVM_REQ_STEAL_UPDATE, vcpu);
 
 	/* Don't bother restoring registers multiple times unless necessary */
 	if (vcpu->arch.aux_inuse & KVM_LARCH_HWCSR_USABLE)
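
For reference, a rough guest-side sketch of how this interface is
consumed end to end (illustration only, not code from this series;
kvm_hypercall2() is an assumed stand-in for the guest's hypercall
wrapper): probe CPUCFG_KVM_FEATURE, register the per-vCPU area with
KVM_HCALL_FUNC_NOTIFY, then read the accumulated steal time under the
even/odd version protocol that kvm_update_stolen_time() implements on
the host side.

/* The shared area must be 64-byte aligned to fit KVM_STEAL_PHYS_MASK. */
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);

static int pv_register_steal_time(void)
{
	int cpu = smp_processor_id();
	unsigned long addr;

	if (!(read_cpucfg(CPUCFG_KVM_FEATURE) & KVM_FEATURE_STEAL_TIME))
		return -ENODEV;

	/* Pass the physical address with bit 0 set to mark it valid. */
	addr = per_cpu_ptr_to_phys(&per_cpu(steal_time, cpu));
	addr |= KVM_STEAL_PHYS_VALID;

	return kvm_hypercall2(KVM_HCALL_FUNC_NOTIFY,
			      KVM_FEATURE_STEAL_TIME, addr);
}

static u64 pv_steal_clock(int cpu)
{
	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
	u32 version;
	u64 steal;

	/*
	 * The host bumps version to an odd value before touching 'steal'
	 * and back to even afterwards, so retry while an update is in
	 * flight or the counter changed under us.
	 */
	do {
		version = READ_ONCE(st->version);
		smp_rmb();
		steal = READ_ONCE(st->steal);
		smp_rmb();
	} while ((version & 1) || version != READ_ONCE(st->version));

	return steal;
}

A real guest would additionally re-register the area from its CPU
hotplug and resume paths; the sketch skips that.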
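On the VMM side, migration save/restore of the steal-time GPA goes
through the standard vcpu device-attr ioctls that the new
KVM_LOONGARCH_VCPU_PVTIME_CTRL group hooks into. A minimal userspace
sketch (error handling omitted):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* On the source: read the current steal-time GPA out of the vCPU. */
static int pvtime_save(int vcpu_fd, uint64_t *gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(unsigned long)gpa,
	};

	return ioctl(vcpu_fd, KVM_GET_DEVICE_ATTR, &attr);
}

/* On the destination: write it back before resuming the guest. */
static int pvtime_restore(int vcpu_fd, uint64_t gpa)
{
	struct kvm_device_attr attr = {
		.group = KVM_LOONGARCH_VCPU_PVTIME_CTRL,
		.attr  = KVM_LOONGARCH_VCPU_PVTIME_GPA,
		.addr  = (uint64_t)(unsigned long)&gpa,
	};

	return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
}

Restoring a GPA with KVM_STEAL_PHYS_VALID set makes
kvm_loongarch_pvtime_set_attr() queue KVM_REQ_STEAL_UPDATE, so the
guest's area is refreshed on the next vcpu_load without the guest
having to re-issue the hypercall.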