Message ID | 1483943091-1364-41-git-send-email-jintack@cs.columbia.edu (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Mon, Jan 09, 2017 at 01:24:36AM -0500, Jintack Lim wrote: > Each nested VM is supposed to have a mmu (i.e. shadow stage-2 page to have a 'struct kvm_mmu' ? > table), and we create it when the guest hypervisor writes to vttbr_el2 > with a new vmid. I think the commit message should also mention that you maintain a list of seen nested stage 2 translation contexts and associated shadow page tables. > > In case the guest hypervisor writes to vttbr_el2 with existing vmid, we > check if the base address is changed. If so, then what we have in the > shadow page table is not valid any more. So ummap it. unmap? We clear the entire shadow stage 2 page table, right? > > Signed-off-by: Jintack Lim <jintack@cs.columbia.edu> > --- > arch/arm/include/asm/kvm_host.h | 1 + > arch/arm/kvm/arm.c | 1 + > arch/arm64/include/asm/kvm_host.h | 1 + > arch/arm64/include/asm/kvm_mmu.h | 6 ++++ > arch/arm64/kvm/mmu-nested.c | 71 +++++++++++++++++++++++++++++++++++++++ > arch/arm64/kvm/sys_regs.c | 15 ++++++++- > 6 files changed, 94 insertions(+), 1 deletion(-) > > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h > index fbde48d..ebf2810 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -84,6 +84,7 @@ struct kvm_arch { > > /* Never used on arm but added to be compatible with arm64 */ > struct list_head nested_mmu_list; > + spinlock_t mmu_list_lock; > > /* Interrupt controller */ > struct vgic_dist vgic; > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 147df97..6fa5754 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -147,6 +147,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > kvm->arch.mmu.vmid.vmid_gen = 0; > kvm->arch.mmu.el2_vmid.vmid_gen = 0; > INIT_LIST_HEAD(&kvm->arch.nested_mmu_list); > + spin_lock_init(&kvm->arch.mmu_list_lock); > > /* The maximum number of VCPUs is limited by the host's GIC model */ > kvm->arch.max_vcpus = vgic_present ? > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 23e2267..52eea76 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -99,6 +99,7 @@ struct kvm_arch { > > /* Stage 2 shadow paging contexts for nested L2 VM */ > struct list_head nested_mmu_list; > + spinlock_t mmu_list_lock; I'm wondering if we really need the separate spin lock or if we could just grab the KVM mutex? > }; > > #define KVM_NR_MEM_OBJS 40 > diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h > index d1ef650..fdc9327 100644 > --- a/arch/arm64/include/asm/kvm_mmu.h > +++ b/arch/arm64/include/asm/kvm_mmu.h > @@ -327,6 +327,7 @@ static inline unsigned int kvm_get_vmid_bits(void) > #ifdef CONFIG_KVM_ARM_NESTED_HYP > struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr); > struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu); > +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr); > #else > static inline struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, > u64 vttbr) > @@ -337,6 +338,11 @@ static inline struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu) > { > return &vcpu->kvm->arch.mmu; > } > + > +static inline bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr) > +{ > + return false; > +} > #endif > > static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid, > diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c > index d52078f..0811d94 100644 > --- a/arch/arm64/kvm/mmu-nested.c > +++ b/arch/arm64/kvm/mmu-nested.c > @@ -53,3 +53,74 @@ struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu) > > return &nested_mmu->mmu; > } > + > +static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu, > + u64 vttbr) > +{ > + struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu; > + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; > + bool need_free = false; > + int ret; > + > + nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL); > + if (!nested_mmu) > + return NULL; > + > + ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu); > + if (ret) { > + kfree(nested_mmu); > + return NULL; > + } > + > + spin_lock(&vcpu->kvm->arch.mmu_list_lock); > + tmp_mmu = get_nested_mmu(vcpu, vttbr); > + if (!tmp_mmu) > + list_add_rcu(&nested_mmu->list, nested_mmu_list); > + else /* Somebody already created and put a new nested_mmu to the list */ > + need_free = true; > + spin_unlock(&vcpu->kvm->arch.mmu_list_lock); > + > + if (need_free) { > + __kvm_free_stage2_pgd(&nested_mmu->mmu); > + kfree(nested_mmu); > + nested_mmu = tmp_mmu; > + } > + > + return nested_mmu; > +} > + > +static void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) > +{ > + struct kvm_nested_s2_mmu *nested_mmu; > + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; > + > + list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list) > + kvm_unmap_stage2_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE); > +} > + > +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr) > +{ > + struct kvm_nested_s2_mmu *nested_mmu; > + > + /* See if we can relax this */ huh? > + if (!vttbr) why is this a special case? Theoretically an IPA of zero and VMID zero could be a valid page table base pointer, right? I'm gussing because the guest hypervisor occasionally writes zero into VTTBR_EL2, for example when not using stage 2 translation, so perhaps what you need to do is to defer creating a new nested mmu structure until you actually enter the VM with stage 2 paging enabled? > + return true; > + > + nested_mmu = (struct kvm_nested_s2_mmu *)get_nested_mmu(vcpu, vttbr); > + if (!nested_mmu) { > + nested_mmu = create_nested_mmu(vcpu, vttbr); > + if (!nested_mmu) > + return false; I'm wondering if this can be simplified by having get_nested_mmu lookup and allocate the struct and renaming the get_nested_mmu funtion to lookup_nested_mmu? This caller looks racy, even though it isn't, which would be improved by my suggestion as well. > + } else { > + /* > + * unmap the shadow page table if vttbr_el2 is While the function is called unmap, what we really do is clearing/flushing the shadow stage 2 page table. > + * changed to different value > + */ > + if (vttbr != nested_mmu->virtual_vttbr) > + kvm_nested_s2_unmap(vcpu); > + } > + > + nested_mmu->virtual_vttbr = vttbr; > + > + return true; > +} > diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c > index e66f40d..ddb641c 100644 > --- a/arch/arm64/kvm/sys_regs.c > +++ b/arch/arm64/kvm/sys_regs.c > @@ -960,6 +960,19 @@ static bool access_cpacr(struct kvm_vcpu *vcpu, > return true; > } > > +static bool access_vttbr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, > + const struct sys_reg_desc *r) > +{ > + u64 vttbr = p->regval; > + > + if (!p->is_write) { > + p->regval = vcpu_el2_reg(vcpu, r->reg); > + return true; > + } > + > + return handle_vttbr_update(vcpu, vttbr); > +} > + > static bool trap_el2_reg(struct kvm_vcpu *vcpu, > struct sys_reg_params *p, > const struct sys_reg_desc *r) > @@ -1306,7 +1319,7 @@ static bool trap_el2_reg(struct kvm_vcpu *vcpu, > trap_el2_reg, reset_el2_val, TCR_EL2, 0 }, > /* VTTBR_EL2 */ > { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b000), > - trap_el2_reg, reset_el2_val, VTTBR_EL2, 0 }, > + access_vttbr, reset_el2_val, VTTBR_EL2, 0 }, > /* VTCR_EL2 */ > { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b010), > trap_el2_reg, reset_el2_val, VTCR_EL2, 0 }, > -- > 1.9.1 > > Thanks, -Christoffer
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index fbde48d..ebf2810 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -84,6 +84,7 @@ struct kvm_arch { /* Never used on arm but added to be compatible with arm64 */ struct list_head nested_mmu_list; + spinlock_t mmu_list_lock; /* Interrupt controller */ struct vgic_dist vgic; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 147df97..6fa5754 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -147,6 +147,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.mmu.vmid.vmid_gen = 0; kvm->arch.mmu.el2_vmid.vmid_gen = 0; INIT_LIST_HEAD(&kvm->arch.nested_mmu_list); + spin_lock_init(&kvm->arch.mmu_list_lock); /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = vgic_present ? diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 23e2267..52eea76 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -99,6 +99,7 @@ struct kvm_arch { /* Stage 2 shadow paging contexts for nested L2 VM */ struct list_head nested_mmu_list; + spinlock_t mmu_list_lock; }; #define KVM_NR_MEM_OBJS 40 diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index d1ef650..fdc9327 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -327,6 +327,7 @@ static inline unsigned int kvm_get_vmid_bits(void) #ifdef CONFIG_KVM_ARM_NESTED_HYP struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr); struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu); +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr); #else static inline struct kvm_nested_s2_mmu *get_nested_mmu(struct kvm_vcpu *vcpu, u64 vttbr) @@ -337,6 +338,11 @@ static inline struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu) { return &vcpu->kvm->arch.mmu; } + +static inline bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr) +{ + return false; +} #endif static inline u64 kvm_get_vttbr(struct kvm_s2_vmid *vmid, diff --git a/arch/arm64/kvm/mmu-nested.c b/arch/arm64/kvm/mmu-nested.c index d52078f..0811d94 100644 --- a/arch/arm64/kvm/mmu-nested.c +++ b/arch/arm64/kvm/mmu-nested.c @@ -53,3 +53,74 @@ struct kvm_s2_mmu *vcpu_get_active_s2_mmu(struct kvm_vcpu *vcpu) return &nested_mmu->mmu; } + +static struct kvm_nested_s2_mmu *create_nested_mmu(struct kvm_vcpu *vcpu, + u64 vttbr) +{ + struct kvm_nested_s2_mmu *nested_mmu, *tmp_mmu; + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; + bool need_free = false; + int ret; + + nested_mmu = kzalloc(sizeof(struct kvm_nested_s2_mmu), GFP_KERNEL); + if (!nested_mmu) + return NULL; + + ret = __kvm_alloc_stage2_pgd(&nested_mmu->mmu); + if (ret) { + kfree(nested_mmu); + return NULL; + } + + spin_lock(&vcpu->kvm->arch.mmu_list_lock); + tmp_mmu = get_nested_mmu(vcpu, vttbr); + if (!tmp_mmu) + list_add_rcu(&nested_mmu->list, nested_mmu_list); + else /* Somebody already created and put a new nested_mmu to the list */ + need_free = true; + spin_unlock(&vcpu->kvm->arch.mmu_list_lock); + + if (need_free) { + __kvm_free_stage2_pgd(&nested_mmu->mmu); + kfree(nested_mmu); + nested_mmu = tmp_mmu; + } + + return nested_mmu; +} + +static void kvm_nested_s2_unmap(struct kvm_vcpu *vcpu) +{ + struct kvm_nested_s2_mmu *nested_mmu; + struct list_head *nested_mmu_list = &vcpu->kvm->arch.nested_mmu_list; + + list_for_each_entry_rcu(nested_mmu, nested_mmu_list, list) + kvm_unmap_stage2_range(&nested_mmu->mmu, 0, KVM_PHYS_SIZE); +} + +bool handle_vttbr_update(struct kvm_vcpu *vcpu, u64 vttbr) +{ + struct kvm_nested_s2_mmu *nested_mmu; + + /* See if we can relax this */ + if (!vttbr) + return true; + + nested_mmu = (struct kvm_nested_s2_mmu *)get_nested_mmu(vcpu, vttbr); + if (!nested_mmu) { + nested_mmu = create_nested_mmu(vcpu, vttbr); + if (!nested_mmu) + return false; + } else { + /* + * unmap the shadow page table if vttbr_el2 is + * changed to different value + */ + if (vttbr != nested_mmu->virtual_vttbr) + kvm_nested_s2_unmap(vcpu); + } + + nested_mmu->virtual_vttbr = vttbr; + + return true; +} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index e66f40d..ddb641c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -960,6 +960,19 @@ static bool access_cpacr(struct kvm_vcpu *vcpu, return true; } +static bool access_vttbr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 vttbr = p->regval; + + if (!p->is_write) { + p->regval = vcpu_el2_reg(vcpu, r->reg); + return true; + } + + return handle_vttbr_update(vcpu, vttbr); +} + static bool trap_el2_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *p, const struct sys_reg_desc *r) @@ -1306,7 +1319,7 @@ static bool trap_el2_reg(struct kvm_vcpu *vcpu, trap_el2_reg, reset_el2_val, TCR_EL2, 0 }, /* VTTBR_EL2 */ { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b000), - trap_el2_reg, reset_el2_val, VTTBR_EL2, 0 }, + access_vttbr, reset_el2_val, VTTBR_EL2, 0 }, /* VTCR_EL2 */ { Op0(0b11), Op1(0b100), CRn(0b0010), CRm(0b0001), Op2(0b010), trap_el2_reg, reset_el2_val, VTCR_EL2, 0 },
Each nested VM is supposed to have a mmu (i.e. shadow stage-2 page table), and we create it when the guest hypervisor writes to vttbr_el2 with a new vmid. In case the guest hypervisor writes to vttbr_el2 with existing vmid, we check if the base address is changed. If so, then what we have in the shadow page table is not valid any more. So ummap it. Signed-off-by: Jintack Lim <jintack@cs.columbia.edu> --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/kvm/arm.c | 1 + arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_mmu.h | 6 ++++ arch/arm64/kvm/mmu-nested.c | 71 +++++++++++++++++++++++++++++++++++++++ arch/arm64/kvm/sys_regs.c | 15 ++++++++- 6 files changed, 94 insertions(+), 1 deletion(-)