Message ID | 1406249768-25315-3-git-send-email-m.smarduch@samsung.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 25.07.14 02:56, Mario Smarduch wrote: > Patch adds support for initial write protection VM memlsot. This patch series > assumes that huge PUDs will not be used in 2nd stage tables. Is this a valid assumption? > > Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> > --- > arch/arm/include/asm/kvm_host.h | 1 + > arch/arm/include/asm/kvm_mmu.h | 20 ++++++ > arch/arm/include/asm/pgtable-3level.h | 1 + > arch/arm/kvm/arm.c | 9 +++ > arch/arm/kvm/mmu.c | 128 +++++++++++++++++++++++++++++++++ > 5 files changed, 159 insertions(+) > > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h > index 042206f..6521a2d 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); > u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); > int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); > void kvm_arch_flush_remote_tlbs(struct kvm *); > +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); > > #endif /* __ARM_KVM_HOST_H__ */ > diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h > index 5cc0b0f..08ab5e8 100644 > --- a/arch/arm/include/asm/kvm_mmu.h > +++ b/arch/arm/include/asm/kvm_mmu.h > @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) > pmd_val(*pmd) |= L_PMD_S2_RDWR; > } > > +static inline void kvm_set_s2pte_readonly(pte_t *pte) > +{ > + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; > +} > + > +static inline bool kvm_s2pte_readonly(pte_t *pte) > +{ > + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; > +} > + > +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) > +{ > + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; > +} > + > +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) > +{ > + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; > +} > + > /* Open coded p*d_addr_end that can deal with 64bit addresses */ > #define kvm_pgd_addr_end(addr, end) \ > ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ > diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h > index 85c60ad..d8bb40b 100644 > --- a/arch/arm/include/asm/pgtable-3level.h > +++ b/arch/arm/include/asm/pgtable-3level.h > @@ -129,6 +129,7 @@ > #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ > #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ > > +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ > #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ > > /* > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 3c82b37..e11c2dd 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, > const struct kvm_memory_slot *old, > enum kvm_mr_change change) > { > +#ifdef CONFIG_ARM Same question on CONFIG_ARM here. Is this the define used to distinguish between 32bit and 64bit? Alex
On 07/24/2014 11:16 PM, Alexander Graf wrote: > > On 25.07.14 02:56, Mario Smarduch wrote: >> Patch adds support for initial write protection VM memlsot. This >> patch series >> assumes that huge PUDs will not be used in 2nd stage tables. > > Is this a valid assumption? Right now it's unclear if PUDs will be used to back guest memory, assuming so required quite a bit of additional code. After discussing on mailing list it was recommended to treat this as BUG_ON case for now. > >> >> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> >> --- >> arch/arm/include/asm/kvm_host.h | 1 + >> arch/arm/include/asm/kvm_mmu.h | 20 ++++++ >> arch/arm/include/asm/pgtable-3level.h | 1 + >> arch/arm/kvm/arm.c | 9 +++ >> arch/arm/kvm/mmu.c | 128 >> +++++++++++++++++++++++++++++++++ >> 5 files changed, 159 insertions(+) >> >> diff --git a/arch/arm/include/asm/kvm_host.h >> b/arch/arm/include/asm/kvm_host.h >> index 042206f..6521a2d 100644 >> --- a/arch/arm/include/asm/kvm_host.h >> +++ b/arch/arm/include/asm/kvm_host.h >> @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); >> u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); >> int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); >> void kvm_arch_flush_remote_tlbs(struct kvm *); >> +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); >> #endif /* __ARM_KVM_HOST_H__ */ >> diff --git a/arch/arm/include/asm/kvm_mmu.h >> b/arch/arm/include/asm/kvm_mmu.h >> index 5cc0b0f..08ab5e8 100644 >> --- a/arch/arm/include/asm/kvm_mmu.h >> +++ b/arch/arm/include/asm/kvm_mmu.h >> @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t >> *pmd) >> pmd_val(*pmd) |= L_PMD_S2_RDWR; >> } >> +static inline void kvm_set_s2pte_readonly(pte_t *pte) >> +{ >> + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pte_readonly(pte_t *pte) >> +{ >> + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; >> +} >> + >> +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) >> +{ >> + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) >> +{ >> + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; >> +} >> + >> /* Open coded p*d_addr_end that can deal with 64bit addresses */ >> #define kvm_pgd_addr_end(addr, end) \ >> ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ >> diff --git a/arch/arm/include/asm/pgtable-3level.h >> b/arch/arm/include/asm/pgtable-3level.h >> index 85c60ad..d8bb40b 100644 >> --- a/arch/arm/include/asm/pgtable-3level.h >> +++ b/arch/arm/include/asm/pgtable-3level.h >> @@ -129,6 +129,7 @@ >> #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* >> HAP[1] */ >> #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* >> HAP[2:1] */ >> +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* >> HAP[1] */ >> #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* >> HAP[2:1] */ >> /* >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >> index 3c82b37..e11c2dd 100644 >> --- a/arch/arm/kvm/arm.c >> +++ b/arch/arm/kvm/arm.c >> @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, >> const struct kvm_memory_slot *old, >> enum kvm_mr_change change) >> { >> +#ifdef CONFIG_ARM > > Same question on CONFIG_ARM here. Is this the define used to distinguish > between 32bit and 64bit? Yes let ARM64 compile. Eventually we'll come back to ARM64 soon, and these will go. > > > Alex >
Remove the parenthesis from the subject line. On Thu, Jul 24, 2014 at 05:56:06PM -0700, Mario Smarduch wrote: > Patch adds support for initial write protection VM memlsot. This patch series ^^ ^ stray whitespace of > assumes that huge PUDs will not be used in 2nd stage tables. may be worth mentioning that this is always valid on ARMv7. > > Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> > --- > arch/arm/include/asm/kvm_host.h | 1 + > arch/arm/include/asm/kvm_mmu.h | 20 ++++++ > arch/arm/include/asm/pgtable-3level.h | 1 + > arch/arm/kvm/arm.c | 9 +++ > arch/arm/kvm/mmu.c | 128 +++++++++++++++++++++++++++++++++ > 5 files changed, 159 insertions(+) > > diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h > index 042206f..6521a2d 100644 > --- a/arch/arm/include/asm/kvm_host.h > +++ b/arch/arm/include/asm/kvm_host.h > @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); > u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); > int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); > void kvm_arch_flush_remote_tlbs(struct kvm *); > +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); > > #endif /* __ARM_KVM_HOST_H__ */ > diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h > index 5cc0b0f..08ab5e8 100644 > --- a/arch/arm/include/asm/kvm_mmu.h > +++ b/arch/arm/include/asm/kvm_mmu.h > @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) > pmd_val(*pmd) |= L_PMD_S2_RDWR; > } > > +static inline void kvm_set_s2pte_readonly(pte_t *pte) > +{ > + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; > +} > + > +static inline bool kvm_s2pte_readonly(pte_t *pte) > +{ > + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; > +} > + > +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) > +{ > + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; > +} > + > +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) > +{ > + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; > +} > + > /* Open coded p*d_addr_end that can deal with 64bit addresses */ > #define kvm_pgd_addr_end(addr, end) \ > ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ > diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h > index 85c60ad..d8bb40b 100644 > --- a/arch/arm/include/asm/pgtable-3level.h > +++ b/arch/arm/include/asm/pgtable-3level.h > @@ -129,6 +129,7 @@ > #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ > #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ > > +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ > #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ > > /* > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index 3c82b37..e11c2dd 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, > const struct kvm_memory_slot *old, > enum kvm_mr_change change) > { > +#ifdef CONFIG_ARM > + /* > + * At this point memslot has been committed and there is an > + * allocated dirty_bitmap[], dirty pages will be be tracked while the > + * memory slot is write protected. > + */ > + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) > + kvm_mmu_wp_memory_region(kvm, mem->slot); > +#endif > } > > void kvm_arch_flush_shadow_all(struct kvm *kvm) > diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c > index 35254c6..7bfc792 100644 > --- a/arch/arm/kvm/mmu.c > +++ b/arch/arm/kvm/mmu.c > @@ -763,6 +763,134 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) > return false; > } > > +#ifdef CONFIG_ARM > +/** > + * stage2_wp_pte_range - write protect PTE range > + * @pmd: pointer to pmd entry > + * @addr: range start address > + * @end: range end address > + */ > +static void stage2_wp_pte_range(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) > +{ > + pte_t *pte; > + > + pte = pte_offset_kernel(pmd, addr); > + do { > + if (!pte_none(*pte)) { > + if (!kvm_s2pte_readonly(pte)) > + kvm_set_s2pte_readonly(pte); > + } > + } while (pte++, addr += PAGE_SIZE, addr != end); > +} > + > +/** > + * stage2_wp_pmd_range - write protect PMD range > + * @pud: pointer to pud entry > + * @addr: range start address > + * @end: range end address > + */ > +static void stage2_wp_pmd_range(pud_t *pud, phys_addr_t addr, phys_addr_t end) > +{ > + pmd_t *pmd; > + phys_addr_t next; > + > + pmd = pmd_offset(pud, addr); > + > + do { > + next = kvm_pmd_addr_end(addr, end); > + if (!pmd_none(*pmd)) { > + if (kvm_pmd_huge(*pmd)) { > + if (!kvm_s2pmd_readonly(pmd)) > + kvm_set_s2pmd_readonly(pmd); > + } else > + stage2_wp_pte_range(pmd, addr, next); please use a closing brace when the first part of the if-statement is a multi-line block with braces, as per the CodingStyle. > + stray blank line > + } > + } while (pmd++, addr = next, addr != end); > +} > + > +/** > + * stage2_wp_pud_range - write protect PUD range > + * @kvm: pointer to kvm structure > + * @pud: pointer to pgd entry pgd > + * @addr: range start address > + * @end: range end address > + * > + * While walking the PUD range huge PUD pages are ignored, in the future this range, huge PUDs are ignored. In the future... > + * may need to be revisited. Determine how to handle huge PUDs when logging > + * of dirty pages is enabled. I don't understand the last sentence? > + */ > +static void stage2_wp_pud_range(struct kvm *kvm, pgd_t *pgd, > + phys_addr_t addr, phys_addr_t end) > +{ > + pud_t *pud; > + phys_addr_t next; > + > + pud = pud_offset(pgd, addr); > + do { > + next = kvm_pud_addr_end(addr, end); > + /* TODO: huge PUD not supported, revisit later */ > + BUG_ON(pud_huge(*pud)); we should probably define kvm_pud_huge() > + if (!pud_none(*pud)) > + stage2_wp_pmd_range(pud, addr, next); > + } while (pud++, addr = next, addr != end); > +} > + > +/** > + * stage2_wp_range() - write protect stage2 memory region range > + * @kvm: The KVM pointer > + * @start: Start address of range > + * &end: End address of range > + */ > +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) > +{ > + pgd_t *pgd; > + phys_addr_t next; > + > + pgd = kvm->arch.pgd + pgd_index(addr); > + do { > + /* > + * Release kvm_mmu_lock periodically if the memory region is > + * large features like detect hung task, lock detector or lock large. Otherwise, we may see panics due to.. > + * dep may panic. In addition holding the lock this long will extra white space ^^ Additionally, holding the lock for a long timer will > + * also starve other vCPUs. Applies to huge VM memory regions. ^^^ I don't understand this last remark. > + */ > + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) > + cond_resched_lock(&kvm->mmu_lock); > + > + next = kvm_pgd_addr_end(addr, end); > + if (pgd_present(*pgd)) > + stage2_wp_pud_range(kvm, pgd, addr, next); > + } while (pgd++, addr = next, addr != end); > +} > + > +/** > + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot > + * @kvm: The KVM pointer > + * @slot: The memory slot to write protect > + * > + * Called to start logging dirty pages after memory region > + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns > + * all present PMD and PTEs are write protected in the memory region. > + * Afterwards read of dirty page log can be called. > + * > + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, > + * serializing operations for VM memory regions. > + */ > + > +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) > +{ > + struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); > + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; > + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; > + > + spin_lock(&kvm->mmu_lock); > + stage2_wp_range(kvm, start, end); > + kvm_flush_remote_tlbs(kvm); > + spin_unlock(&kvm->mmu_lock); > +} > +#endif > + > static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, > struct kvm_memory_slot *memslot, > unsigned long fault_status) > -- > 1.7.9.5 > Besides the commenting and whitespace stuff, this is beginning to look good. Thanks, -Christoffer
On 08/11/2014 12:12 PM, Christoffer Dall wrote: > Remove the parenthesis from the subject line. Hmmm have to check this don't see it my patch file. > > On Thu, Jul 24, 2014 at 05:56:06PM -0700, Mario Smarduch wrote: >> Patch adds support for initial write protection VM memlsot. This patch series > ^^ ^ > stray whitespace of > Need to watch out for these adds delays to review cycle. > >> assumes that huge PUDs will not be used in 2nd stage tables. > > may be worth mentioning that this is always valid on ARMv7. > Yep definitely. >> >> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> >> --- >> arch/arm/include/asm/kvm_host.h | 1 + >> arch/arm/include/asm/kvm_mmu.h | 20 ++++++ >> arch/arm/include/asm/pgtable-3level.h | 1 + >> arch/arm/kvm/arm.c | 9 +++ >> arch/arm/kvm/mmu.c | 128 +++++++++++++++++++++++++++++++++ >> 5 files changed, 159 insertions(+) >> >> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h >> index 042206f..6521a2d 100644 >> --- a/arch/arm/include/asm/kvm_host.h >> +++ b/arch/arm/include/asm/kvm_host.h >> @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); >> u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); >> int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); >> void kvm_arch_flush_remote_tlbs(struct kvm *); >> +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); >> >> #endif /* __ARM_KVM_HOST_H__ */ >> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h >> index 5cc0b0f..08ab5e8 100644 >> --- a/arch/arm/include/asm/kvm_mmu.h >> +++ b/arch/arm/include/asm/kvm_mmu.h >> @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) >> pmd_val(*pmd) |= L_PMD_S2_RDWR; >> } >> >> +static inline void kvm_set_s2pte_readonly(pte_t *pte) >> +{ >> + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pte_readonly(pte_t *pte) >> +{ >> + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; >> +} >> + >> +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) >> +{ >> + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) >> +{ >> + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; >> +} >> + >> /* Open coded p*d_addr_end that can deal with 64bit addresses */ >> #define kvm_pgd_addr_end(addr, end) \ >> ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ >> diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h >> index 85c60ad..d8bb40b 100644 >> --- a/arch/arm/include/asm/pgtable-3level.h >> +++ b/arch/arm/include/asm/pgtable-3level.h >> @@ -129,6 +129,7 @@ >> #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ >> #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ >> >> +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ >> #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ >> >> /* >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >> index 3c82b37..e11c2dd 100644 >> --- a/arch/arm/kvm/arm.c >> +++ b/arch/arm/kvm/arm.c >> @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, >> const struct kvm_memory_slot *old, >> enum kvm_mr_change change) >> { >> +#ifdef CONFIG_ARM >> + /* >> + * At this point memslot has been committed and there is an >> + * allocated dirty_bitmap[], dirty pages will be be tracked while the >> + * memory slot is write protected. >> + */ >> + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) >> + kvm_mmu_wp_memory_region(kvm, mem->slot); >> +#endif >> } >> >> void kvm_arch_flush_shadow_all(struct kvm *kvm) >> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c >> index 35254c6..7bfc792 100644 >> --- a/arch/arm/kvm/mmu.c >> +++ b/arch/arm/kvm/mmu.c >> @@ -763,6 +763,134 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) >> return false; >> } >> >> +#ifdef CONFIG_ARM >> +/** >> + * stage2_wp_pte_range - write protect PTE range >> + * @pmd: pointer to pmd entry >> + * @addr: range start address >> + * @end: range end address >> + */ >> +static void stage2_wp_pte_range(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) >> +{ >> + pte_t *pte; >> + >> + pte = pte_offset_kernel(pmd, addr); >> + do { >> + if (!pte_none(*pte)) { >> + if (!kvm_s2pte_readonly(pte)) >> + kvm_set_s2pte_readonly(pte); >> + } >> + } while (pte++, addr += PAGE_SIZE, addr != end); >> +} >> + >> +/** >> + * stage2_wp_pmd_range - write protect PMD range >> + * @pud: pointer to pud entry >> + * @addr: range start address >> + * @end: range end address >> + */ >> +static void stage2_wp_pmd_range(pud_t *pud, phys_addr_t addr, phys_addr_t end) >> +{ >> + pmd_t *pmd; >> + phys_addr_t next; >> + >> + pmd = pmd_offset(pud, addr); >> + >> + do { >> + next = kvm_pmd_addr_end(addr, end); >> + if (!pmd_none(*pmd)) { >> + if (kvm_pmd_huge(*pmd)) { >> + if (!kvm_s2pmd_readonly(pmd)) >> + kvm_set_s2pmd_readonly(pmd); >> + } else >> + stage2_wp_pte_range(pmd, addr, next); > please use a closing brace when the first part of the if-statement is a > multi-line block with braces, as per the CodingStyle. Will fix. >> + > > stray blank line Not sure how it got by checkpatch, will fix. > >> + } >> + } while (pmd++, addr = next, addr != end); >> +} >> + >> +/** >> + * stage2_wp_pud_range - write protect PUD range >> + * @kvm: pointer to kvm structure >> + * @pud: pointer to pgd entry > pgd >> + * @addr: range start address >> + * @end: range end address >> + * >> + * While walking the PUD range huge PUD pages are ignored, in the future this > range, huge PUDs are ignored. In the future... >> + * may need to be revisited. Determine how to handle huge PUDs when logging >> + * of dirty pages is enabled. > > I don't understand the last sentence? Probably last two sentences should be combined. ".... to determine how to handle huge PUT...". Would that be clear enough? The overall theme is what to do about PUDs - mark all pages dirty in the region, attempt to breakup such huge regions? > >> + */ >> +static void stage2_wp_pud_range(struct kvm *kvm, pgd_t *pgd, >> + phys_addr_t addr, phys_addr_t end) >> +{ >> + pud_t *pud; >> + phys_addr_t next; >> + >> + pud = pud_offset(pgd, addr); >> + do { >> + next = kvm_pud_addr_end(addr, end); >> + /* TODO: huge PUD not supported, revisit later */ >> + BUG_ON(pud_huge(*pud)); > > we should probably define kvm_pud_huge() Yep will add it. > >> + if (!pud_none(*pud)) >> + stage2_wp_pmd_range(pud, addr, next); >> + } while (pud++, addr = next, addr != end); >> +} >> + >> +/** >> + * stage2_wp_range() - write protect stage2 memory region range >> + * @kvm: The KVM pointer >> + * @start: Start address of range >> + * &end: End address of range >> + */ >> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) >> +{ >> + pgd_t *pgd; >> + phys_addr_t next; >> + >> + pgd = kvm->arch.pgd + pgd_index(addr); >> + do { >> + /* >> + * Release kvm_mmu_lock periodically if the memory region is >> + * large features like detect hung task, lock detector or lock > large. Otherwise, we may see panics due to.. >> + * dep may panic. In addition holding the lock this long will > extra white space ^^ Additionally, holding the lock for a > long timer will >> + * also starve other vCPUs. Applies to huge VM memory regions. > ^^^ I don't understand this > last remark. >> + */ >> + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) >> + cond_resched_lock(&kvm->mmu_lock); >> + >> + next = kvm_pgd_addr_end(addr, end); >> + if (pgd_present(*pgd)) >> + stage2_wp_pud_range(kvm, pgd, addr, next); >> + } while (pgd++, addr = next, addr != end); >> +} >> + >> +/** >> + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot >> + * @kvm: The KVM pointer >> + * @slot: The memory slot to write protect >> + * >> + * Called to start logging dirty pages after memory region >> + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns >> + * all present PMD and PTEs are write protected in the memory region. >> + * Afterwards read of dirty page log can be called. >> + * >> + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, >> + * serializing operations for VM memory regions. >> + */ >> + >> +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) >> +{ >> + struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); >> + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; >> + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; >> + >> + spin_lock(&kvm->mmu_lock); >> + stage2_wp_range(kvm, start, end); >> + kvm_flush_remote_tlbs(kvm); >> + spin_unlock(&kvm->mmu_lock); >> +} >> +#endif >> + >> static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, >> struct kvm_memory_slot *memslot, >> unsigned long fault_status) >> -- >> 1.7.9.5 >> > > Besides the commenting and whitespace stuff, this is beginning to look > good. I'll clean it up for next iteration. > > Thanks, > -Christoffer >
On 08/11/2014 12:12 PM, Christoffer Dall wrote: > Remove the parenthesis from the subject line. > > On Thu, Jul 24, 2014 at 05:56:06PM -0700, Mario Smarduch wrote: >> Patch adds support for initial write protection VM memlsot. This patch series > ^^ ^ > stray whitespace of > > >> assumes that huge PUDs will not be used in 2nd stage tables. > > may be worth mentioning that this is always valid on ARMv7. > >> >> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> >> --- >> arch/arm/include/asm/kvm_host.h | 1 + >> arch/arm/include/asm/kvm_mmu.h | 20 ++++++ >> arch/arm/include/asm/pgtable-3level.h | 1 + >> arch/arm/kvm/arm.c | 9 +++ >> arch/arm/kvm/mmu.c | 128 +++++++++++++++++++++++++++++++++ >> 5 files changed, 159 insertions(+) >> >> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h >> index 042206f..6521a2d 100644 >> --- a/arch/arm/include/asm/kvm_host.h >> +++ b/arch/arm/include/asm/kvm_host.h >> @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); >> u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); >> int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); >> void kvm_arch_flush_remote_tlbs(struct kvm *); >> +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); >> >> #endif /* __ARM_KVM_HOST_H__ */ >> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h >> index 5cc0b0f..08ab5e8 100644 >> --- a/arch/arm/include/asm/kvm_mmu.h >> +++ b/arch/arm/include/asm/kvm_mmu.h >> @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) >> pmd_val(*pmd) |= L_PMD_S2_RDWR; >> } >> >> +static inline void kvm_set_s2pte_readonly(pte_t *pte) >> +{ >> + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pte_readonly(pte_t *pte) >> +{ >> + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; >> +} >> + >> +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) >> +{ >> + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; >> +} >> + >> +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) >> +{ >> + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; >> +} >> + >> /* Open coded p*d_addr_end that can deal with 64bit addresses */ >> #define kvm_pgd_addr_end(addr, end) \ >> ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ >> diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h >> index 85c60ad..d8bb40b 100644 >> --- a/arch/arm/include/asm/pgtable-3level.h >> +++ b/arch/arm/include/asm/pgtable-3level.h >> @@ -129,6 +129,7 @@ >> #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ >> #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ >> >> +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ >> #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ >> >> /* >> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c >> index 3c82b37..e11c2dd 100644 >> --- a/arch/arm/kvm/arm.c >> +++ b/arch/arm/kvm/arm.c >> @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, >> const struct kvm_memory_slot *old, >> enum kvm_mr_change change) >> { >> +#ifdef CONFIG_ARM >> + /* >> + * At this point memslot has been committed and there is an >> + * allocated dirty_bitmap[], dirty pages will be be tracked while the >> + * memory slot is write protected. >> + */ >> + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) >> + kvm_mmu_wp_memory_region(kvm, mem->slot); >> +#endif >> } >> >> void kvm_arch_flush_shadow_all(struct kvm *kvm) >> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c >> index 35254c6..7bfc792 100644 >> --- a/arch/arm/kvm/mmu.c >> +++ b/arch/arm/kvm/mmu.c >> @@ -763,6 +763,134 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) >> return false; >> } >> >> +#ifdef CONFIG_ARM >> +/** >> + * stage2_wp_pte_range - write protect PTE range >> + * @pmd: pointer to pmd entry >> + * @addr: range start address >> + * @end: range end address >> + */ >> +static void stage2_wp_pte_range(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) >> +{ >> + pte_t *pte; >> + >> + pte = pte_offset_kernel(pmd, addr); >> + do { >> + if (!pte_none(*pte)) { >> + if (!kvm_s2pte_readonly(pte)) >> + kvm_set_s2pte_readonly(pte); >> + } >> + } while (pte++, addr += PAGE_SIZE, addr != end); >> +} >> + >> +/** >> + * stage2_wp_pmd_range - write protect PMD range >> + * @pud: pointer to pud entry >> + * @addr: range start address >> + * @end: range end address >> + */ >> +static void stage2_wp_pmd_range(pud_t *pud, phys_addr_t addr, phys_addr_t end) >> +{ >> + pmd_t *pmd; >> + phys_addr_t next; >> + >> + pmd = pmd_offset(pud, addr); >> + >> + do { >> + next = kvm_pmd_addr_end(addr, end); >> + if (!pmd_none(*pmd)) { >> + if (kvm_pmd_huge(*pmd)) { >> + if (!kvm_s2pmd_readonly(pmd)) >> + kvm_set_s2pmd_readonly(pmd); >> + } else >> + stage2_wp_pte_range(pmd, addr, next); > please use a closing brace when the first part of the if-statement is a > multi-line block with braces, as per the CodingStyle. >> + > > stray blank line > >> + } >> + } while (pmd++, addr = next, addr != end); >> +} >> + >> +/** >> + * stage2_wp_pud_range - write protect PUD range >> + * @kvm: pointer to kvm structure >> + * @pud: pointer to pgd entry > pgd >> + * @addr: range start address >> + * @end: range end address >> + * >> + * While walking the PUD range huge PUD pages are ignored, in the future this > range, huge PUDs are ignored. In the future... >> + * may need to be revisited. Determine how to handle huge PUDs when logging >> + * of dirty pages is enabled. > > I don't understand the last sentence? > >> + */ >> +static void stage2_wp_pud_range(struct kvm *kvm, pgd_t *pgd, >> + phys_addr_t addr, phys_addr_t end) >> +{ >> + pud_t *pud; >> + phys_addr_t next; >> + >> + pud = pud_offset(pgd, addr); >> + do { >> + next = kvm_pud_addr_end(addr, end); >> + /* TODO: huge PUD not supported, revisit later */ >> + BUG_ON(pud_huge(*pud)); > > we should probably define kvm_pud_huge() > >> + if (!pud_none(*pud)) >> + stage2_wp_pmd_range(pud, addr, next); >> + } while (pud++, addr = next, addr != end); >> +} >> + >> +/** >> + * stage2_wp_range() - write protect stage2 memory region range >> + * @kvm: The KVM pointer >> + * @start: Start address of range >> + * &end: End address of range >> + */ >> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) >> +{ >> + pgd_t *pgd; >> + phys_addr_t next; >> + >> + pgd = kvm->arch.pgd + pgd_index(addr); >> + do { >> + /* >> + * Release kvm_mmu_lock periodically if the memory region is >> + * large features like detect hung task, lock detector or lock > large. Otherwise, we may see panics due to.. >> + * dep may panic. In addition holding the lock this long will > extra white space ^^ Additionally, holding the lock for a > long timer will >> + * also starve other vCPUs. Applies to huge VM memory regions. > ^^^ I don't understand this > last remark. Sorry overlooked this. While testing - VM regions that were small (~1GB) holding the mmu_lock caused not problems, but when I was running memory regions around 2GB large some kernel lockup detection/lock contention options (some selected by default) caused deadlock warnings/panics in host kernel. This was in one my previous review comments sometime ago, I can go back and find the options. >> + */ >> + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) >> + cond_resched_lock(&kvm->mmu_lock); >> + >> + next = kvm_pgd_addr_end(addr, end); >> + if (pgd_present(*pgd)) >> + stage2_wp_pud_range(kvm, pgd, addr, next); >> + } while (pgd++, addr = next, addr != end); >> +} >> + >> +/** >> + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot >> + * @kvm: The KVM pointer >> + * @slot: The memory slot to write protect >> + * >> + * Called to start logging dirty pages after memory region >> + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns >> + * all present PMD and PTEs are write protected in the memory region. >> + * Afterwards read of dirty page log can be called. >> + * >> + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, >> + * serializing operations for VM memory regions. >> + */ >> + >> +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) >> +{ >> + struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); >> + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; >> + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; >> + >> + spin_lock(&kvm->mmu_lock); >> + stage2_wp_range(kvm, start, end); >> + kvm_flush_remote_tlbs(kvm); >> + spin_unlock(&kvm->mmu_lock); >> +} >> +#endif >> + >> static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, >> struct kvm_memory_slot *memslot, >> unsigned long fault_status) >> -- >> 1.7.9.5 >> > > Besides the commenting and whitespace stuff, this is beginning to look > good. > > Thanks, > -Christoffer >
On Mon, Aug 11, 2014 at 05:16:21PM -0700, Mario Smarduch wrote: > On 08/11/2014 12:12 PM, Christoffer Dall wrote: > > Remove the parenthesis from the subject line. > I just prefer not having the "(w/no huge PUD support)" part in the patch title. > Hmmm have to check this don't see it my patch file. > > > > On Thu, Jul 24, 2014 at 05:56:06PM -0700, Mario Smarduch wrote: > >> Patch adds support for initial write protection VM memlsot. This patch series > > ^^ ^ > > stray whitespace of > > > Need to watch out for these adds delays to review cycle. yes, I care quite a lot about proper English, syntax, grammar and spelling. Reading critically through your own patch files before mailing them out is a good exercise. You can even consider putting them through a spell-checker and/or configure your editor to mark double white space, trailing white space etc. [...] > >> + do { > >> + next = kvm_pmd_addr_end(addr, end); > >> + if (!pmd_none(*pmd)) { > >> + if (kvm_pmd_huge(*pmd)) { > >> + if (!kvm_s2pmd_readonly(pmd)) > >> + kvm_set_s2pmd_readonly(pmd); > >> + } else > >> + stage2_wp_pte_range(pmd, addr, next); > > please use a closing brace when the first part of the if-statement is a > > multi-line block with braces, as per the CodingStyle. > Will fix. > >> + > > > > stray blank line > > Not sure how it got by checkpatch, will fix. Not sure checkpatch will complain, but I do ;) No big deal anyway. > > > >> + } > >> + } while (pmd++, addr = next, addr != end); > >> +} > >> + > >> +/** > >> + * stage2_wp_pud_range - write protect PUD range > >> + * @kvm: pointer to kvm structure > >> + * @pud: pointer to pgd entry > > pgd > >> + * @addr: range start address > >> + * @end: range end address > >> + * > >> + * While walking the PUD range huge PUD pages are ignored, in the future this > > range, huge PUDs are ignored. In the future... > >> + * may need to be revisited. Determine how to handle huge PUDs when logging > >> + * of dirty pages is enabled. > > > > I don't understand the last sentence? > > Probably last two sentences should be combined. > ".... to determine how to handle huge PUT...". Would that be > clear enough? > > The overall theme is what to do about PUDs - mark all pages dirty > in the region, attempt to breakup such huge regions? > I think you should just state that this is not supported and worry about how to deal with it when it's properly supported. The TODO below is sufficient, so just drop all mentionings about the future in the function description above - it's likely to be forgotten when PUDs are in fact support anyhow. Thanks, -Christoffer
On Mon, Aug 11, 2014 at 06:36:14PM -0700, Mario Smarduch wrote: > On 08/11/2014 12:12 PM, Christoffer Dall wrote: [...] > >> +/** > >> + * stage2_wp_range() - write protect stage2 memory region range > >> + * @kvm: The KVM pointer > >> + * @start: Start address of range > >> + * &end: End address of range > >> + */ > >> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) > >> +{ > >> + pgd_t *pgd; > >> + phys_addr_t next; > >> + > >> + pgd = kvm->arch.pgd + pgd_index(addr); > >> + do { > >> + /* > >> + * Release kvm_mmu_lock periodically if the memory region is > >> + * large features like detect hung task, lock detector or lock > > large. Otherwise, we may see panics due to.. > >> + * dep may panic. In addition holding the lock this long will > > extra white space ^^ Additionally, holding the lock for a > > long timer will > >> + * also starve other vCPUs. Applies to huge VM memory regions. > > ^^^ I don't understand this > > last remark. > Sorry overlooked this. > > While testing - VM regions that were small (~1GB) holding the mmu_lock > caused not problems, but when I was running memory regions around 2GB large > some kernel lockup detection/lock contention options (some selected by default) > caused deadlock warnings/panics in host kernel. > > This was in one my previous review comments sometime ago, I can go back > and find the options. > Just drop the last part of the comment, so the whole thing reads: /* * Release kvm_mmu_lock periodically if the memory region is * large. Otherwise, we may see kernel panics from debugging features * such as "detect hung task", "lock detector" or "lock dep checks". * Additionally, holding the lock too long will also starve other vCPUs. */ And check the actual names of those debugging features or use the CONFIG_<WHATEVER> names and say "we may see kernel panics with CONFIG_X, CONFIG_Y, and CONFIG_Z. Makes sense? -Christoffer
On 08/12/2014 02:36 AM, Christoffer Dall wrote: > On Mon, Aug 11, 2014 at 06:36:14PM -0700, Mario Smarduch wrote: >> On 08/11/2014 12:12 PM, Christoffer Dall wrote: > > [...] > >>>> +/** >>>> + * stage2_wp_range() - write protect stage2 memory region range >>>> + * @kvm: The KVM pointer >>>> + * @start: Start address of range >>>> + * &end: End address of range >>>> + */ >>>> +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) >>>> +{ >>>> + pgd_t *pgd; >>>> + phys_addr_t next; >>>> + >>>> + pgd = kvm->arch.pgd + pgd_index(addr); >>>> + do { >>>> + /* >>>> + * Release kvm_mmu_lock periodically if the memory region is >>>> + * large features like detect hung task, lock detector or lock >>> large. Otherwise, we may see panics due to.. >>>> + * dep may panic. In addition holding the lock this long will >>> extra white space ^^ Additionally, holding the lock for a >>> long timer will >>>> + * also starve other vCPUs. Applies to huge VM memory regions. >>> ^^^ I don't understand this >>> last remark. >> Sorry overlooked this. >> >> While testing - VM regions that were small (~1GB) holding the mmu_lock >> caused not problems, but when I was running memory regions around 2GB large >> some kernel lockup detection/lock contention options (some selected by default) >> caused deadlock warnings/panics in host kernel. >> >> This was in one my previous review comments sometime ago, I can go back >> and find the options. >> > > Just drop the last part of the comment, so the whole thing reads: > > /* > * Release kvm_mmu_lock periodically if the memory region is > * large. Otherwise, we may see kernel panics from debugging features > * such as "detect hung task", "lock detector" or "lock dep checks". > * Additionally, holding the lock too long will also starve other vCPUs. > */ > > And check the actual names of those debugging features or use the > CONFIG_<WHATEVER> names and say "we may see kernel panics with CONFIG_X, > CONFIG_Y, and CONFIG_Z. > > Makes sense? Yep sure does. > > -Christoffer >
On 08/12/2014 02:32 AM, Christoffer Dall wrote: > On Mon, Aug 11, 2014 at 05:16:21PM -0700, Mario Smarduch wrote: >> On 08/11/2014 12:12 PM, Christoffer Dall wrote: >>> Remove the parenthesis from the subject line. >> > > I just prefer not having the "(w/no huge PUD support)" part in the patch > title. Ah ok. > >> Hmmm have to check this don't see it my patch file. >>> >>> On Thu, Jul 24, 2014 at 05:56:06PM -0700, Mario Smarduch wrote: >>>> Patch adds support for initial write protection VM memlsot. This patch series >>> ^^ ^ >>> stray whitespace of >>> >> Need to watch out for these adds delays to review cycle. > > yes, I care quite a lot about proper English, syntax, grammar and > spelling. Reading critically through your own patch files before > mailing them out is a good exercise. You can even consider putting them > through a spell-checker and/or configure your editor to mark double > white space, trailing white space etc. > > [...] > >>>> + do { >>>> + next = kvm_pmd_addr_end(addr, end); >>>> + if (!pmd_none(*pmd)) { >>>> + if (kvm_pmd_huge(*pmd)) { >>>> + if (!kvm_s2pmd_readonly(pmd)) >>>> + kvm_set_s2pmd_readonly(pmd); >>>> + } else >>>> + stage2_wp_pte_range(pmd, addr, next); >>> please use a closing brace when the first part of the if-statement is a >>> multi-line block with braces, as per the CodingStyle. >> Will fix. >>>> + >>> >>> stray blank line >> >> Not sure how it got by checkpatch, will fix. > > Not sure checkpatch will complain, but I do ;) No big deal anyway. > >>> >>>> + } >>>> + } while (pmd++, addr = next, addr != end); >>>> +} >>>> + >>>> +/** >>>> + * stage2_wp_pud_range - write protect PUD range >>>> + * @kvm: pointer to kvm structure >>>> + * @pud: pointer to pgd entry >>> pgd >>>> + * @addr: range start address >>>> + * @end: range end address >>>> + * >>>> + * While walking the PUD range huge PUD pages are ignored, in the future this >>> range, huge PUDs are ignored. In the future... >>>> + * may need to be revisited. Determine how to handle huge PUDs when logging >>>> + * of dirty pages is enabled. >>> >>> I don't understand the last sentence? >> >> Probably last two sentences should be combined. >> ".... to determine how to handle huge PUT...". Would that be >> clear enough? >> >> The overall theme is what to do about PUDs - mark all pages dirty >> in the region, attempt to breakup such huge regions? >> > > I think you should just state that this is not supported and worry > about how to deal with it when it's properly supported. The TODO below > is sufficient, so just drop all mentionings about the future in the > function description above - it's likely to be forgotten when PUDs are > in fact support anyhow. Ok the simpler the better. > > Thanks, > -Christoffer >
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 042206f..6521a2d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); void kvm_arch_flush_remote_tlbs(struct kvm *); +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..08ab5e8 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +static inline void kvm_set_s2pte_readonly(pte_t *pte) +{ + pte_val(*pte) = (pte_val(*pte) & ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; +} + +static inline bool kvm_s2pte_readonly(pte_t *pte) +{ + return (pte_val(*pte) & L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; +} + +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +{ + pmd_val(*pmd) = (pmd_val(*pmd) & ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; +} + +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +{ + return (pmd_val(*pmd) & L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; +} + /* Open coded p*d_addr_end that can deal with 64bit addresses */ #define kvm_pgd_addr_end(addr, end) \ ({ u64 __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 85c60ad..d8bb40b 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -129,6 +129,7 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) << 6) /* HAP[2:1] */ +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) << 6) /* HAP[1] */ #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) << 6) /* HAP[2:1] */ /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..e11c2dd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { +#ifdef CONFIG_ARM + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be be tracked while the + * memory slot is write protected. + */ + if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_wp_memory_region(kvm, mem->slot); +#endif } void kvm_arch_flush_shadow_all(struct kvm *kvm) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 35254c6..7bfc792 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -763,6 +763,134 @@ static bool transparent_hugepage_adjust(pfn_t *pfnp, phys_addr_t *ipap) return false; } +#ifdef CONFIG_ARM +/** + * stage2_wp_pte_range - write protect PTE range + * @pmd: pointer to pmd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pte_range(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + if (!kvm_s2pte_readonly(pte)) + kvm_set_s2pte_readonly(pte); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +/** + * stage2_wp_pmd_range - write protect PMD range + * @pud: pointer to pud entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pmd_range(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = pmd_offset(pud, addr); + + do { + next = kvm_pmd_addr_end(addr, end); + if (!pmd_none(*pmd)) { + if (kvm_pmd_huge(*pmd)) { + if (!kvm_s2pmd_readonly(pmd)) + kvm_set_s2pmd_readonly(pmd); + } else + stage2_wp_pte_range(pmd, addr, next); + + } + } while (pmd++, addr = next, addr != end); +} + +/** + * stage2_wp_pud_range - write protect PUD range + * @kvm: pointer to kvm structure + * @pud: pointer to pgd entry + * @addr: range start address + * @end: range end address + * + * While walking the PUD range huge PUD pages are ignored, in the future this + * may need to be revisited. Determine how to handle huge PUDs when logging + * of dirty pages is enabled. + */ +static void stage2_wp_pud_range(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = pud_offset(pgd, addr); + do { + next = kvm_pud_addr_end(addr, end); + /* TODO: huge PUD not supported, revisit later */ + BUG_ON(pud_huge(*pud)); + if (!pud_none(*pud)) + stage2_wp_pmd_range(pud, addr, next); + } while (pud++, addr = next, addr != end); +} + +/** + * stage2_wp_range() - write protect stage2 memory region range + * @kvm: The KVM pointer + * @start: Start address of range + * &end: End address of range + */ +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +{ + pgd_t *pgd; + phys_addr_t next; + + pgd = kvm->arch.pgd + pgd_index(addr); + do { + /* + * Release kvm_mmu_lock periodically if the memory region is + * large features like detect hung task, lock detector or lock + * dep may panic. In addition holding the lock this long will + * also starve other vCPUs. Applies to huge VM memory regions. + */ + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) + cond_resched_lock(&kvm->mmu_lock); + + next = kvm_pgd_addr_end(addr, end); + if (pgd_present(*pgd)) + stage2_wp_pud_range(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to write protect + * + * Called to start logging dirty pages after memory region + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns + * all present PMD and PTEs are write protected in the memory region. + * Afterwards read of dirty page log can be called. + * + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ + +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memory_slot *memslot = id_to_memslot(kvm->memslots, slot); + phys_addr_t start = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); +} +#endif + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long fault_status)
Patch adds support for initial write protection VM memlsot. This patch series assumes that huge PUDs will not be used in 2nd stage tables. Signed-off-by: Mario Smarduch <m.smarduch@samsung.com> --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm/include/asm/kvm_mmu.h | 20 ++++++ arch/arm/include/asm/pgtable-3level.h | 1 + arch/arm/kvm/arm.c | 9 +++ arch/arm/kvm/mmu.c | 128 +++++++++++++++++++++++++++++++++ 5 files changed, 159 insertions(+)