
[3/3] add support for change_pte mmu notifiers

Message ID 1253730350-4364-4-git-send-email-ieidus@redhat.com (mailing list archive)
State New, archived

Commit Message

Izik Eidus Sept. 23, 2009, 6:25 p.m. UTC
This is needed by KVM if it wants KSM to map pages directly into its
shadow page tables.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |    1 +
 arch/x86/kvm/mmu.c              |   64 +++++++++++++++++++++++++++++++++-----
 virt/kvm/kvm_main.c             |   14 ++++++++
 3 files changed, 70 insertions(+), 9 deletions(-)
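
For context, here is a minimal sketch of the caller side, assuming the set_pte_at_notify() helper added earlier in this series (the in-tree version is a macro, so treat this as an illustration rather than the exact code). When KSM replaces a private page with the shared copy, it installs the new read-only pte through this helper, which invokes the ->change_pte() callback implemented below instead of forcing the secondary MMU to invalidate and refault:

static inline void set_pte_at_notify(struct mm_struct *mm,
				     unsigned long address,
				     pte_t *ptep, pte_t pte)
{
	/* Install the new pte in the primary MMU. */
	set_pte_at(mm, address, ptep, pte);
	/* Let registered notifiers (e.g. KVM) update their secondary
	 * mappings in place rather than tearing them down. */
	mmu_notifier_change_pte(mm, address, pte);
}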

Comments

Izik Eidus Sept. 23, 2009, 6:32 p.m. UTC | #1
Izik Eidus wrote:
> This is needed by KVM if it wants KSM to map pages directly into its
> shadow page tables.
>
> [...]
>
> +		if (pte_write(*ptep)) {
> +			rmap_remove(kvm, spte);
> +			__set_spte(spte, shadow_trap_nonpresent_pte);
> +			spte = rmap_next(kvm, rmapp, NULL);
> +		} else {
> +			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
> +			new_spte |= new_pfn << PAGE_SHIFT;
> +
> +			if (!pte_write(*ptep)) {
>   

Just noticed that this if is not needed: we can only reach this else
branch when the "if (pte_write(*ptep)) {" test a few lines above has
already failed, so !pte_write(*ptep) is always true here.
I will resend
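
The simplified else branch would then look like this (a sketch of the intended fix, using the same helpers as the patch, not the actual resend):

		} else {
			/* Repoint the spte at the new (shared) page. */
			new_spte = *spte & ~PT64_BASE_ADDR_MASK;
			new_spte |= new_pfn << PAGE_SHIFT;
			/*
			 * This branch only runs when the new pte is
			 * read-only, so write permission can be dropped
			 * unconditionally.
			 */
			new_spte &= ~PT_WRITABLE_MASK;
			new_spte &= ~SPTE_HOST_WRITEABLE;
			if (is_writeble_pte(*spte))
				kvm_set_pfn_dirty(spte_to_pfn(*spte));
			__set_spte(spte, new_spte);
			spte = rmap_next(kvm, rmapp, spte);
		}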

Patch

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be0004..d838922 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5cd8b4e..0905ca2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -748,7 +748,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
 	return write_protected;
 }
 
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
 	u64 *spte;
 	int need_tlb_flush = 0;
@@ -763,8 +763,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
 	return need_tlb_flush;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
+{
+	int need_flush = 0;
+	u64 *spte, new_spte;
+	pte_t *ptep = (pte_t *)data;
+	pfn_t new_pfn;
+
+	WARN_ON(pte_huge(*ptep));
+	new_pfn = pte_pfn(*ptep);
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!is_shadow_present_pte(*spte));
+		rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+		need_flush = 1;
+		if (pte_write(*ptep)) {
+			rmap_remove(kvm, spte);
+			__set_spte(spte, shadow_trap_nonpresent_pte);
+			spte = rmap_next(kvm, rmapp, NULL);
+		} else {
+			new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+			new_spte |= new_pfn << PAGE_SHIFT;
+
+			if (!pte_write(*ptep)) {
+				new_spte &= ~PT_WRITABLE_MASK;
+				new_spte &= ~SPTE_HOST_WRITEABLE;
+				if (is_writeble_pte(*spte))
+					kvm_set_pfn_dirty(spte_to_pfn(*spte));
+			}
+			__set_spte(spte, new_spte);
+			spte = rmap_next(kvm, rmapp, spte);
+		}
+	}
+	if (need_flush)
+		kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, u64 data,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 u64 data))
 {
 	int i, j;
 	int retval = 0;
@@ -786,13 +825,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 		if (hva >= start && hva < end) {
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 
-			retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+			retval |= handler(kvm, &memslot->rmap[gfn_offset],
+					  data);
 
 			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
 				int idx = gfn_offset;
 				idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
 				retval |= handler(kvm,
-					&memslot->lpage_info[j][idx].rmap_pde);
+					&memslot->lpage_info[j][idx].rmap_pde,
+					data);
 			}
 		}
 	}
@@ -802,10 +843,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	kvm_handle_hva(kvm, hva, (u64)&pte, kvm_set_pte_rmapp);
 }
 
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, u64 data)
 {
 	u64 *spte;
 	int young = 0;
@@ -841,13 +887,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
 }
 
 #ifdef MMU_DEBUG
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 897bff3..4cbd396 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -851,6 +851,19 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
 
 }
 
+static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
+					struct mm_struct *mm,
+					unsigned long address,
+					pte_t pte)
+{
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+
+	spin_lock(&kvm->mmu_lock);
+	kvm->mmu_notifier_seq++;
+	kvm_set_spte_hva(kvm, address, pte);
+	spin_unlock(&kvm->mmu_lock);
+}
+
 static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 						    struct mm_struct *mm,
 						    unsigned long start,
@@ -930,6 +943,7 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
 	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
 	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
 	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
+	.change_pte		= kvm_mmu_notifier_change_pte,
 	.release		= kvm_mmu_notifier_release,
 };
 #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */