--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1809,6 +1809,10 @@ void kvm_arch_exit(void)
kvm_vmm_info = NULL;
}
+void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log)
{
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -418,6 +418,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return -ENOTSUPP;
}
+void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -130,6 +130,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
return 0;
}
+void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -527,6 +527,7 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
+ int (*dirty_bit_support)(void);
};
extern struct kvm_x86_ops *kvm_x86_ops;
@@ -796,4 +797,6 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
+int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp);
+
#endif /* _ASM_X86_KVM_HOST_H */
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -140,6 +140,8 @@ module_param(oos_shadow, bool, 0644);
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
+#define SPTE_DONT_DIRTY (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
struct kvm_rmap_desc {
@@ -629,6 +631,25 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
return NULL;
}
+int is_dirty_and_clean_rmapp(struct kvm *kvm, unsigned long *rmapp)
+{
+ u64 *spte;
+ int dirty = 0;
+
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ if (*spte & PT_DIRTY_MASK) {
+ set_shadow_pte(spte, (*spte & ~PT_DIRTY_MASK) |
+ SPTE_DONT_DIRTY);
+ dirty = 1;
+ }
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+
+ return dirty;
+}
+
+
static int rmap_write_protect(struct kvm *kvm, u64 gfn)
{
unsigned long *rmapp;
@@ -1676,7 +1697,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
* whether the guest actually used the pte (in order to detect
* demand paging).
*/
- spte = shadow_base_present_pte | shadow_dirty_mask;
+ spte = shadow_base_present_pte;
+ if (!(*shadow_pte & SPTE_DONT_DIRTY))
+ spte |= shadow_dirty_mask;
+
if (!speculative)
spte |= shadow_accessed_mask;
if (!dirty)
@@ -1725,8 +1749,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
}
}
- if (pte_access & ACC_WRITE_MASK)
- mark_page_dirty(vcpu->kvm, gfn);
+ if (!shadow_dirty_mask) {
+ if (pte_access & ACC_WRITE_MASK)
+ mark_page_dirty(vcpu->kvm, gfn);
+ }
set_pte:
set_shadow_pte(shadow_pte, spte);
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2724,6 +2724,11 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
return 0;
}
+static int svm_dirty_bit_support(void)
+{
+ return 1;
+}
+
static struct kvm_x86_ops svm_x86_ops = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -2785,6 +2790,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
.get_mt_mask = svm_get_mt_mask,
+
+ .dirty_bit_support = svm_dirty_bit_support,
};
static int __init svm_init(void)
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3774,6 +3774,11 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
return ret;
}
+static int vmx_dirty_bit_support(void)
+{
+ return 0;
+}
+
static struct kvm_x86_ops vmx_x86_ops = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
@@ -3833,6 +3838,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
.get_mt_mask = vmx_get_mt_mask,
+
+ .dirty_bit_support = vmx_dirty_bit_support,
};
static int __init vmx_init(void)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1963,6 +1963,19 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
return 0;
}
+void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+ int i;
+ gfn_t gfn;
+
+ for (i = 0; i < memslot->npages; ++i) {
+ if (!test_bit(i, memslot->dirty_bitmap)) {
+ if (is_dirty_and_clean_rmapp(kvm, &memslot->rmap[i]))
+ set_bit(i, memslot->dirty_bitmap);
+ }
+ }
+}
+
/*
* Get (and clear) the dirty memory log for a memory slot.
*/
@@ -1982,9 +1995,11 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- spin_lock(&kvm->mmu_lock);
- kvm_mmu_slot_remove_write_access(kvm, log->slot);
- spin_unlock(&kvm->mmu_lock);
+ if (!kvm_x86_ops->dirty_bit_support()) {
+ spin_lock(&kvm->mmu_lock);
+ kvm_mmu_slot_remove_write_access(kvm, log->slot);
+ spin_unlock(&kvm->mmu_lock);
+ }
kvm_flush_remote_tlbs(kvm);
memslot = &kvm->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -250,6 +250,7 @@ int kvm_dev_ioctl_check_extension(long ext);
int kvm_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log, int *is_dirty);
+void kvm_arch_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_dirty_log *log);
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -998,14 +998,18 @@ out:
/*
* Free any memory in @free but not in @dont.
*/
-static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
+static void kvm_free_physmem_slot(struct kvm *kvm,
+ struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
if (!dont || free->rmap != dont->rmap)
vfree(free->rmap);
- if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
+ if (!dont || free->dirty_bitmap != dont->dirty_bitmap) {
+ if (dont && free->rmap == dont->rmap)
+ kvm_arch_flush_shadow(kvm);
vfree(free->dirty_bitmap);
+ }
if (!dont || free->lpage_info != dont->lpage_info)
vfree(free->lpage_info);
@@ -1021,7 +1025,7 @@ void kvm_free_physmem(struct kvm *kvm)
int i;
for (i = 0; i < kvm->nmemslots; ++i)
- kvm_free_physmem_slot(&kvm->memslots[i], NULL);
+ kvm_free_physmem_slot(kvm, &kvm->memslots[i], NULL);
}
static void kvm_destroy_vm(struct kvm *kvm)
@@ -1217,7 +1221,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
goto out_free;
}
- kvm_free_physmem_slot(&old, npages ? &new : NULL);
+ kvm_free_physmem_slot(kvm, &old, npages ? &new : NULL);
/* Slot deletion case: we have to update the current slot */
spin_lock(&kvm->mmu_lock);
if (!npages)
@@ -1232,7 +1236,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
return 0;
out_free:
- kvm_free_physmem_slot(&new, &old);
+ kvm_free_physmem_slot(kvm, &new, &old);
out:
return r;
@@ -1279,6 +1283,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
if (!memslot->dirty_bitmap)
goto out;
+ kvm_arch_get_dirty_log(kvm, memslot);
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
for (i = 0; !any && i < n/sizeof(long); ++i)
Right now, dirty page tracking works with the help of page faults: when we want to track a page for being dirty, we write-protect it and mark it dirty when a write page fault occurs. This patch moves the tracking over to the dirty bit of the spte instead.

Signed-off-by: Izik Eidus <ieidus@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c        |    4 ++++
 arch/powerpc/kvm/powerpc.c      |    4 ++++
 arch/s390/kvm/kvm-s390.c        |    4 ++++
 arch/x86/include/asm/kvm_host.h |    3 +++
 arch/x86/kvm/mmu.c              |   32 +++++++++++++++++++++++++++++---
 arch/x86/kvm/svm.c              |    7 +++++++
 arch/x86/kvm/vmx.c              |    7 +++++++
 arch/x86/kvm/x86.c              |   21 ++++++++++++++++++---
 include/linux/kvm_host.h        |    1 +
 virt/kvm/kvm_main.c             |   15 ++++++++++-----
 10 files changed, 87 insertions(+), 11 deletions(-)
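
Note (illustration only, not part of the patch): the standalone C sketch below contrasts the two schemes the changelog describes. All names, bit positions and helpers in it are invented for the example; only the idea, write-protect-and-fault versus harvest-the-dirty-bit, mirrors the patch.

/*
 * Toy model only: bit layout, names and helpers are invented for this
 * example and do not match the real KVM/x86 definitions.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_PRESENT   (1ULL << 0)
#define TOY_WRITABLE  (1ULL << 1)
#define TOY_DIRTY     (1ULL << 6)	/* stands in for the hardware dirty bit */

#define TOY_NPAGES 4

static uint64_t toy_spte[TOY_NPAGES];
static unsigned long toy_dirty_bitmap;

/*
 * Fault-based scheme: pages start write-protected; the simulated fault
 * marks the page dirty in software and only then makes the spte writable.
 */
static void toy_write_fault_based(int i)
{
	if (!(toy_spte[i] & TOY_WRITABLE)) {
		toy_dirty_bitmap |= 1UL << i;	/* role of mark_page_dirty() */
		toy_spte[i] |= TOY_WRITABLE;
	}
	toy_spte[i] |= TOY_DIRTY;	/* the MMU would set this on the write */
}

/*
 * Dirty-bit scheme: pages stay writable, so the write takes no fault;
 * only the (simulated) hardware dirty bit records it.
 */
static void toy_write_dirty_bit_based(int i)
{
	toy_spte[i] |= TOY_DIRTY;
}

/* A crude analogue of harvesting and clearing dirty bits from the sptes. */
static void toy_harvest_dirty_bits(void)
{
	for (int i = 0; i < TOY_NPAGES; i++) {
		if (toy_spte[i] & TOY_DIRTY) {
			toy_spte[i] &= ~TOY_DIRTY;
			toy_dirty_bitmap |= 1UL << i;
		}
	}
}

int main(void)
{
	for (int i = 0; i < TOY_NPAGES; i++)
		toy_spte[i] = TOY_PRESENT;	/* write-protected at first */
	toy_write_fault_based(1);
	printf("fault based:     bitmap=%#lx\n", toy_dirty_bitmap);

	toy_dirty_bitmap = 0;
	for (int i = 0; i < TOY_NPAGES; i++)
		toy_spte[i] = TOY_PRESENT | TOY_WRITABLE;
	toy_write_dirty_bit_based(2);
	toy_harvest_dirty_bits();
	printf("dirty-bit based: bitmap=%#lx\n", toy_dirty_bitmap);
	return 0;
}

The point of the change is visible in the second case: the guest write needs no fault between two GET_DIRTY_LOG calls; the cost moves to walking the rmaps to collect (and clear) the dirty bits when the log is read.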