Message ID | 1238164319-16092-5-git-send-email-joerg.roedel@amd.com (mailing list archive) |
---|---|
State | New, archived |
Joerg Roedel wrote:
> This patch adds the necessary data structures to take care of write
> protections in place within a second huge page sized page.
>
>
>  struct kvm_vcpu_arch {
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9936b45..7d4162d 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -390,6 +390,15 @@ static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
>  	return &slot->lpage_info[idx].write_count;
>  }
>
> +static int *slot_hugepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
> +{
> +	unsigned long idx;
> +
> +	idx = (gfn / KVM_PAGES_PER_1G_PAGE) -
> +	      (slot->base_gfn / KVM_PAGES_PER_1G_PAGE);
> +	return &slot->hpage_info[idx].write_count;
> +}
>

A page level argument would remove the need for this duplication, as
well as all the constants.

> +static int has_wrprotected_largepage(struct kvm *kvm, gfn_t gfn)
> +{
> +	struct kvm_memory_slot *slot;
> +	int *hugepage_idx;
> +
> +	gfn = unalias_gfn(kvm, gfn);
> +	slot = gfn_to_memslot_unaliased(kvm, gfn);
> +	if (slot) {
> +		hugepage_idx = slot_hugepage_idx(gfn, slot);

slot_largepage_idx() here?

I don't think we ever write protect large pages, so why is this needed?

> +		return *hugepage_idx;
> +	}
> +
> +	return 1;
> +}
>
> +
>  static enum kvm_page_size host_page_size(struct kvm *kvm, gfn_t gfn)
>  {
>  	struct vm_area_struct *vma;
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 095ebb6..2f05d48 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -103,7 +103,7 @@ struct kvm_memory_slot {
>  	struct {
>  		unsigned long rmap_pde;
>  		int write_count;
> -	} *lpage_info;
> +	} *lpage_info, *hpage_info;
>

	} * lpage_info[KVM_NR_PAGE_LEVELS];

> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 8aa3b95..c4842f4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -1001,10 +1001,14 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
>  	if (!dont || free->lpage_info != dont->lpage_info)
>  		vfree(free->lpage_info);
>
> +	if (!dont || free->hpage_info != dont->hpage_info)
> +		vfree(free->hpage_info);
>

loop

>  void kvm_free_physmem(struct kvm *kvm)
> @@ -1170,6 +1174,28 @@ int __kvm_set_memory_region(struct kvm *kvm,
>  		new.lpage_info[largepages-1].write_count = 1;
>  	}
>
> +#ifdef KVM_PAGES_PER_LHPAGE
> +	if (npages && !new.hpage_info) {
> +		int hugepages = npages / KVM_PAGES_PER_LHPAGE;
> +		if (npages % KVM_PAGES_PER_LHPAGE)
> +			hugepages++;
> +		if (base_gfn % KVM_PAGES_PER_LHPAGE)
> +			hugepages++;
> +
> +		new.hpage_info = vmalloc(hugepages * sizeof(*new.hpage_info));
> +
> +		if (!new.hpage_info)
> +			goto out_free;
> +
> +		memset(new.hpage_info, 0, hugepages * sizeof(*new.hpage_info));
> +
> +		if (base_gfn % KVM_PAGES_PER_LHPAGE)
> +			new.hpage_info[0].write_count = 1;
> +		if ((base_gfn+npages) % KVM_PAGES_PER_LHPAGE)
> +			new.hpage_info[hugepages-1].write_count = 1;
> +	}
> +#endif
>

Loop, KVM_NR_PAGE_LEVELS defined per arch.
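For illustration, the level-parameter idea Avi suggests could look roughly like the sketch below. The KVM_PAGES_PER_HPAGE(level) macro and the per-level lpage_info[] array are assumptions made for this sketch, not definitions from the posted patch.

```c
/*
 * Illustrative sketch only: one index helper covering every large page
 * level instead of separate slot_largepage_idx()/slot_hugepage_idx()
 * copies.  KVM_PAGES_PER_HPAGE(level) and the per-level lpage_info[]
 * array are assumed helpers, not taken from the posted patch.
 */
static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot,
			       int level)
{
	unsigned long idx;

	idx = (gfn / KVM_PAGES_PER_HPAGE(level)) -
	      (slot->base_gfn / KVM_PAGES_PER_HPAGE(level));
	return &slot->lpage_info[level][idx].write_count;
}
```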
On Sun, Mar 29, 2009 at 02:45:44PM +0300, Avi Kivity wrote:
> Joerg Roedel wrote:
>> This patch adds the necessary data structures to take care of write
>> protections in place within a second huge page sized page.
>>
>>  struct kvm_vcpu_arch {
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 9936b45..7d4162d 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -390,6 +390,15 @@ static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
>>  	return &slot->lpage_info[idx].write_count;
>>  }
>> +static int *slot_hugepage_idx(gfn_t gfn, struct kvm_memory_slot
>> *slot)
>> +{
>> +	unsigned long idx;
>> +
>> +	idx = (gfn / KVM_PAGES_PER_1G_PAGE) -
>> +	      (slot->base_gfn / KVM_PAGES_PER_1G_PAGE);
>> +	return &slot->hpage_info[idx].write_count;
>> +}
>>
>
> A page level argument would remove the need for this duplication, as
> well as all the constants.
>
>> +static int has_wrprotected_largepage(struct kvm *kvm, gfn_t gfn)
>> +{
>> +	struct kvm_memory_slot *slot;
>> +	int *hugepage_idx;
>> +
>> +	gfn = unalias_gfn(kvm, gfn);
>> +	slot = gfn_to_memslot_unaliased(kvm, gfn);
>> +	if (slot) {
>> +		hugepage_idx = slot_hugepage_idx(gfn, slot);
>>
>
> slot_largepage_idx() here?
>
> I don't think we ever write protect large pages, so why is this needed?

For 2mb pages we need to check if there is a write-protected 4k page in it
before we map a 2mb page for writing. If there is any write-protected 4k
page in a 2mb area this 2mb page is considered write-protected. These
'write-protected' 2mb pages are accounted in the account_shadow()
function. This information is taken into account when we decide if we
can map a guest 1gb page as a 1gb page on the host too.

	Joerg
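A condensed sketch of the size decision Joerg describes follows; the function name is made up and the host-side page size check that the series also performs is left out, so treat it purely as an illustration of the write-protection propagation from 4K to 2MB to 1GB.

```c
/*
 * Illustrative sketch only: pick the host mapping size for a guest
 * frame.  A 2MB mapping is refused if any 4K page inside the 2MB region
 * is write protected; a 1GB mapping is refused if any 2MB region inside
 * it is considered write protected the same way.
 */
static enum kvm_page_size guest_mapping_size(struct kvm *kvm, gfn_t gfn)
{
	if (has_wrprotected_page(kvm, gfn))
		return KVM_PAGE_SIZE_4k;
	if (has_wrprotected_largepage(kvm, gfn))
		return KVM_PAGE_SIZE_2M;
	return KVM_PAGE_SIZE_1G;
}
```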
Joerg Roedel wrote:
>>
>>> +static int has_wrprotected_largepage(struct kvm *kvm, gfn_t gfn)
>>> +{
>>> +	struct kvm_memory_slot *slot;
>>> +	int *hugepage_idx;
>>> +
>>> +	gfn = unalias_gfn(kvm, gfn);
>>> +	slot = gfn_to_memslot_unaliased(kvm, gfn);
>>> +	if (slot) {
>>> +		hugepage_idx = slot_hugepage_idx(gfn, slot);
>>>
>>>
>> slot_largepage_idx() here?
>>
>> I don't think we ever write protect large pages, so why is this needed?
>>
>
> For 2mb pages we need to check if there is a write-protected 4k page in it
> before we map a 2mb page for writing. If there is any write-protected 4k
> page in a 2mb area this 2mb page is considered write-protected. These
> 'write-protected' 2mb pages are accounted in the account_shadow()
> function. This information is taken into account when we decide if we
> can map a guest 1gb page as a 1gb page on the host too.
>

account_shadowed() actually increments a hugepage write_count by 1 for
every 4K page, not 2M page, if I read the code correctly. The code I
commented on is right though.

The naming is confusing. I suggest
has_wrprotected_page_in_{large,huge}page(), although with a level
parameter we can keep has_wrprotected_page().

btw, if we implement account_shadow() as you describe it (only account
hugepages on largepage transition 0->1 or 1->0) we save a potential
cacheline bounce on the hugepage write_count accounting array.
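The transition-based accounting Avi mentions could look roughly like the sketch below. This is illustrative only; the posted patch simply bumps both counters for every shadowed 4K page.

```c
/*
 * Illustrative sketch only: bump the 1GB-level counter only when the
 * 2MB-level counter crosses the 0 <-> 1 boundary, so the hugepage
 * write_count array is written far less often (fewer cacheline bounces).
 */
static void account_shadowed(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int *large_count, *huge_count;

	gfn = unalias_gfn(kvm, gfn);
	slot = gfn_to_memslot_unaliased(kvm, gfn);
	large_count = slot_largepage_idx(gfn, slot);
	huge_count = slot_hugepage_idx(gfn, slot);

	*large_count += 1;
	if (*large_count == 1)	/* first protected 4K page in this 2MB region */
		*huge_count += 1;
}

static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
{
	struct kvm_memory_slot *slot;
	int *large_count, *huge_count;

	gfn = unalias_gfn(kvm, gfn);
	slot = gfn_to_memslot_unaliased(kvm, gfn);
	large_count = slot_largepage_idx(gfn, slot);
	huge_count = slot_hugepage_idx(gfn, slot);

	*large_count -= 1;
	WARN_ON(*large_count < 0);
	if (*large_count == 0)	/* last protected 4K page in this 2MB region */
		*huge_count -= 1;
	WARN_ON(*huge_count < 0);
}
```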
Joerg Roedel wrote:
> This patch adds the necessary data structures to take care of write
> protections in place within a second huge page sized page.
>
>
> +#ifdef KVM_PAGES_PER_LHPAGE
> +	if (npages && !new.hpage_info) {
> +		int hugepages = npages / KVM_PAGES_PER_LHPAGE;
> +		if (npages % KVM_PAGES_PER_LHPAGE)
> +			hugepages++;
> +		if (base_gfn % KVM_PAGES_PER_LHPAGE)
> +			hugepages++;
>

Consider a slot with base_gfn == 1 and npages == 1. This will have
hugepages == 2, which is wrong. I think the right calculation is

  ((base_gfn + npages - 1) / N) - (base_gfn / N) + 1

i.e. index of last page, plus one so we can store it.

The small huge page calculation is off as well.

> +
> +		new.hpage_info = vmalloc(hugepages * sizeof(*new.hpage_info));
> +
> +		if (!new.hpage_info)
> +			goto out_free;
> +
> +		memset(new.hpage_info, 0, hugepages * sizeof(*new.hpage_info));
> +
> +		if (base_gfn % KVM_PAGES_PER_LHPAGE)
> +			new.hpage_info[0].write_count = 1;
> +		if ((base_gfn+npages) % KVM_PAGES_PER_LHPAGE)
> +			new.hpage_info[hugepages-1].write_count = 1;
> +	}
> +#endif
> +
>
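A small, self-contained snippet to make the arithmetic visible for the base_gfn == 1, npages == 1 case (hypothetical standalone code, not part of the patch; N stands in for KVM_PAGES_PER_LHPAGE):

```c
#include <stdio.h>

/* Element count as the posted patch computes it. */
static unsigned long count_patch(unsigned long base_gfn, unsigned long npages,
				 unsigned long N)
{
	unsigned long hugepages = npages / N;

	if (npages % N)
		hugepages++;
	if (base_gfn % N)
		hugepages++;
	return hugepages;
}

/* Element count as Avi proposes: index of the last page's huge page,
 * minus the index of the first page's huge page, plus one. */
static unsigned long count_fixed(unsigned long base_gfn, unsigned long npages,
				 unsigned long N)
{
	return ((base_gfn + npages - 1) / N) - (base_gfn / N) + 1;
}

int main(void)
{
	unsigned long N = 1UL << 18;	/* 1GB / 4KB = 262144 small pages */

	/* base_gfn == 1, npages == 1: the slot fits in one huge page. */
	printf("patch: %lu, fixed: %lu\n",
	       count_patch(1, 1, N), count_fixed(1, 1, N));	/* prints 2 vs. 1 */
	return 0;
}
```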
On Sun, Mar 29, 2009 at 04:15:07PM +0300, Avi Kivity wrote:
> Joerg Roedel wrote:
>>>
>>>> +static int has_wrprotected_largepage(struct kvm *kvm, gfn_t gfn)
>>>> +{
>>>> +	struct kvm_memory_slot *slot;
>>>> +	int *hugepage_idx;
>>>> +
>>>> +	gfn = unalias_gfn(kvm, gfn);
>>>> +	slot = gfn_to_memslot_unaliased(kvm, gfn);
>>>> +	if (slot) {
>>>> +		hugepage_idx = slot_hugepage_idx(gfn, slot);
>>>>
>>> slot_largepage_idx() here?
>>>
>>> I don't think we ever write protect large pages, so why is this needed?
>>>
>>
>> For 2mb pages we need to check if there is a write-protected 4k page in it
>> before we map a 2mb page for writing. If there is any write-protected 4k
>> page in a 2mb area this 2mb page is considered write-protected. These
>> 'write-protected' 2mb pages are accounted in the account_shadow()
>> function. This information is taken into account when we decide if we
>> can map a guest 1gb page as a 1gb page on the host too.
>>
>
> account_shadowed() actually increments a hugepage write_count by 1 for
> every 4K page, not 2M page, if I read the code correctly. The code I
> commented on is right though.
>
> The naming is confusing. I suggest
> has_wrprotected_page_in_{large,huge}page(), although with a level
> parameter we can keep has_wrprotected_page().

Yeah true, the name is a bit confusing. I think a level parameter for
has_wrprotected_page() is the best solution.

	Joerg
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f268f99..a1df2a3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -58,7 +58,16 @@
 
 #define KVM_PAGES_PER_2M_PAGE	(KVM_2M_PAGE_SIZE / PAGE_SIZE)
 
+#define KVM_1G_PAGE_SHIFT	30
+#define KVM_1G_PAGE_SIZE	(1UL << KVM_1G_PAGE_SHIFT)
+#define KVM_1G_PAGE_MASK	(~(KVM_1G_PAGE_SIZE - 1))
+
+#define KVM_PAGES_PER_1G_PAGE	(KVM_1G_PAGE_SIZE / PAGE_SIZE)
+
+/* Huge Page */
 #define KVM_PAGES_PER_HPAGE	KVM_PAGES_PER_2M_PAGE
+/* Large Huge Page ;-) */
+#define KVM_PAGES_PER_LHPAGE	KVM_PAGES_PER_1G_PAGE
 
 #define DE_VECTOR 0
 #define DB_VECTOR 1
@@ -268,6 +277,7 @@ struct kvm_mmu {
 enum kvm_page_size {
 	KVM_PAGE_SIZE_4k	= (1 << 12),
 	KVM_PAGE_SIZE_2M	= (1 << 21),
+	KVM_PAGE_SIZE_1G	= (1 << 30),
 };
 
 struct kvm_vcpu_arch {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9936b45..7d4162d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -390,6 +390,15 @@ static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
 	return &slot->lpage_info[idx].write_count;
 }
 
+static int *slot_hugepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+{
+	unsigned long idx;
+
+	idx = (gfn / KVM_PAGES_PER_1G_PAGE) -
+	      (slot->base_gfn / KVM_PAGES_PER_1G_PAGE);
+	return &slot->hpage_info[idx].write_count;
+}
+
 static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 {
 	int *write_count;
@@ -398,6 +407,10 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
 	write_count = slot_largepage_idx(gfn,
 					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count += 1;
+
+	write_count = slot_hugepage_idx(gfn,
+					gfn_to_memslot_unaliased(kvm, gfn));
+	*write_count += 1;
 }
 
 static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
@@ -409,6 +422,11 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
 					 gfn_to_memslot_unaliased(kvm, gfn));
 	*write_count -= 1;
 	WARN_ON(*write_count < 0);
+
+	write_count = slot_hugepage_idx(gfn,
+					gfn_to_memslot_unaliased(kvm, gfn));
+	*write_count -= 1;
+	WARN_ON(*write_count < 0);
 }
 
 static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
@@ -426,6 +444,21 @@ static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
 	return 1;
 }
 
+static int has_wrprotected_largepage(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot;
+	int *hugepage_idx;
+
+	gfn = unalias_gfn(kvm, gfn);
+	slot = gfn_to_memslot_unaliased(kvm, gfn);
+	if (slot) {
+		hugepage_idx = slot_hugepage_idx(gfn, slot);
+		return *hugepage_idx;
+	}
+
+	return 1;
+}
+
 static enum kvm_page_size host_page_size(struct kvm *kvm, gfn_t gfn)
 {
 	struct vm_area_struct *vma;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 095ebb6..2f05d48 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -103,7 +103,7 @@ struct kvm_memory_slot {
 	struct {
 		unsigned long rmap_pde;
 		int write_count;
-	} *lpage_info;
+	} *lpage_info, *hpage_info;
 	unsigned long userspace_addr;
 	int user_alloc;
 };
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8aa3b95..c4842f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1001,10 +1001,14 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 	if (!dont || free->lpage_info != dont->lpage_info)
 		vfree(free->lpage_info);
 
+	if (!dont || free->hpage_info != dont->hpage_info)
+		vfree(free->hpage_info);
+
 	free->npages = 0;
 	free->dirty_bitmap = NULL;
 	free->rmap = NULL;
 	free->lpage_info = NULL;
+	free->hpage_info = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -1170,6 +1174,28 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		new.lpage_info[largepages-1].write_count = 1;
 	}
 
+#ifdef KVM_PAGES_PER_LHPAGE
+	if (npages && !new.hpage_info) {
+		int hugepages = npages / KVM_PAGES_PER_LHPAGE;
+		if (npages % KVM_PAGES_PER_LHPAGE)
+			hugepages++;
+		if (base_gfn % KVM_PAGES_PER_LHPAGE)
+			hugepages++;
+
+		new.hpage_info = vmalloc(hugepages * sizeof(*new.hpage_info));
+
+		if (!new.hpage_info)
+			goto out_free;
+
+		memset(new.hpage_info, 0, hugepages * sizeof(*new.hpage_info));
+
+		if (base_gfn % KVM_PAGES_PER_LHPAGE)
+			new.hpage_info[0].write_count = 1;
+		if ((base_gfn+npages) % KVM_PAGES_PER_LHPAGE)
+			new.hpage_info[hugepages-1].write_count = 1;
+	}
+#endif
+
 	/* Allocate page dirty bitmap if needed */
 	if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) {
 		unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8;
This patch adds the necessary data structures to take care of write
protections in place within a second huge page sized page.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/kvm_host.h |   10 ++++++++++
 arch/x86/kvm/mmu.c              |   33 +++++++++++++++++++++++++++++++++
 include/linux/kvm_host.h        |    2 +-
 virt/kvm/kvm_main.c             |   26 ++++++++++++++++++++++++++
 4 files changed, 70 insertions(+), 1 deletions(-)