@@ -15,6 +15,7 @@
/* Status bits only for huge segment entries */
#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */
+#define _SEGMENT_ENTRY_GMAP_UC 0x4000 /* user dirty (migration) */
/**
* struct gmap_struct - guest address space
@@ -139,4 +140,6 @@ void gmap_pte_notify(struct mm_struct *, unsigned long addr, pte_t *,
int gmap_mprotect_notify(struct gmap *, unsigned long start,
unsigned long len, int prot);
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr);
#endif /* _ASM_S390_GMAP_H */
@@ -511,19 +511,24 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
}
static void kvm_s390_sync_dirty_log(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot)
{
gfn_t cur_gfn, last_gfn;
- unsigned long address;
+ unsigned long gaddr, vmaddr;
+ unsigned long *dirty = memslot->dirty_bitmap;
struct gmap *gmap = kvm->arch.gmap;
- /* Loop over all guest pages */
+ /* Loop over all guest segments */
+ cur_gfn = memslot->base_gfn;
last_gfn = memslot->base_gfn + memslot->npages;
- for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
- address = gfn_to_hva_memslot(memslot, cur_gfn);
+ for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES, dirty += 4) {
+ gaddr = gfn_to_gpa(cur_gfn);
+ vmaddr = gfn_to_hva_memslot(memslot, cur_gfn);
+ if (kvm_is_error_hva(vmaddr))
+ continue;
+
+ gmap_sync_dirty_log_pmd(gmap, dirty, gaddr, vmaddr);
- if (test_and_clear_guest_dirty(gmap->mm, address))
- mark_page_dirty(kvm, cur_gfn);
if (fatal_signal_pending(current))
return;
cond_resched();
@@ -15,6 +15,7 @@
#include <linux/swapops.h>
#include <linux/ksm.h>
#include <linux/mman.h>
+#include <linux/hugetlb.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -544,6 +545,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ pmd_t unprot;
int rc;
BUG_ON(gmap_is_shadow(gmap));
@@ -601,12 +603,19 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
vmaddr >> PMD_SHIFT, table);
if (!rc) {
if (pmd_large(*pmd)) {
- *table = pmd_val(*pmd) &
- _SEGMENT_ENTRY_HARDWARE_BITS_LARGE;
+ *table = (pmd_val(*pmd) &
+ _SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
+ | _SEGMENT_ENTRY_GMAP_UC;
} else
*table = pmd_val(*pmd) &
_SEGMENT_ENTRY_HARDWARE_BITS;
}
+ } else if (*table & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmd) & _SEGMENT_ENTRY_PROTECT)) {
+ unprot = __pmd((*table & (_SEGMENT_ENTRY_HARDWARE_BITS_LARGE
+ & ~_SEGMENT_ENTRY_PROTECT))
+ | _SEGMENT_ENTRY_GMAP_UC);
+ gmap_pmdp_xchg(gmap, (pmd_t *)table, unprot, gaddr);
}
spin_unlock(&gmap->guest_table_lock);
spin_unlock(ptl);
@@ -937,6 +946,17 @@ static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr,
if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE)))
return -EAGAIN;
+ if (prot == PROT_NONE && !pmd_i) {
+ pmd_val(new) |= _SEGMENT_ENTRY_INVALID;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
+ if (prot == PROT_READ && !pmd_p) {
+ pmd_val(new) &= ~_SEGMENT_ENTRY_INVALID;
+ pmd_val(new) |= _SEGMENT_ENTRY_PROTECT;
+ gmap_pmdp_xchg(gmap, pmdp, new, gaddr);
+ }
+
if (bits & GMAP_NOTIFY_MPROT)
pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN;
return 0;
@@ -2390,6 +2410,65 @@ void gmap_pmdp_idte_global(struct mm_struct *mm, unsigned long vmaddr)
}
EXPORT_SYMBOL_GPL(gmap_pmdp_idte_global);
+/**
+ * gmap_test_and_clear_dirty_segment - test and reset segment dirty status
+ * @gmap: pointer to guest address space
+ * @pmdp: pointer to the pmd to be tested
+ * @gaddr: virtual address in the guest address space
+ *
+ * Returns true if the segment must be reported dirty: the entry is valid
+ * and is either writable or still carries the _SEGMENT_ENTRY_GMAP_UC
+ * software bit (set by __gmap_link when the entry was linked writable).
+ * Returns false for invalid entries and for clean entries (protected
+ * with no UC bit).  As a side effect, a dirty segment has its UC bit
+ * cleared and is re-protected via gmap_protect_pmd(PROT_READ) so the
+ * next guest write is caught again.
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.
+ */
+bool gmap_test_and_clear_dirty_segment(struct gmap *gmap, pmd_t *pmdp,
+ unsigned long gaddr)
+{
+ /* Invalid entries are not mapped for the guest and cannot be dirty */
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID)
+ return false;
+
+ /* Already protected memory, which did not change is clean */
+ if (pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT &&
+ !(pmd_val(*pmdp) & _SEGMENT_ENTRY_GMAP_UC))
+ return false;
+
+ /* Clear UC indication and reset protection */
+ pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_UC;
+ gmap_protect_pmd(gmap, gaddr, pmdp, PROT_READ, 0);
+ return true;
+}
+
+/**
+ * gmap_sync_dirty_log_pmd - set bitmap based on dirty status of segment
+ * @gmap: pointer to guest address space
+ * @bitmap: dirty bitmap for this pmd (4 longs = _PAGE_ENTRIES bits)
+ * @gaddr: virtual address in the guest address space
+ * @vmaddr: virtual address in the host address space
+ *
+ * For a huge pmd the segment has a single dirty indication, so either
+ * all of @bitmap is set (segment dirty) or none of it.  Otherwise each
+ * of the _PAGE_ENTRIES pages backing the segment is tested individually
+ * with test_and_clear_guest_dirty().
+ *
+ * This function is assumed to be called with the guest_table_lock
+ * held.  NOTE(review): gmap_pmd_op_walk()/gmap_pmd_op_end() appear to
+ * manage the locking themselves and the visible caller
+ * (kvm_s390_sync_dirty_log) does not take the lock -- please confirm
+ * and adjust this comment if the lock is taken internally.
+ */
+void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
+ unsigned long gaddr, unsigned long vmaddr)
+{
+ int i = 0;
+ pmd_t *pmdp;
+
+ pmdp = gmap_pmd_op_walk(gmap, gaddr);
+ if (!pmdp)
+ return;
+
+ if (pmd_large(*pmdp)) {
+ /* One dirty indication covers the whole segment:
+ * 32 bytes = 4 longs = _PAGE_ENTRIES (256) bits.
+ */
+ if (gmap_test_and_clear_dirty_segment(gmap, pmdp, gaddr))
+ memset(bitmap, 0xff, 32);
+ } else {
+ /* Normal mapping: test every backing page separately */
+ for (; i < _PAGE_ENTRIES; i++, vmaddr += PAGE_SIZE) {
+ if (test_and_clear_guest_dirty(gmap->mm, vmaddr))
+ set_bit_le(i, bitmap);
+ }
+ }
+ gmap_pmd_op_end(gmap, pmdp);
+}
+EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd);
+
static inline void thp_split_mm(struct mm_struct *mm)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -731,12 +731,6 @@ bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr)
pmd = pmd_alloc(mm, pud, addr);
if (!pmd)
return false;
- /* We can't run guests backed by huge pages, but userspace can
- * still set them up and then try to migrate them without any
- * migration support.
- */
- if (pmd_large(*pmd))
- return true;
ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (unlikely(!ptep))
To do dirty loging with huge pages, we protect huge pmds in the gmap. When they are written to, we unprotect them and mark them dirty. We introduce the function gmap_test_and_clear_dirty_segment which handles dirty sync for huge pages. Signed-off-by: Janosch Frank <frankja@linux.ibm.com> --- arch/s390/include/asm/gmap.h | 3 ++ arch/s390/kvm/kvm-s390.c | 19 ++++++---- arch/s390/mm/gmap.c | 83 ++++++++++++++++++++++++++++++++++++++++++-- arch/s390/mm/pgtable.c | 6 ---- 4 files changed, 96 insertions(+), 15 deletions(-)