Message ID | 20180627135510.117945-8-frankja@linux.ibm.com (mailing list archive)
---|---
State | New, archived
On 27.06.2018 15:55, Janosch Frank wrote:
> From: Janosch Frank <frankja@linux.vnet.ibm.com>
>
> Storage keys for guests with huge page mappings have to directly set
> the key in hardware. There are no PGSTEs for PMDs that we could use to
> retain the guest's logical view of the key.

As we don't walk the actual gmap, we will never try to touch fake 4k
page tables, right?

>
> As pmds are handled from userspace side, KVM skey emulation for split
> pmds will always use the hardware's storage key.
>
> Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
> ---
>  arch/s390/mm/pgtable.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 98 insertions(+), 6 deletions(-)
>
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 7bc79aae3c25..158c880226fe 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -811,12 +811,45 @@ EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
>  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  			  unsigned char key, bool nq)
>  {
> -	unsigned long keyul;
> +	unsigned long keyul, address;
>  	spinlock_t *ptl;
>  	pgste_t old, new;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>
> -	ptep = get_locked_pte(mm, addr, &ptl);
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
> +		return -EFAULT;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);

Is there no pmd_unlock()? Maybe there should be one :)

Using spin_unlock() here looks strange on first sight.

> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		/*
> +		 * Huge pmds need quiescing operations, they are
> +		 * always mapped.
> +		 */
> +		page_set_storage_key(address, key, 1);
> +		spin_unlock(ptl);
> +		return 0;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
>
> @@ -827,7 +860,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
>  	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
>  	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
> -		unsigned long address, bits, skey;
> +		unsigned long bits, skey;
>
>  		address = pte_val(*ptep) & PAGE_MASK;
>  		skey = (unsigned long) page_get_storage_key(address);
> @@ -890,14 +923,43 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
>  int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
>  {
>  	spinlock_t *ptl;
> +	unsigned long address;
>  	pgste_t old, new;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>  	int cc = 0;
>
> -	ptep = get_locked_pte(mm, addr, &ptl);
> -	if (unlikely(!ptep))
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
>  		return -EFAULT;
>
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);
> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		cc = page_reset_referenced(addr);
> +		spin_unlock(ptl);
> +		return cc;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
> +	if (unlikely(!ptep))
> +		return -EFAULT;
>  	new = old = pgste_get_lock(ptep);
>  	/* Reset guest reference bit only */
>  	pgste_val(new) &= ~PGSTE_GR_BIT;
> @@ -922,11 +984,41 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
>  int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>  			  unsigned char *key)
>  {
> +	unsigned long address;
>  	spinlock_t *ptl;
>  	pgste_t pgste;
> +	pgd_t *pgd;
> +	p4d_t *p4d;
> +	pud_t *pud;
> +	pmd_t *pmd;
>  	pte_t *ptep;
>
> -	ptep = get_locked_pte(mm, addr, &ptl);
> +	pgd = pgd_offset(mm, addr);
> +	p4d = p4d_alloc(mm, pgd, addr);
> +	if (!p4d)
> +		return -EFAULT;
> +	pud = pud_alloc(mm, p4d, addr);
> +	if (!pud)
> +		return -EFAULT;
> +	pmd = pmd_alloc(mm, pud, addr);
> +	if (!pmd)
> +		return -EFAULT;
> +
> +	ptl = pmd_lock(mm, pmd);
> +	if (!pmd_present(*pmd)) {
> +		spin_unlock(ptl);
> +		return -EFAULT;
> +	}
> +	if (pmd_large(*pmd)) {
> +		address = pmd_val(*pmd) & HPAGE_MASK;
> +		address |= addr & ~HPAGE_MASK;
> +		*key = page_get_storage_key(address);

(not having looked in detail through all the patches)

We are guaranteed to have valid storage keys at this point, right?
(patch #6)

> +		spin_unlock(ptl);
> +		return 0;
> +	}
> +	spin_unlock(ptl);
> +
> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
>

In general, would a helper __get_locked_pmd() make sense? (could be
s390x specific) We have quite some duplicate code here.
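An aside on the pmd_unlock() remark above: pmd_lock() in
include/linux/mm.h acquires the spinlock it obtains from pmd_lockptr()
and returns it to the caller, so a matching unlock helper, if one were
added, would be a one-line wrapper. A minimal sketch of this
hypothetical helper (no such function exists in the tree at this
point):

/*
 * Hypothetical counterpart to pmd_lock(); sketch only, not in
 * include/linux/mm.h. pmd_lock() returns the ptl it took from
 * pmd_lockptr(), so unlocking is a plain spin_unlock() on it.
 */
static inline void pmd_unlock(spinlock_t *ptl)
{
        spin_unlock(ptl);
}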
On 28.06.2018 09:49, David Hildenbrand wrote:
> On 27.06.2018 15:55, Janosch Frank wrote:
>> From: Janosch Frank <frankja@linux.vnet.ibm.com>
>>
>> Storage keys for guests with huge page mappings have to directly set
>> the key in hardware. There are no PGSTEs for PMDs that we could use to
>> retain the guest's logical view of the key.
>
> As we don't walk the actual gmap, we will never try to touch fake 4k
> page tables, right?

Correct

>
>>
>> As pmds are handled from userspace side, KVM skey emulation for split
>> pmds will always use the hardware's storage key.
>>
>> Signed-off-by: Janosch Frank <frankja@linux.vnet.ibm.com>
>> ---
>>  arch/s390/mm/pgtable.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 98 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
>> index 7bc79aae3c25..158c880226fe 100644
>> --- a/arch/s390/mm/pgtable.c
>> +++ b/arch/s390/mm/pgtable.c
>> @@ -811,12 +811,45 @@ EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
>>  int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
>>  			  unsigned char key, bool nq)
>>  {
>> -	unsigned long keyul;
>> +	unsigned long keyul, address;
>>  	spinlock_t *ptl;
>>  	pgste_t old, new;
>> +	pgd_t *pgd;
>> +	p4d_t *p4d;
>> +	pud_t *pud;
>> +	pmd_t *pmd;
>>  	pte_t *ptep;
>>
>> -	ptep = get_locked_pte(mm, addr, &ptl);
>> +	pgd = pgd_offset(mm, addr);
>> +	p4d = p4d_alloc(mm, pgd, addr);
>> +	if (!p4d)
>> +		return -EFAULT;
>> +	pud = pud_alloc(mm, p4d, addr);
>> +	if (!pud)
>> +		return -EFAULT;
>> +	pmd = pmd_alloc(mm, pud, addr);
>> +	if (!pmd)
>> +		return -EFAULT;
>> +
>> +	ptl = pmd_lock(mm, pmd);
>> +	if (!pmd_present(*pmd)) {
>> +		spin_unlock(ptl);
>
> Is there no pmd_unlock()? Maybe there should be one :)
>
> Using spin_unlock() here looks strange on first sight.

There doesn't seem to be one in include/linux/mm.h, where it comes
from, and I'm not gonna create one.

>
>> +		return -EFAULT;
>> +	}
>> +	if (pmd_large(*pmd)) {
>> +		address = pmd_val(*pmd) & HPAGE_MASK;
>> +		address |= addr & ~HPAGE_MASK;
>> +		/*
>> +		 * Huge pmds need quiescing operations, they are
>> +		 * always mapped.
>> +		 */
>> +		page_set_storage_key(address, key, 1);
>> +		spin_unlock(ptl);
>> +		return 0;
>> +	}
>> +	spin_unlock(ptl);
>> +
>> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>>  	if (unlikely(!ptep))
>>  		return -EFAULT;
>>
[...]
>> -	ptep = get_locked_pte(mm, addr, &ptl);
>> +	pgd = pgd_offset(mm, addr);
>> +	p4d = p4d_alloc(mm, pgd, addr);
>> +	if (!p4d)
>> +		return -EFAULT;
>> +	pud = pud_alloc(mm, p4d, addr);
>> +	if (!pud)
>> +		return -EFAULT;
>> +	pmd = pmd_alloc(mm, pud, addr);
>> +	if (!pmd)
>> +		return -EFAULT;
>> +
>> +	ptl = pmd_lock(mm, pmd);
>> +	if (!pmd_present(*pmd)) {
>> +		spin_unlock(ptl);
>> +		return -EFAULT;
>> +	}
>> +	if (pmd_large(*pmd)) {
>> +		address = pmd_val(*pmd) & HPAGE_MASK;
>> +		address |= addr & ~HPAGE_MASK;
>> +		*key = page_get_storage_key(address);
>
> (not having looked in detail through all the patches)
>
> We are guaranteed to have valid storage keys at this point, right?
> (patch #6)

Yes, the VM part goes through the enablement and kvm_s390_get_skeys
uses the mm->context for checking.

>
>> +		spin_unlock(ptl);
>> +		return 0;
>> +	}
>> +	spin_unlock(ptl);
>> +
>> +	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
>>  	if (unlikely(!ptep))
>>  		return -EFAULT;
>>
>>
>
>
> In general, would a helper __get_locked_pmd() make sense? (could be
> s390x specific) We have quite some duplicate code here.

In the gmap I use huge_pte_offset and cast it back to pmd; I'll change
it for the next version.
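To make the suggested deduplication concrete, here is a minimal sketch
of what a __get_locked_pmd() helper could look like. The name is taken
from David's mail; the exact contract (return the locked pmd or NULL
and let the caller drop *ptl) is an assumption, not what was merged:

/*
 * Sketch only: walk to (and if necessary allocate) the pmd for addr
 * and return it with the pmd lock held, or NULL on allocation failure
 * or a non-present entry (both map to -EFAULT in the callers above).
 * On success the caller is responsible for spin_unlock(*ptl).
 */
static pmd_t *__get_locked_pmd(struct mm_struct *mm, unsigned long addr,
                               spinlock_t **ptl)
{
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;
        pmd_t *pmd;

        pgd = pgd_offset(mm, addr);
        p4d = p4d_alloc(mm, pgd, addr);
        if (!p4d)
                return NULL;
        pud = pud_alloc(mm, p4d, addr);
        if (!pud)
                return NULL;
        pmd = pmd_alloc(mm, pud, addr);
        if (!pmd)
                return NULL;

        *ptl = pmd_lock(mm, pmd);
        if (!pmd_present(*pmd)) {
                spin_unlock(*ptl);
                return NULL;
        }
        return pmd;
}

Each of the three skey functions in the patch would then open with
"pmd = __get_locked_pmd(mm, addr, &ptl); if (!pmd) return -EFAULT;" and
keep only the pmd_large() and pte paths that actually differ.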
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 7bc79aae3c25..158c880226fe 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -811,12 +811,45 @@ EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty);
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char key, bool nq)
 {
-	unsigned long keyul;
+	unsigned long keyul, address;
 	spinlock_t *ptl;
 	pgste_t old, new;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -EFAULT;
+
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		/*
+		 * Huge pmds need quiescing operations, they are
+		 * always mapped.
+		 */
+		page_set_storage_key(address, key, 1);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
 
@@ -827,7 +860,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 	pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48;
 	pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56;
 	if (!(pte_val(*ptep) & _PAGE_INVALID)) {
-		unsigned long address, bits, skey;
+		unsigned long bits, skey;
 
 		address = pte_val(*ptep) & PAGE_MASK;
 		skey = (unsigned long) page_get_storage_key(address);
@@ -890,14 +923,43 @@ EXPORT_SYMBOL(cond_set_guest_storage_key);
 int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
 {
 	spinlock_t *ptl;
+	unsigned long address;
 	pgste_t old, new;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 	int cc = 0;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
-	if (unlikely(!ptep))
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
 		return -EFAULT;
 
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		cc = page_reset_referenced(addr);
+		spin_unlock(ptl);
+		return cc;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+	if (unlikely(!ptep))
+		return -EFAULT;
 	new = old = pgste_get_lock(ptep);
 	/* Reset guest reference bit only */
 	pgste_val(new) &= ~PGSTE_GR_BIT;
@@ -922,11 +984,41 @@ EXPORT_SYMBOL(reset_guest_reference_bit);
 int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned char *key)
 {
+	unsigned long address;
 	spinlock_t *ptl;
 	pgste_t pgste;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
 	pte_t *ptep;
 
-	ptep = get_locked_pte(mm, addr, &ptl);
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_alloc(mm, pgd, addr);
+	if (!p4d)
+		return -EFAULT;
+	pud = pud_alloc(mm, p4d, addr);
+	if (!pud)
+		return -EFAULT;
+	pmd = pmd_alloc(mm, pud, addr);
+	if (!pmd)
+		return -EFAULT;
+
+	ptl = pmd_lock(mm, pmd);
+	if (!pmd_present(*pmd)) {
+		spin_unlock(ptl);
+		return -EFAULT;
+	}
+	if (pmd_large(*pmd)) {
+		address = pmd_val(*pmd) & HPAGE_MASK;
+		address |= addr & ~HPAGE_MASK;
+		*key = page_get_storage_key(address);
+		spin_unlock(ptl);
+		return 0;
+	}
+	spin_unlock(ptl);
+
+	ptep = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
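A note on the address computation the three hunks above repeat:
pmd_val(*pmd) & HPAGE_MASK yields the 1 MB segment origin stored in the
pmd entry (s390 huge pages are 1 MB, HPAGE_SHIFT == 20), and
addr & ~HPAGE_MASK is the byte offset within that segment, which picks
the 4 KB block whose storage key is accessed. A standalone sketch of
the arithmetic; EX_HPAGE_MASK is a local stand-in for the kernel's
HPAGE_MASK, and the values are made up for illustration:

#include <stdio.h>

/* Local stand-in for s390's HPAGE_MASK (1 MB segments). */
#define EX_HPAGE_MASK (~((1UL << 20) - 1))

/*
 * Combine the segment origin from the pmd entry with the page offset
 * of the user address, as the skey functions above do.
 */
static unsigned long hpage_key_address(unsigned long pmd_origin,
                                       unsigned long addr)
{
        unsigned long address = pmd_origin & EX_HPAGE_MASK;

        address |= addr & ~EX_HPAGE_MASK;
        return address;
}

int main(void)
{
        /* Segment origin 0xabc00000, fault address 0x12345678. */
        printf("%lx\n", hpage_key_address(0xabc00000UL, 0x12345678UL));
        /* Prints abc45678: origin plus the 0x45678 in-segment offset. */
        return 0;
}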