Message ID | 1505899353-13554-4-git-send-email-aisaila@bitdefender.com (mailing list archive) |
---|---|
State | New, archived |
> -----Original Message-----
> From: Alexandru Isaila [mailto:aisaila@bitdefender.com]
> Sent: 20 September 2017 10:23
> To: xen-devel@lists.xen.org
> Cc: Tim (Xen.org) <tim@xen.org>; George Dunlap <George.Dunlap@citrix.com>;
> jbeulich@suse.com; Andrew Cooper <Andrew.Cooper3@citrix.com>;
> Ian Jackson <Ian.Jackson@citrix.com>; konrad.wilk@oracle.com;
> sstabellini@kernel.org; Wei Liu <wei.liu2@citrix.com>;
> Paul Durrant <Paul.Durrant@citrix.com>; boris.ostrovsky@oracle.com;
> suravee.suthikulpanit@amd.com; jun.nakajima@intel.com;
> Kevin Tian <kevin.tian@intel.com>; Alexandru Isaila <aisaila@bitdefender.com>
> Subject: [PATCH v4 3/3] x86/hvm: Implement hvmemul_write() using real
> mappings
>
> From: Andrew Cooper <andrew.cooper3@citrix.com>
>
> An access which crosses a page boundary is performed atomically by x86
> hardware, albeit with a severe performance penalty.  An important corner
> case is when a straddled access hits two pages which differ in whether a
> translation exists, or in net access rights.
>
> The use of hvm_copy*() in hvmemul_write() is problematic, because it
> performs a translation then completes the partial write, before moving
> on to the next translation.
>
> If an individual emulated write straddles two pages, the first of which
> is writable, and the second of which is not, the first half of the write
> will complete before #PF is raised from the second half.
>
> This results in guest state corruption as a side effect of emulation,
> which has been observed to cause Windows to crash while under
> introspection.
>
> Introduce the hvmemul_{,un}map_linear_addr() helpers, which translate
> the entire contents of a linear access, and vmap() the underlying frames
> to provide a contiguous virtual mapping for the emulator to use.  This is
> the same mechanism as used by the shadow emulation code.
>
> This will catch any translation issues and abort the emulation before
> any modifications occur.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Alexandru Isaila <aisaila@bitdefender.com>
>
> ---
> Changes since V3:
>         - Changed the if condition from ... > ... -1 to ... >= ...
>         - Removed long cast to the err var
>         - Moved the mfn++ closer to the end of the loop
>         - Integrated the ++frame into the while condition
> ---
>  xen/arch/x86/hvm/emulate.c        | 175 ++++++++++++++++++++++++++++++++++----
>  xen/include/asm-x86/hvm/emulate.h |   7 ++
>  2 files changed, 165 insertions(+), 17 deletions(-)
>
> [...]
> +        if ( p2m_is_discard_write(p2mt) )
> +        {
> +            err = ERR_PTR(~X86EMUL_OKAY);
> +            goto out;
> +        }
> +
> +        *mfn++ = _mfn(page_to_mfn(page));
> +

Much better :-) But I'd ditch the blank line here (and the corresponding
one...

> +    } while ( ++frame < final );
> [...]
> +    do
> +    {
> +        ASSERT(mfn_valid(*mfn));
> +        paging_mark_dirty(currd, *mfn);
> +        put_page(mfn_to_page(mfn_x(*mfn)));
> +
> +        *mfn++ = _mfn(0); /* Clean slot for map()'s error checking. */
> +

...here.

I'm sure an obliging committer can fix that though, so

Reviewed-by: Paul Durrant <paul.durrant@citrix.com>

> +    } while ( ++frame < final );
> [...]
> --
> 2.7.4
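(A note on the error-encoding convention the patch relies on, spelled out
here because it is easy to misread: the map helper returns X86EMUL_* codes
as one's-complemented ERR_PTR() values because X86EMUL_OKAY is 0 and
ERR_PTR(0) would not satisfy IS_ERR().  Complementing maps the small
non-negative codes onto -1, -2, ..., which do.  The caller simply inverts
again:

    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
    if ( IS_ERR(mapping) )
        return ~PTR_ERR(mapping);   /* recovers the X86EMUL_* code */

so ERR_PTR(~X86EMUL_OKAY), used for write-discard mappings, comes back out
as a plain X86EMUL_OKAY and the write becomes a silent no-op.  Note that
the NULL return for MMIO is not an ERR_PTR() value, so it still needs its
own check before the memcpy().)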
>>> On 20.09.17 at 11:22, <aisaila@bitdefender.com> wrote:
> +static void *hvmemul_map_linear_addr(
> +    unsigned long linear, unsigned int bytes, uint32_t pfec,
> +    struct hvm_emulate_ctxt *hvmemul_ctxt)
> +{
> [...]
> +        if ( p2m_is_discard_write(p2mt) )
> +        {
> +            err = ERR_PTR(~X86EMUL_OKAY);
> +            goto out;
> +        }
> +
> +        *mfn++ = _mfn(page_to_mfn(page));
> +
> +    } while ( ++frame < final );

Interesting - I had specifically pointed out in a reply to v3 that the
increment of mfn _cannot_ be moved down here: You're now leaking a page
ref on the p2m_is_discard_write() error path afaict.

Jan
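(For reference, the ordering Jan is pointing back to records the reference
in the mfn[] slot as soon as it is acquired, so that the cleanup loop at
the "out" label drops it on every subsequent error path, including the
discard-write one.  A sketch of that arrangement, not the exact v3 code:

        /* Record the held reference immediately: the "out" path below
         * put_page()s every slot that has already been filled in. */
        *mfn++ = _mfn(page_to_mfn(page));

        if ( p2m_is_discard_write(p2mt) )
        {
            err = ERR_PTR(~X86EMUL_OKAY);
            goto out;
        }

This matches the "move it back" option Alex settles on below.)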
On Wed, 2017-09-20 at 06:24 -0600, Jan Beulich wrote:
> >>> On 20.09.17 at 11:22, <aisaila@bitdefender.com> wrote:
> > +        *mfn++ = _mfn(page_to_mfn(page));
> > +
> > +    } while ( ++frame < final );
> Interesting - I had specifically pointed out in a reply to v3 that the
> increment of mfn _cannot_ be moved down here: You're now leaking a page
> ref on the p2m_is_discard_write() error path afaict.

Sorry about that, I realized the error after reading it again.  I'll wait
for all the comments before doing the final version.

Alex
> -----Original Message-----
> From: Jan Beulich [mailto:JBeulich@suse.com]
> Sent: 20 September 2017 13:24
> To: Alexandru Isaila <aisaila@bitdefender.com>
> Cc: suravee.suthikulpanit@amd.com; Andrew Cooper <Andrew.Cooper3@citrix.com>;
> Paul Durrant <Paul.Durrant@citrix.com>; Wei Liu <wei.liu2@citrix.com>;
> George Dunlap <George.Dunlap@citrix.com>; Ian Jackson <Ian.Jackson@citrix.com>;
> jun.nakajima@intel.com; Kevin Tian <kevin.tian@intel.com>;
> sstabellini@kernel.org; xen-devel@lists.xen.org; boris.ostrovsky@oracle.com;
> konrad.wilk@oracle.com; Tim (Xen.org) <tim@xen.org>
> Subject: Re: [PATCH v4 3/3] x86/hvm: Implement hvmemul_write() using real
> mappings
>
> >>> On 20.09.17 at 11:22, <aisaila@bitdefender.com> wrote:
> > +        if ( p2m_is_discard_write(p2mt) )
> > +        {
> > +            err = ERR_PTR(~X86EMUL_OKAY);
> > +            goto out;
> > +        }
> > +
> > +        *mfn++ = _mfn(page_to_mfn(page));
> > +
> > +    } while ( ++frame < final );
>
> Interesting - I had specifically pointed out in a reply to v3 that the
> increment of mfn _cannot_ be moved down here: You're now leaking a page
> ref on the p2m_is_discard_write() error path afaict.

It could be left here if a put_page() is added to the above error path,
which I'd clearly deluded myself was already there.

  Paul
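(Paul's alternative, sketched with the same structure as the v4 loop:
keep the increment at the bottom, but explicitly drop the reference taken
by hvm_translate_get_page() on the discard-write path, so nothing is left
for the "out" cleanup to miss:

        if ( p2m_is_discard_write(p2mt) )
        {
            put_page(page);   /* ref from hvm_translate_get_page() */
            err = ERR_PTR(~X86EMUL_OKAY);
            goto out;
        }

        *mfn++ = _mfn(page_to_mfn(page));

Either variant closes the leak; they differ only in where the reference
is accounted for.)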
On Wed, 2017-09-20 at 14:37 +0000, Paul Durrant wrote:
> > -----Original Message-----
> > From: Jan Beulich [mailto:JBeulich@suse.com]
> > Sent: 20 September 2017 13:24
> > [...]
> > Subject: Re: [PATCH v4 3/3] x86/hvm: Implement hvmemul_write() using
> > real mappings
> >
> > >>> On 20.09.17 at 11:22, <aisaila@bitdefender.com> wrote:
> > > +        if ( p2m_is_discard_write(p2mt) )
> > > +        {
> > > +            err = ERR_PTR(~X86EMUL_OKAY);
> > > +            goto out;
> > > +        }
> > > +
> > > +        *mfn++ = _mfn(page_to_mfn(page));
> > > +
> > > +    } while ( ++frame < final );
> > Interesting - I had specifically pointed out in a reply to v3 that the
> > increment of mfn _cannot_ be moved down here: You're now leaking a page
> > ref on the p2m_is_discard_write() error path afaict.
> It could be left here if a put_page() is added to the above error path,
> which I'd clearly deluded myself was already there.

I think it's clearer to move it back.

Alex
diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
index cc874ce..9866d64 100644
--- a/xen/arch/x86/hvm/emulate.c
+++ b/xen/arch/x86/hvm/emulate.c
@@ -498,6 +498,157 @@ static int hvmemul_do_mmio_addr(paddr_t mmio_gpa,
 }
 
 /*
+ * Map the frame(s) covering an individual linear access, for writeable
+ * access.  May return NULL for MMIO, or ERR_PTR(~X86EMUL_*) for other
+ * errors including ERR_PTR(~X86EMUL_OKAY) for write-discard mappings.
+ *
+ * In debug builds, map() checks that each slot in hvmemul_ctxt->mfn[] is
+ * clean before use, and poisons unused slots with INVALID_MFN.
+ */
+static void *hvmemul_map_linear_addr(
+    unsigned long linear, unsigned int bytes, uint32_t pfec,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct vcpu *curr = current;
+    void *err, *mapping;
+
+    /* First and final gfns which need mapping. */
+    unsigned long frame = linear >> PAGE_SHIFT, first = frame;
+    unsigned long final = (linear + bytes - !!bytes) >> PAGE_SHIFT;
+
+    /*
+     * mfn points to the next free slot.  All used slots have a page
+     * reference held on them.
+     */
+    mfn_t *mfn = &hvmemul_ctxt->mfn[0];
+
+    /*
+     * The caller has no legitimate reason for trying a zero-byte write, but
+     * final is calculated to fail safe in release builds.
+     *
+     * The maximum write size depends on the number of adjacent mfns[] which
+     * can be vmap()'d, accounting for possible misalignment within the
+     * region.  The higher level emulation callers are responsible for
+     * ensuring that mfns[] is large enough for the requested write size.
+     */
+    if ( bytes == 0 ||
+         final - first >= ARRAY_SIZE(hvmemul_ctxt->mfn) )
+    {
+        ASSERT_UNREACHABLE();
+        goto unhandleable;
+    }
+
+    do {
+        enum hvm_translation_result res;
+        struct page_info *page;
+        pagefault_info_t pfinfo;
+        p2m_type_t p2mt;
+
+        /* Error checking.  Confirm that the current slot is clean. */
+        ASSERT(mfn_x(*mfn) == 0);
+
+        res = hvm_translate_get_page(curr, frame << PAGE_SHIFT, true, pfec,
+                                     &pfinfo, &page, NULL, &p2mt);
+
+        switch ( res )
+        {
+        case HVMTRANS_okay:
+            break;
+
+        case HVMTRANS_bad_linear_to_gfn:
+            x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
+            err = ERR_PTR(~X86EMUL_EXCEPTION);
+            goto out;
+
+        case HVMTRANS_bad_gfn_to_mfn:
+            err = NULL;
+            goto out;
+
+        case HVMTRANS_gfn_paged_out:
+        case HVMTRANS_gfn_shared:
+            err = ERR_PTR(~X86EMUL_RETRY);
+            goto out;
+
+        default:
+            goto unhandleable;
+        }
+
+        if ( p2m_is_discard_write(p2mt) )
+        {
+            err = ERR_PTR(~X86EMUL_OKAY);
+            goto out;
+        }
+
+        *mfn++ = _mfn(page_to_mfn(page));
+
+    } while ( ++frame < final );
+
+    /* Entire access within a single frame? */
+    if ( first == final )
+        mapping = map_domain_page(hvmemul_ctxt->mfn[0]);
+    /* Multiple frames? Need to vmap(). */
+    else if ( (mapping = vmap(hvmemul_ctxt->mfn,
+                              final - first + 1)) == NULL )
+        goto unhandleable;
+
+#ifndef NDEBUG /* Poison unused mfn[]s with INVALID_MFN. */
+    while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
+    {
+        ASSERT(mfn_x(*mfn) == 0);
+        *mfn++ = INVALID_MFN;
+    }
+#endif
+
+    return mapping + (linear & ~PAGE_MASK);
+
+ unhandleable:
+    err = ERR_PTR(~X86EMUL_UNHANDLEABLE);
+
+ out:
+    /* Drop all held references. */
+    while ( mfn-- > hvmemul_ctxt->mfn )
+        put_page(mfn_to_page(mfn_x(*mfn)));
+
+    return err;
+}
+
+static void hvmemul_unmap_linear_addr(
+    void *mapping, unsigned long linear, unsigned int bytes,
+    struct hvm_emulate_ctxt *hvmemul_ctxt)
+{
+    struct domain *currd = current->domain;
+    unsigned long frame = linear >> PAGE_SHIFT;
+    unsigned long final = (linear + bytes - !!bytes) >> PAGE_SHIFT;
+    mfn_t *mfn = &hvmemul_ctxt->mfn[0];
+
+    ASSERT(bytes > 0);
+
+    if ( frame == final )
+        unmap_domain_page(mapping);
+    else
+        vunmap(mapping);
+
+    do
+    {
+        ASSERT(mfn_valid(*mfn));
+        paging_mark_dirty(currd, *mfn);
+        put_page(mfn_to_page(mfn_x(*mfn)));
+
+        *mfn++ = _mfn(0); /* Clean slot for map()'s error checking. */
+
+    } while ( ++frame < final );
+
+
+#ifndef NDEBUG /* Check (and clean) all unused mfns. */
+    while ( mfn < hvmemul_ctxt->mfn + ARRAY_SIZE(hvmemul_ctxt->mfn) )
+    {
+        ASSERT(mfn_eq(*mfn, INVALID_MFN));
+        *mfn++ = _mfn(0);
+    }
+#endif
+}
+
+/*
  * Convert addr from linear to physical form, valid over the range
  * [addr, addr + *reps * bytes_per_rep].  *reps is adjusted according to
  * the valid computed range.  It is always >0 when X86EMUL_OKAY is returned.
@@ -988,11 +1139,11 @@ static int hvmemul_write(
     struct hvm_emulate_ctxt *hvmemul_ctxt =
         container_of(ctxt, struct hvm_emulate_ctxt, ctxt);
     struct vcpu *curr = current;
-    pagefault_info_t pfinfo;
     unsigned long addr, reps = 1;
     uint32_t pfec = PFEC_page_present | PFEC_write_access;
     struct hvm_vcpu_io *vio = &curr->arch.hvm_vcpu.hvm_io;
     int rc;
+    void *mapping;
 
     if ( is_x86_system_segment(seg) )
         pfec |= PFEC_implicit;
@@ -1008,23 +1159,13 @@ static int hvmemul_write(
          (vio->mmio_gla == (addr & PAGE_MASK)) )
         return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec,
                                          hvmemul_ctxt, 1);
 
-    rc = hvm_copy_to_guest_linear(addr, p_data, bytes, pfec, &pfinfo);
+    mapping = hvmemul_map_linear_addr(addr, bytes, pfec, hvmemul_ctxt);
+    if ( IS_ERR(mapping) )
+        return ~PTR_ERR(mapping);
 
-    switch ( rc )
-    {
-    case HVMTRANS_okay:
-        break;
-    case HVMTRANS_bad_linear_to_gfn:
-        x86_emul_pagefault(pfinfo.ec, pfinfo.linear, &hvmemul_ctxt->ctxt);
-        return X86EMUL_EXCEPTION;
-    case HVMTRANS_bad_gfn_to_mfn:
-        return hvmemul_linear_mmio_write(addr, bytes, p_data, pfec,
-                                         hvmemul_ctxt, 0);
-    case HVMTRANS_gfn_paged_out:
-    case HVMTRANS_gfn_shared:
-        return X86EMUL_RETRY;
-    default:
-        return X86EMUL_UNHANDLEABLE;
-    }
+    memcpy(mapping, p_data, bytes);
+
+    hvmemul_unmap_linear_addr(mapping, addr, bytes, hvmemul_ctxt);
 
     return X86EMUL_OKAY;
 }
diff --git a/xen/include/asm-x86/hvm/emulate.h b/xen/include/asm-x86/hvm/emulate.h
index 8864775..d379a4a 100644
--- a/xen/include/asm-x86/hvm/emulate.h
+++ b/xen/include/asm-x86/hvm/emulate.h
@@ -37,6 +37,13 @@ struct hvm_emulate_ctxt {
     unsigned long seg_reg_accessed;
     unsigned long seg_reg_dirty;
 
+    /*
+     * MFNs behind temporary mappings in the write callback.  The length is
+     * arbitrary, and can be increased if writes longer than PAGE_SIZE+1 are
+     * needed.
+     */
+    mfn_t mfn[2];
+
     uint32_t intr_shadow;
 
     bool_t set_context;
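(To make the sizing comment in emulate.h concrete, here is the frame
arithmetic from the helpers applied to a boundary-straddling write; the
numeric values are illustrative and assume 4K pages:

    unsigned long linear = 0x1fff;   /* last byte of the first page */
    unsigned int  bytes  = 2;        /* write straddles into the next page */

    unsigned long frame = linear >> PAGE_SHIFT;                     /* 1 */
    unsigned long final = (linear + bytes - !!bytes) >> PAGE_SHIFT; /* 2 */

    /* final - frame == 1 < ARRAY_SIZE(hvmemul_ctxt->mfn) == 2, so the
     * two-entry mfn[] suffices for any write of up to PAGE_SIZE + 1
     * bytes regardless of alignment, as the structure comment says. */

The "- !!bytes" term makes final the frame of the access's last byte for
any non-zero length, while failing safe (final == frame) for the
illegitimate zero-byte case instead of underflowing at linear address 0.)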