| Message ID | 1506049330-11196-29-git-send-email-tianyu.lan@intel.com (mailing list archive) |
|---|---|
| State | New, archived |
On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@intel.com>
>
> Queued Invalidation Interface is an expanded invalidation interface with
> extended capabilities. Hardware implementations report support for queued
> invalidation interface through the Extended Capability Register. The queued
> invalidation interface uses an Invalidation Queue (IQ), which is a circular
> buffer in system memory. Software submits commands by writing Invalidation
> Descriptors to the IQ.
>
> In this patch, a new function viommu_process_iq() is used for emulating how
> hardware handles invalidation requests through QI.
>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  xen/drivers/passthrough/vtd/iommu.h |  19 ++-
>  xen/drivers/passthrough/vtd/vvtd.c  | 232 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 250 insertions(+), 1 deletion(-)
>
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index c69cd21..c2b83f1 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -177,6 +177,21 @@
>  #define DMA_IRTA_S(val)     (val & 0xf)
>  #define DMA_IRTA_SIZE(val)  (1UL << (DMA_IRTA_S(val) + 1))
>
> +/* IQA_REG */
> +#define DMA_IQA_ADDR(val)   (val & ~0xfffULL)
> +#define DMA_IQA_QS(val)     (val & 0x7)
> +#define DMA_IQA_RSVD        0xff8ULL
> +
> +/* IECTL_REG */
> +#define DMA_IECTL_IM_SHIFT  31
> +#define DMA_IECTL_IM        (1 << DMA_IECTL_IM_SHIFT)

Isn't this undefined behavior? It should be 1u. You should consider using
u for all the defines added.

> +#define DMA_IECTL_IP_SHIFT  30
> +#define DMA_IECTL_IP        (1 << DMA_IECTL_IP_SHIFT)
> +
> +/* ICS_REG */
> +#define DMA_ICS_IWC_SHIFT   0
> +#define DMA_ICS_IWC         (1 << DMA_ICS_IWC_SHIFT)
> +
>  /* PMEN_REG */
>  #define DMA_PMEN_EPM    (((u32)1) << 31)
>  #define DMA_PMEN_PRS    (((u32)1) << 0)
> @@ -211,7 +226,8 @@
>  #define DMA_FSTS_PPF    (1U << DMA_FSTS_PPF_SHIFT)
>  #define DMA_FSTS_AFO    (1U << 2)
>  #define DMA_FSTS_APF    (1U << 3)
> -#define DMA_FSTS_IQE    (1U << 4)
> +#define DMA_FSTS_IQE_SHIFT  4
> +#define DMA_FSTS_IQE    (1U << DMA_FSTS_IQE_SHIFT)
>  #define DMA_FSTS_ICE    (1U << 5)
>  #define DMA_FSTS_ITE    (1U << 6)
>  #define DMA_FSTS_PRO_SHIFT  7
> @@ -562,6 +578,7 @@ struct qinval_entry {
>
>  /* Queue invalidation head/tail shift */
>  #define QINVAL_INDEX_SHIFT  4
> +#define QINVAL_INDEX_MASK   0x7fff0ULL
>
>  #define qinval_present(v)   ((v).lo & 1)
>  #define qinval_fault_disable(v)    (((v).lo >> 1) & 1)
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index 55f7a46..668d0c9 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -28,6 +28,7 @@
>  #include <asm/current.h>
>  #include <asm/event.h>
>  #include <asm/hvm/domain.h>
> +#include <asm/hvm/support.h>
>  #include <asm/io_apic.h>
>  #include <asm/page.h>
>  #include <asm/p2m.h>
> @@ -419,6 +420,177 @@ static int vvtd_record_fault(struct vvtd *vvtd,
>      return X86EMUL_OKAY;
>  }
>
> +/*
> + * Process an invalidation descriptor. Currently, only two types of
> + * descriptors, Interrupt Entry Cache Invalidation Descriptor and
> + * Invalidation Wait Descriptor, are handled.
> + * @vvtd: the virtual vtd instance
> + * @i: the index of the invalidation descriptor to be processed
> + *
> + * Returns 0 on success, non-zero on failure.
> + */
> +static int process_iqe(struct vvtd *vvtd, int i)

unsigned int.
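As an aside, both remarks above concern signedness. A minimal standalone
sketch (hypothetical macro names, not the patch's) of why `1 << 31` is
undefined with a 32-bit int, and the `1u` fix Roger suggests:

```c
#include <stdio.h>

/* With a 32-bit int, 1 << 31 shifts into the sign bit: undefined
 * behavior in C, even though many compilers happen to yield 0x80000000. */
#define IM_BIT_BAD  (1 << 31)

/* An unsigned literal makes the shift well-defined. */
#define IM_BIT_OK   (1u << 31)

int main(void)
{
    printf("%#x\n", IM_BIT_OK);  /* prints 0x80000000 portably */
    return 0;
}
```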
> +{
> +    uint64_t iqa;
> +    struct qinval_entry *qinval_page;
> +    int ret = 0;
> +
> +    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +    qinval_page = map_guest_page(vvtd->domain, DMA_IQA_ADDR(iqa)>>PAGE_SHIFT);

PFN_DOWN instead of open coding the shift. Both can be initialized at
declaration. Also AFAICT iqa is only used once, so the local variable is
not needed.

> +    if ( IS_ERR(qinval_page) )
> +    {
> +        gdprintk(XENLOG_ERR, "Can't map guest IRT (rc %ld)",
> +                 PTR_ERR(qinval_page));
> +        return PTR_ERR(qinval_page);
> +    }
> +
> +    switch ( qinval_page[i].q.inv_wait_dsc.lo.type )
> +    {
> +    case TYPE_INVAL_WAIT:
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.sw )
> +        {
> +            uint32_t data = qinval_page[i].q.inv_wait_dsc.lo.sdata;
> +            uint64_t addr = (qinval_page[i].q.inv_wait_dsc.hi.saddr << 2);

Unneeded parentheses.

> +
> +            ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
> +            if ( ret )
> +                vvtd_info("Failed to write status address");

Don't you need to return or do something here? (like raise some kind of
error?)

> +        }
> +
> +        /*
> +         * The following code generates an invalidation completion event
> +         * indicating the invalidation wait descriptor completion. Note that
> +         * the following code fragment is not tested properly.
> +         */
> +        if ( qinval_page[i].q.inv_wait_dsc.lo.iflag )
> +        {
> +            uint32_t ie_data, ie_addr;

Missing newline, but...

> +            if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT) )
> +            {
> +                vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +                if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT) )
> +                {
> +                    ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +                    ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +                    vvtd_generate_interrupt(vvtd, ie_addr, ie_data);

...you don't seem to need the two local variables. They are used only
once.

> +                    vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +                }
> +            }
> +        }
> +        break;
> +
> +    case TYPE_INVAL_IEC:
> +        /*
> +         * Currently, no cache is preserved in hypervisor. Only need to update
> +         * pIRTEs which are modified in binding process.
> +         */
> +        break;
> +
> +    default:
> +        goto error;

There's no reason to use a label that's only used for the default case.
Simply place the code in the error label here.

> +    }
> +
> +    unmap_guest_page((void*)qinval_page);
> +    return ret;
> +
> + error:
> +    unmap_guest_page((void*)qinval_page);
> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
> +    domain_crash(vvtd->domain);

Do you really need to crash the domain in such case?

> +    return ret;
> +}
> +
> +/*
> + * Invalidate all the descriptors in the Invalidation Queue.
> + */
> +static void vvtd_process_iq(struct vvtd *vvtd)
> +{
> +    uint64_t iqh, iqt, iqa, max_entry, i;
> +    int err = 0;
> +
> +    /*
> +     * No new descriptor is fetched from the Invalidation Queue until
> +     * software clears the IQE field in the Fault Status Register
> +     */
> +    if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
> +        return;
> +
> +    iqh = vvtd_get_reg_quad(vvtd, DMAR_IQH_REG);
> +    iqt = vvtd_get_reg_quad(vvtd, DMAR_IQT_REG);
> +    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +
> +    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));
> +    iqh = MASK_EXTR(iqh, QINVAL_INDEX_MASK);
> +    iqt = MASK_EXTR(iqt, QINVAL_INDEX_MASK);

This should be done above, when they are initialized.
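For context, MASK_EXTR extracts the field selected by a mask and shifts it
down to bit 0. A self-contained sketch of the same idea applied to the
queue index (the register value below is made up for illustration):

```c
#include <inttypes.h>
#include <stdio.h>

/* Same idea as Xen's MASK_EXTR: dividing by the mask's lowest set bit
 * shifts the masked field down to bit 0. */
#define MASK_EXTR(v, m) (((v) & (m)) / ((m) & -(m)))

#define QINVAL_INDEX_MASK 0x7fff0ULL

int main(void)
{
    uint64_t iqh = 0x230;  /* hypothetical raw IQH register value */

    /* Bits 18:4 hold the index, so this prints 0x23 (0x230 >> 4). */
    printf("%#" PRIx64 "\n", MASK_EXTR(iqh, QINVAL_INDEX_MASK));
    return 0;
}
```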
> +
> +    ASSERT(iqt < max_entry);
> +    if ( iqh == iqt )
> +        return;
> +
> +    for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
> +    {
> +        err = process_iqe(vvtd, i);

process_iqe takes an int parameter, and here you are feeding it a
uint64_t.

> +        if ( err )
> +            break;
> +    }
> +    vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << QINVAL_INDEX_SHIFT);
> +
> +    /*
> +     * When IQE is set, IQH references the descriptor associated with
> +     * the error.
> +     */
> +    if ( err )
> +        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
> +}
> +
> +static int vvtd_write_iqt(struct vvtd *vvtd, unsigned long val)

Not sure there's much point in making this function return int, when all
the return values are X86EMUL_OKAY. Since val here is a register AFAICT,
I would rather prefer that you use an explicit type size (uint64_t or
uint32_t as fits).

> +{
> +    uint64_t max_entry, iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
> +
> +    if ( val & ~QINVAL_INDEX_MASK )
> +    {
> +        vvtd_info("Attempt to set reserved bits in Invalidation Queue Tail");
> +        return X86EMUL_OKAY;
> +    }
> +
> +    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));

1ull please.

> +    if ( MASK_EXTR(val, QINVAL_INDEX_MASK) >= max_entry )
> +    {
> +        vvtd_info("IQT: Value %lx exceeded supported max index.", val);
> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQT_REG, val);
> +    vvtd_process_iq(vvtd);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_iqa(struct vvtd *vvtd, unsigned long val)

Same here, it seems like this function should return void, because the
current return value is meaningless, and same comment about 'val' being
uintXX_t.

> +{
> +    uint32_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
> +    unsigned int guest_max_addr_width = cap_mgaw(cap);
> +
> +    if ( val & (~((1ULL << guest_max_addr_width) - 1) | DMA_IQA_RSVD) )
> +    {
> +        vvtd_info("Attempt to set reserved bits in Invalidation Queue Address");
> +        return X86EMUL_OKAY;
> +    }
> +
> +    vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, val);
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
> +{
> +    if ( val & DMA_ICS_IWC )
> +    {
> +        vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT);
> +        /*When IWC field is cleared, the IP field needs to be cleared */

^ missing space.

> +        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
> +    }
> +    return X86EMUL_OKAY;

This function wants to be void.

> +}
> +
>  static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
>  {
>      /* Writing a 1 means clear fault */
> @@ -430,6 +602,30 @@ static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
>      return X86EMUL_OKAY;
>  }
>
> +static int vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)

void please.

> +{
> +    /*
> +     * Only DMA_IECTL_IM bit is writable. Generate pending event when unmask.
> +     */

Single line comments use /* ... */

> +    if ( !(val & DMA_IECTL_IM) )
> +    {
> +        /* Clear IM and clear IP */
> +        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> +        if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT) )
> +        {
> +            uint32_t ie_data, ie_addr;
> +
> +            ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
> +            ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
> +            vvtd_generate_interrupt(vvtd, ie_addr, ie_data);

No need for ie_data and ie_addr.
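The unmask path under review follows the usual mask/pending handshake: an
event latched in the IP bit while IM is set gets delivered once software
clears IM. A standalone sketch of that handshake, with hypothetical names
rather than the patch's API:

```c
#include <stdbool.h>
#include <stdio.h>

struct iectl { bool im, ip; };  /* interrupt mask and interrupt pending */

static void deliver_event(void) { puts("inject invalidation event"); }

/* On a write that updates IM: unmasking fires any event that was
 * latched in IP while the source was masked. */
static void write_iectl(struct iectl *r, bool new_im)
{
    r->im = new_im;
    if ( !r->im && r->ip )
    {
        deliver_event();
        r->ip = false;
    }
}

int main(void)
{
    struct iectl r = { .im = true, .ip = true };  /* event arrived masked */

    write_iectl(&r, false);  /* unmask -> "inject invalidation event" */
    return 0;
}
```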
> +        }
> +    }
> +    else
> +        vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
> +
> +    return X86EMUL_OKAY;
> +}
> +
>  static int vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
>  {
>      /*
> @@ -476,6 +672,10 @@ static int vvtd_write_fsts(struct vvtd *vvtd, uint32_t val)
>      if ( !((vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS)) )
>          vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
>
> +    /* Continue to deal with invalidation when IQE is clear */
> +    if ( !vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
> +        vvtd_process_iq(vvtd);
> +
>      return X86EMUL_OKAY;
>  }
>
> @@ -611,6 +811,32 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
>      case DMAR_FECTL_REG:
>          return vvtd_write_fectl(vvtd, val);
>
> +    case DMAR_IECTL_REG:
> +        return vvtd_write_iectl(vvtd, val);
> +
> +    case DMAR_ICS_REG:
> +        return vvtd_write_ics(vvtd, val);
> +
> +    case DMAR_IQT_REG:
> +        return vvtd_write_iqt(vvtd, (uint32_t)val);
> +
> +    case DMAR_IQA_REG:
> +    {
> +        uint32_t iqa_hi;
> +
> +        iqa_hi = vvtd_get_reg(vvtd, DMAR_IQA_REG_HI);

Initialization at declaration time, but since it's used only once, I
would rather prefer that you don't use a local variable at all.

> +        return vvtd_write_iqa(vvtd,
> +                              (uint32_t)val | ((uint64_t)iqa_hi << 32));
> +    }
> +
> +    case DMAR_IQA_REG_HI:
> +    {
> +        uint32_t iqa_lo;
> +
> +        iqa_lo = vvtd_get_reg(vvtd, DMAR_IQA_REG);
> +        return vvtd_write_iqa(vvtd, (val << 32) | iqa_lo);

Same comment as above regarding iqa_lo.

Thanks, Roger.
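The two cases above rebuild one 64-bit register value from split 32-bit
MMIO writes. A minimal sketch of that merge, using a plain global as a
stand-in for the emulated register file:

```c
#include <stdint.h>
#include <stdio.h>

static uint64_t iqa;  /* stand-in backing store for the 64-bit register */

/* A 32-bit write to the low half preserves the high half. */
static void write_iqa_lo(uint32_t val)
{
    iqa = (iqa & 0xffffffff00000000ULL) | val;
}

/* A 32-bit write to the high half preserves the low half. */
static void write_iqa_hi(uint32_t val)
{
    iqa = (iqa & 0x00000000ffffffffULL) | ((uint64_t)val << 32);
}

int main(void)
{
    write_iqa_lo(0x1000);
    write_iqa_hi(0x1);
    printf("%#llx\n", (unsigned long long)iqa);  /* 0x100001000 */
    return 0;
}
```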
On Fri, Oct 20, 2017 at 12:20:06PM +0100, Roger Pau Monné wrote:
>On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
>> From: Chao Gao <chao.gao@intel.com>
>>
>> Queued Invalidation Interface is an expanded invalidation interface with
>> extended capabilities. Hardware implementations report support for queued
>> invalidation interface through the Extended Capability Register. The queued
>> invalidation interface uses an Invalidation Queue (IQ), which is a circular
>> buffer in system memory. Software submits commands by writing Invalidation
>> Descriptors to the IQ.
>>
>> In this patch, a new function viommu_process_iq() is used for emulating how
>> hardware handles invalidation requests through QI.
>>
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>> ---
>> +static int process_iqe(struct vvtd *vvtd, int i)
>
>unsigned int.
>
>> +{
>> +    uint64_t iqa;
>> +    struct qinval_entry *qinval_page;
>> +    int ret = 0;
>> +
>> +    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
>> +    qinval_page = map_guest_page(vvtd->domain, DMA_IQA_ADDR(iqa)>>PAGE_SHIFT);
>
>PFN_DOWN instead of open coding the shift. Both can be initialized
>at declaration. Also AFAICT iqa is only used once, so the local
>variable is not needed.
>
>> +    if ( IS_ERR(qinval_page) )
>> +    {
>> +        gdprintk(XENLOG_ERR, "Can't map guest IRT (rc %ld)",
>> +                 PTR_ERR(qinval_page));
>> +        return PTR_ERR(qinval_page);
>> +    }
>> +
>> +    switch ( qinval_page[i].q.inv_wait_dsc.lo.type )
>> +    {
>> +    case TYPE_INVAL_WAIT:
>> +        if ( qinval_page[i].q.inv_wait_dsc.lo.sw )
>> +        {
>> +            uint32_t data = qinval_page[i].q.inv_wait_dsc.lo.sdata;
>> +            uint64_t addr = (qinval_page[i].q.inv_wait_dsc.hi.saddr << 2);
>
>Unneeded parentheses.
>
>> +
>> +            ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
>> +            if ( ret )
>> +                vvtd_info("Failed to write status address");
>
>Don't you need to return or do something here? (like raise some kind
>of error?)

The 'addr' is programmed by the guest. Here vvtd cannot finish this write
for some reason (i.e. the 'addr' may not be in the guest physical memory
space). According to VT-d spec 6.5.2.8 Invalidation Wait Descriptor,
"Hardware behavior is undefined if the Status Address specified is not an
address route-able to memory (such as peer address, interrupt address
range of 0xFEEX_XXXX etc.)"

I think that Xen can just ignore it. I should use vvtd_debug() here, for
it is guest-triggerable.

>> +            if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT) )
>> +            {
>> +                ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
>> +                ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
>> +                vvtd_generate_interrupt(vvtd, ie_addr, ie_data);
>
>...you don't seem to need the two local variables. They are used only
>once.
>
>> +                vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
>> +            }
>> +        }
>> +    }
>> +    break;
>> +
>> +    case TYPE_INVAL_IEC:
>> +        /*
>> +         * Currently, no cache is preserved in hypervisor. Only need to update
>> +         * pIRTEs which are modified in binding process.
>> +         */
>> +        break;
>> +
>> +    default:
>> +        goto error;
>
>There's no reason to use a label that's only used for the default
>case. Simply place the code in the error label here.
>
>> +    }
>> +
>> +    unmap_guest_page((void*)qinval_page);
>> +    return ret;
>> +
>> + error:
>> +    unmap_guest_page((void*)qinval_page);
>> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
>> +    domain_crash(vvtd->domain);
>
>Do you really need to crash the domain in such case?
We reach here when the guest requests some operation vvtd doesn't claim
to support or emulate. I am afraid it also can be triggered by the guest.
How about ignoring the invalidation request?

I will change the error message, for it isn't an internal error.

Thanks
Chao
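For reference, a minimal sketch of the Invalidation Wait Descriptor
completion Chao describes: when the SW bit is set, post the 32-bit status
data to `saddr << 2`, and simply log-and-ignore a failing write. All names
here are hypothetical stand-ins, not the patch's API:

```c
#include <inttypes.h>
#include <stdio.h>

/* Hypothetical stand-in for hvm_copy_to_guest_phys(). */
static int copy_to_guest_phys(uint64_t gpa, const void *buf, unsigned int len)
{
    (void)buf;
    printf("write %u bytes at %#" PRIx64 "\n", len, gpa);
    return 0;  /* non-zero would mean the address isn't writable memory */
}

/* Complete a wait descriptor: post sdata to saddr << 2 when SW is set. */
static void complete_wait_dsc(unsigned int sw, uint32_t sdata, uint64_t saddr)
{
    if ( sw )
    {
        uint32_t data = sdata;

        /* Per the discussion above, a failed status write is only logged. */
        if ( copy_to_guest_phys(saddr << 2, &data, sizeof(data)) )
            fprintf(stderr, "failed to write status address\n");
    }
}

int main(void)
{
    complete_wait_dsc(1, 0x1, 0x1000 >> 2);  /* status write lands at 0x1000 */
    return 0;
}
```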
On Mon, Oct 23, 2017 at 09:57:16AM +0100, Roger Pau Monné wrote:
>On Mon, Oct 23, 2017 at 03:50:24PM +0800, Chao Gao wrote:
>> On Fri, Oct 20, 2017 at 12:20:06PM +0100, Roger Pau Monné wrote:
>> >On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
>> >> From: Chao Gao <chao.gao@intel.com>
>> >> +    }
>> >> +
>> >> +    unmap_guest_page((void*)qinval_page);
>> >> +    return ret;
>> >> +
>> >> + error:
>> >> +    unmap_guest_page((void*)qinval_page);
>> >> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
>> >> +    domain_crash(vvtd->domain);
>> >
>> >Do you really need to crash the domain in such case?
>>
>> We reach here when the guest requests some operation vvtd doesn't claim
>> to support or emulate. I am afraid it also can be triggered by the guest.
>> How about ignoring the invalidation request?
>
>What would real hardware do in such case?

After reading the spec again, I think hardware may generate a fault
event; see VT-d spec 10.4.9 Fault Status Register:

    Hardware detected an error associated with the invalidation queue.
    This could be due to either a hardware error while fetching a
    descriptor from the invalidation queue, or hardware detecting an
    erroneous or invalid descriptor in the invalidation queue. At this
    time, a fault event may be generated based on the programming of the
    Fault Event Control register.

Thanks
Chao
On Mon, Oct 23, 2017 at 03:50:24PM +0800, Chao Gao wrote:
> On Fri, Oct 20, 2017 at 12:20:06PM +0100, Roger Pau Monné wrote:
> >On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
> >> From: Chao Gao <chao.gao@intel.com>
> >> +    }
> >> +
> >> +    unmap_guest_page((void*)qinval_page);
> >> +    return ret;
> >> +
> >> + error:
> >> +    unmap_guest_page((void*)qinval_page);
> >> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
> >> +    domain_crash(vvtd->domain);
> >
> >Do you really need to crash the domain in such case?
>
> We reach here when the guest requests some operation vvtd doesn't claim
> to support or emulate. I am afraid it also can be triggered by the guest.
> How about ignoring the invalidation request?

What would real hardware do in such case?

Roger.
> From: Gao, Chao
> Sent: Monday, October 23, 2017 4:52 PM
>
> On Mon, Oct 23, 2017 at 09:57:16AM +0100, Roger Pau Monné wrote:
> >On Mon, Oct 23, 2017 at 03:50:24PM +0800, Chao Gao wrote:
> >> On Fri, Oct 20, 2017 at 12:20:06PM +0100, Roger Pau Monné wrote:
> >> >On Thu, Sep 21, 2017 at 11:02:09PM -0400, Lan Tianyu wrote:
> >> >> From: Chao Gao <chao.gao@intel.com>
> >> >> +    }
> >> >> +
> >> >> +    unmap_guest_page((void*)qinval_page);
> >> >> +    return ret;
> >> >> +
> >> >> + error:
> >> >> +    unmap_guest_page((void*)qinval_page);
> >> >> +    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
> >> >> +    domain_crash(vvtd->domain);
> >> >
> >> >Do you really need to crash the domain in such case?
> >>
> >> We reach here when the guest requests some operation vvtd doesn't claim
> >> to support or emulate. I am afraid it also can be triggered by the guest.
> >> How about ignoring the invalidation request?
> >
> >What would real hardware do in such case?
>
> After reading the spec again, I think hardware may generate a fault
> event; see VT-d spec 10.4.9 Fault Status Register:
>
>     Hardware detected an error associated with the invalidation queue.
>     This could be due to either a hardware error while fetching a
>     descriptor from the invalidation queue, or hardware detecting an
>     erroneous or invalid descriptor in the invalidation queue. At this
>     time, a fault event may be generated based on the programming of the
>     Fault Event Control register.

Please do proper emulation according to hardware behavior.
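The behavior the thread converges on, sketched with hypothetical names:
instead of crashing the domain on a bad descriptor, latch IQE in the Fault
Status Register and raise a fault event subject to the Fault Event Control
mask (much like vvtd_report_non_recoverable_fault() in the patch):

```c
#include <stdbool.h>
#include <stdio.h>

struct fault_regs { bool fsts_iqe, fectl_im, fectl_ip; };

static void inject_fault_event(void) { puts("deliver fault event"); }

/* On an erroneous or invalid descriptor: record IQE, mark the event
 * pending, and deliver it only if fault events are not masked. */
static void report_iqe(struct fault_regs *r)
{
    r->fsts_iqe = true;
    r->fectl_ip = true;
    if ( !r->fectl_im )
    {
        inject_fault_event();
        r->fectl_ip = false;
    }
}

int main(void)
{
    struct fault_regs r = { 0 };

    report_iqe(&r);  /* unmasked -> "deliver fault event" */
    return 0;
}
```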
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index c69cd21..c2b83f1 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -177,6 +177,21 @@
 #define DMA_IRTA_S(val)     (val & 0xf)
 #define DMA_IRTA_SIZE(val)  (1UL << (DMA_IRTA_S(val) + 1))
 
+/* IQA_REG */
+#define DMA_IQA_ADDR(val)   (val & ~0xfffULL)
+#define DMA_IQA_QS(val)     (val & 0x7)
+#define DMA_IQA_RSVD        0xff8ULL
+
+/* IECTL_REG */
+#define DMA_IECTL_IM_SHIFT  31
+#define DMA_IECTL_IM        (1 << DMA_IECTL_IM_SHIFT)
+#define DMA_IECTL_IP_SHIFT  30
+#define DMA_IECTL_IP        (1 << DMA_IECTL_IP_SHIFT)
+
+/* ICS_REG */
+#define DMA_ICS_IWC_SHIFT   0
+#define DMA_ICS_IWC         (1 << DMA_ICS_IWC_SHIFT)
+
 /* PMEN_REG */
 #define DMA_PMEN_EPM    (((u32)1) << 31)
 #define DMA_PMEN_PRS    (((u32)1) << 0)
@@ -211,7 +226,8 @@
 #define DMA_FSTS_PPF    (1U << DMA_FSTS_PPF_SHIFT)
 #define DMA_FSTS_AFO    (1U << 2)
 #define DMA_FSTS_APF    (1U << 3)
-#define DMA_FSTS_IQE    (1U << 4)
+#define DMA_FSTS_IQE_SHIFT  4
+#define DMA_FSTS_IQE    (1U << DMA_FSTS_IQE_SHIFT)
 #define DMA_FSTS_ICE    (1U << 5)
 #define DMA_FSTS_ITE    (1U << 6)
 #define DMA_FSTS_PRO_SHIFT  7
@@ -562,6 +578,7 @@ struct qinval_entry {
 
 /* Queue invalidation head/tail shift */
 #define QINVAL_INDEX_SHIFT  4
+#define QINVAL_INDEX_MASK   0x7fff0ULL
 
 #define qinval_present(v)   ((v).lo & 1)
 #define qinval_fault_disable(v)    (((v).lo >> 1) & 1)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
index 55f7a46..668d0c9 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -28,6 +28,7 @@
 #include <asm/current.h>
 #include <asm/event.h>
 #include <asm/hvm/domain.h>
+#include <asm/hvm/support.h>
 #include <asm/io_apic.h>
 #include <asm/page.h>
 #include <asm/p2m.h>
@@ -419,6 +420,177 @@ static int vvtd_record_fault(struct vvtd *vvtd,
     return X86EMUL_OKAY;
 }
 
+/*
+ * Process an invalidation descriptor. Currently, only two types of
+ * descriptors, Interrupt Entry Cache Invalidation Descriptor and
+ * Invalidation Wait Descriptor, are handled.
+ * @vvtd: the virtual vtd instance
+ * @i: the index of the invalidation descriptor to be processed
+ *
+ * Returns 0 on success, non-zero on failure.
+ */
+static int process_iqe(struct vvtd *vvtd, int i)
+{
+    uint64_t iqa;
+    struct qinval_entry *qinval_page;
+    int ret = 0;
+
+    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
+    qinval_page = map_guest_page(vvtd->domain, DMA_IQA_ADDR(iqa)>>PAGE_SHIFT);
+    if ( IS_ERR(qinval_page) )
+    {
+        gdprintk(XENLOG_ERR, "Can't map guest IRT (rc %ld)",
+                 PTR_ERR(qinval_page));
+        return PTR_ERR(qinval_page);
+    }
+
+    switch ( qinval_page[i].q.inv_wait_dsc.lo.type )
+    {
+    case TYPE_INVAL_WAIT:
+        if ( qinval_page[i].q.inv_wait_dsc.lo.sw )
+        {
+            uint32_t data = qinval_page[i].q.inv_wait_dsc.lo.sdata;
+            uint64_t addr = (qinval_page[i].q.inv_wait_dsc.hi.saddr << 2);
+
+            ret = hvm_copy_to_guest_phys(addr, &data, sizeof(data), current);
+            if ( ret )
+                vvtd_info("Failed to write status address");
+        }
+
+        /*
+         * The following code generates an invalidation completion event
+         * indicating the invalidation wait descriptor completion. Note that
+         * the following code fragment is not tested properly.
+         */
+        if ( qinval_page[i].q.inv_wait_dsc.lo.iflag )
+        {
+            uint32_t ie_data, ie_addr;
+            if ( !vvtd_test_and_set_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT) )
+            {
+                vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+                if ( !vvtd_test_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT) )
+                {
+                    ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
+                    ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
+                    vvtd_generate_interrupt(vvtd, ie_addr, ie_data);
+                    vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+                }
+            }
+        }
+        break;
+
+    case TYPE_INVAL_IEC:
+        /*
+         * Currently, no cache is preserved in hypervisor. Only need to update
+         * pIRTEs which are modified in binding process.
+         */
+        break;
+
+    default:
+        goto error;
+    }
+
+    unmap_guest_page((void*)qinval_page);
+    return ret;
+
+ error:
+    unmap_guest_page((void*)qinval_page);
+    gdprintk(XENLOG_ERR, "Internal error in Queue Invalidation.\n");
+    domain_crash(vvtd->domain);
+    return ret;
+}
+
+/*
+ * Invalidate all the descriptors in the Invalidation Queue.
+ */
+static void vvtd_process_iq(struct vvtd *vvtd)
+{
+    uint64_t iqh, iqt, iqa, max_entry, i;
+    int err = 0;
+
+    /*
+     * No new descriptor is fetched from the Invalidation Queue until
+     * software clears the IQE field in the Fault Status Register
+     */
+    if ( vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
+        return;
+
+    iqh = vvtd_get_reg_quad(vvtd, DMAR_IQH_REG);
+    iqt = vvtd_get_reg_quad(vvtd, DMAR_IQT_REG);
+    iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
+
+    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));
+    iqh = MASK_EXTR(iqh, QINVAL_INDEX_MASK);
+    iqt = MASK_EXTR(iqt, QINVAL_INDEX_MASK);
+
+    ASSERT(iqt < max_entry);
+    if ( iqh == iqt )
+        return;
+
+    for ( i = iqh; i != iqt; i = (i + 1) % max_entry )
+    {
+        err = process_iqe(vvtd, i);
+        if ( err )
+            break;
+    }
+    vvtd_set_reg_quad(vvtd, DMAR_IQH_REG, i << QINVAL_INDEX_SHIFT);
+
+    /*
+     * When IQE is set, IQH references the descriptor associated with
+     * the error.
+     */
+    if ( err )
+        vvtd_report_non_recoverable_fault(vvtd, DMA_FSTS_IQE_SHIFT);
+}
+
+static int vvtd_write_iqt(struct vvtd *vvtd, unsigned long val)
+{
+    uint64_t max_entry, iqa = vvtd_get_reg_quad(vvtd, DMAR_IQA_REG);
+
+    if ( val & ~QINVAL_INDEX_MASK )
+    {
+        vvtd_info("Attempt to set reserved bits in Invalidation Queue Tail");
+        return X86EMUL_OKAY;
+    }
+
+    max_entry = 1 << (QINVAL_ENTRY_ORDER + DMA_IQA_QS(iqa));
+    if ( MASK_EXTR(val, QINVAL_INDEX_MASK) >= max_entry )
+    {
+        vvtd_info("IQT: Value %lx exceeded supported max index.", val);
+        return X86EMUL_OKAY;
+    }
+
+    vvtd_set_reg_quad(vvtd, DMAR_IQT_REG, val);
+    vvtd_process_iq(vvtd);
+
+    return X86EMUL_OKAY;
+}
+
+static int vvtd_write_iqa(struct vvtd *vvtd, unsigned long val)
+{
+    uint32_t cap = vvtd_get_reg(vvtd, DMAR_CAP_REG);
+    unsigned int guest_max_addr_width = cap_mgaw(cap);
+
+    if ( val & (~((1ULL << guest_max_addr_width) - 1) | DMA_IQA_RSVD) )
+    {
+        vvtd_info("Attempt to set reserved bits in Invalidation Queue Address");
+        return X86EMUL_OKAY;
+    }
+
+    vvtd_set_reg_quad(vvtd, DMAR_IQA_REG, val);
+    return X86EMUL_OKAY;
+}
+
+static int vvtd_write_ics(struct vvtd *vvtd, uint32_t val)
+{
+    if ( val & DMA_ICS_IWC )
+    {
+        vvtd_clear_bit(vvtd, DMAR_ICS_REG, DMA_ICS_IWC_SHIFT);
+        /*When IWC field is cleared, the IP field needs to be cleared */
+        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT);
+    }
+    return X86EMUL_OKAY;
+}
+
 static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
 {
     /* Writing a 1 means clear fault */
@@ -430,6 +602,30 @@ static int vvtd_write_frcd3(struct vvtd *vvtd, uint32_t val)
     return X86EMUL_OKAY;
 }
 
+static int vvtd_write_iectl(struct vvtd *vvtd, uint32_t val)
+{
+    /*
+     * Only DMA_IECTL_IM bit is writable. Generate pending event when unmask.
+     */
+    if ( !(val & DMA_IECTL_IM) )
+    {
+        /* Clear IM and clear IP */
+        vvtd_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
+        if ( vvtd_test_and_clear_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IP_SHIFT) )
+        {
+            uint32_t ie_data, ie_addr;
+
+            ie_data = vvtd_get_reg(vvtd, DMAR_IEDATA_REG);
+            ie_addr = vvtd_get_reg(vvtd, DMAR_IEADDR_REG);
+            vvtd_generate_interrupt(vvtd, ie_addr, ie_data);
+        }
+    }
+    else
+        vvtd_set_bit(vvtd, DMAR_IECTL_REG, DMA_IECTL_IM_SHIFT);
+
+    return X86EMUL_OKAY;
+}
+
 static int vvtd_write_fectl(struct vvtd *vvtd, uint32_t val)
 {
     /*
@@ -476,6 +672,10 @@ static int vvtd_write_fsts(struct vvtd *vvtd, uint32_t val)
     if ( !((vvtd_get_reg(vvtd, DMAR_FSTS_REG) & DMA_FSTS_FAULTS)) )
         vvtd_clear_bit(vvtd, DMAR_FECTL_REG, DMA_FECTL_IP_SHIFT);
 
+    /* Continue to deal with invalidation when IQE is clear */
+    if ( !vvtd_test_bit(vvtd, DMAR_FSTS_REG, DMA_FSTS_IQE_SHIFT) )
+        vvtd_process_iq(vvtd);
+
     return X86EMUL_OKAY;
 }
 
@@ -611,6 +811,32 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
     case DMAR_FECTL_REG:
         return vvtd_write_fectl(vvtd, val);
 
+    case DMAR_IECTL_REG:
+        return vvtd_write_iectl(vvtd, val);
+
+    case DMAR_ICS_REG:
+        return vvtd_write_ics(vvtd, val);
+
+    case DMAR_IQT_REG:
+        return vvtd_write_iqt(vvtd, (uint32_t)val);
+
+    case DMAR_IQA_REG:
+    {
+        uint32_t iqa_hi;
+
+        iqa_hi = vvtd_get_reg(vvtd, DMAR_IQA_REG_HI);
+        return vvtd_write_iqa(vvtd,
+                              (uint32_t)val | ((uint64_t)iqa_hi << 32));
+    }
+
+    case DMAR_IQA_REG_HI:
+    {
+        uint32_t iqa_lo;
+
+        iqa_lo = vvtd_get_reg(vvtd, DMAR_IQA_REG);
+        return vvtd_write_iqa(vvtd, (val << 32) | iqa_lo);
+    }
+
     case DMAR_IEDATA_REG:
     case DMAR_IEADDR_REG:
     case DMAR_IEUADDR_REG:
@@ -637,6 +863,12 @@ static int vvtd_write(struct vcpu *v, unsigned long addr,
         vvtd_set_reg_quad(vvtd, DMAR_IRTA_REG, val);
         break;
 
+    case DMAR_IQT_REG:
+        return vvtd_write_iqt(vvtd, val);
+
+    case DMAR_IQA_REG:
+        return vvtd_write_iqa(vvtd, val);
+
     default:
         if ( offset == fault_offset + DMA_FRCD2_OFFSET )
             return vvtd_write_frcd3(vvtd, val >> 32);
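Finally, the heart of vvtd_process_iq() above is a bounded walk of the
circular queue from IQH up to IQT. A self-contained sketch (with a
hypothetical process() standing in for process_iqe()) showing the
wrap-around and the stop-on-error behavior that leaves the head pointing
at the failing descriptor:

```c
#include <stdio.h>

/* Hypothetical stand-in for process_iqe(): returns 0 on success. */
static int process(unsigned int idx)
{
    printf("descriptor %u\n", idx);
    return 0;
}

/* Consume descriptors from head up to (but not including) tail,
 * wrapping at the end of the circular buffer. */
static void process_iq(unsigned int head, unsigned int tail,
                       unsigned int max_entry)
{
    unsigned int i;

    for ( i = head; i != tail; i = (i + 1) % max_entry )
        if ( process(i) )
            break;  /* head is left at the failing descriptor */

    printf("new head: %u\n", i);
}

int main(void)
{
    process_iq(6, 2, 8);  /* wraps: handles 6, 7, 0, 1; new head is 2 */
    return 0;
}
```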