diff mbox

[V3,12/29] x86/vvtd: Add MMIO handler for VVTD

Message ID 1506049330-11196-13-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

lan,Tianyu Sept. 22, 2017, 3:01 a.m. UTC
From: Chao Gao <chao.gao@intel.com>

This patch adds VVTD MMIO handler to deal with MMIO access.

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 xen/drivers/passthrough/vtd/vvtd.c | 91 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

Comments

Roger Pau Monné Oct. 19, 2017, 11:34 a.m. UTC | #1
On Thu, Sep 21, 2017 at 11:01:53PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@intel.com>
> 
> This patch adds VVTD MMIO handler to deal with MMIO access.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  xen/drivers/passthrough/vtd/vvtd.c | 91 ++++++++++++++++++++++++++++++++++++++
>  1 file changed, 91 insertions(+)
> 
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index c851ec7..a3002c3 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -47,6 +47,29 @@ struct vvtd {
>      struct page_info *regs_page;
>  };
>  
> +/* Setting viommu_verbose enables debugging messages of vIOMMU */
> +bool __read_mostly viommu_verbose;
> +boolean_runtime_param("viommu_verbose", viommu_verbose);
> +
> +#ifndef NDEBUG
> +#define vvtd_info(fmt...) do {                    \
> +    if ( viommu_verbose )                         \
> +        gprintk(XENLOG_G_INFO, ## fmt);           \

If you use gprintk you should use XENLOG_INFO, the '_G_' variants are
only used with plain printk.

> +} while(0)
> +#define vvtd_debug(fmt...) do {                   \
> +    if ( viommu_verbose && printk_ratelimit() )   \

Not sure why you need printk_ratelimit, XENLOG_G_DEBUG is already
rate-limited.

> +        printk(XENLOG_G_DEBUG fmt);               \

Any reason why vvtd_info uses gprintk and here you use printk?

> +} while(0)
> +#else
> +#define vvtd_info(fmt...) do {} while(0)
> +#define vvtd_debug(fmt...) do {} while(0)

No need for 'fmt...' just '...' will suffice since you are discarding
the parameters anyway.

> +#endif
> +
> +struct vvtd *domain_vvtd(struct domain *d)
> +{
> +    return (d->viommu) ? d->viommu->priv : NULL;

Unneeded parentheses around d->viommu.

Also, it seems wring to call domain_vvtd with !d->viommu. So I think
this helper should just be removed, and d->viommu->priv fetched
directly.

> +}
> +
>  static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
>  {
>      vtd->regs->data32[reg/sizeof(uint32_t)] = value;
> @@ -68,6 +91,73 @@ static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
>      return vtd->regs->data64[reg/sizeof(uint64_t)];
>  }
>  
> +static int vvtd_in_range(struct vcpu *v, unsigned long addr)
> +{
> +    struct vvtd *vvtd = domain_vvtd(v->domain);
> +
> +    if ( vvtd )
> +        return (addr >= vvtd->base_addr) &&
> +               (addr < vvtd->base_addr + PAGE_SIZE);

So the register set covers a PAGE_SIZE, but hvm_hw_vvtd_regs only
covers from 0 to 1024B, it seems like there's something wrong here...

> +    return 0;
> +}
> +
> +static int vvtd_read(struct vcpu *v, unsigned long addr,
> +                     unsigned int len, unsigned long *pval)
> +{
> +    struct vvtd *vvtd = domain_vvtd(v->domain);
> +    unsigned int offset = addr - vvtd->base_addr;
> +
> +    vvtd_info("Read offset %x len %d\n", offset, len);
> +
> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )

What value does hardware return when performing unaligned reads or
reads with wrong size?

Here you return with pval not set, which is dangerous.

> +        return X86EMUL_OKAY;
> +
> +    if ( len == 4 )
> +        *pval = vvtd_get_reg(vvtd, offset);
> +    else
> +        *pval = vvtd_get_reg_quad(vvtd, offset);

...yet here you don't check for offset < 1024.

> +
> +    return X86EMUL_OKAY;
> +}
> +
> +static int vvtd_write(struct vcpu *v, unsigned long addr,
> +                      unsigned int len, unsigned long val)
> +{
> +    struct vvtd *vvtd = domain_vvtd(v->domain);
> +    unsigned int offset = addr - vvtd->base_addr;
> +
> +    vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
> +
> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
> +        return X86EMUL_OKAY;
> +
> +    if ( len == 4 )
> +    {
> +        switch ( offset )
> +        {
> +        case DMAR_IEDATA_REG:
> +        case DMAR_IEADDR_REG:
> +        case DMAR_IEUADDR_REG:
> +        case DMAR_FEDATA_REG:
> +        case DMAR_FEADDR_REG:
> +        case DMAR_FEUADDR_REG:
> +            vvtd_set_reg(vvtd, offset, val);

Hm, so you are using a full page when you only care for 6 4B
registers? Seem like quite of a waste of memory.

Thanks, Roger.
Chao Gao Oct. 20, 2017, 2:58 a.m. UTC | #2
On Thu, Oct 19, 2017 at 12:34:54PM +0100, Roger Pau Monné wrote:
>On Thu, Sep 21, 2017 at 11:01:53PM -0400, Lan Tianyu wrote:
>> From: Chao Gao <chao.gao@intel.com>
>> 
>> This patch adds VVTD MMIO handler to deal with MMIO access.
>> 
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>> ---
>>  xen/drivers/passthrough/vtd/vvtd.c | 91 ++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 91 insertions(+)
>> 
>> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
>> index c851ec7..a3002c3 100644
>> --- a/xen/drivers/passthrough/vtd/vvtd.c
>> +++ b/xen/drivers/passthrough/vtd/vvtd.c
>> @@ -47,6 +47,29 @@ struct vvtd {
>>      struct page_info *regs_page;
>>  };
>>  
>> +/* Setting viommu_verbose enables debugging messages of vIOMMU */
>> +bool __read_mostly viommu_verbose;
>> +boolean_runtime_param("viommu_verbose", viommu_verbose);
>> +
>> +#ifndef NDEBUG
>> +#define vvtd_info(fmt...) do {                    \
>> +    if ( viommu_verbose )                         \
>> +        gprintk(XENLOG_G_INFO, ## fmt);           \
>
>If you use gprintk you should use XENLOG_INFO, the '_G_' variants are
>only used with plain printk.
>
>> +} while(0)
>> +#define vvtd_debug(fmt...) do {                   \
>> +    if ( viommu_verbose && printk_ratelimit() )   \
>
>Not sure why you need printk_ratelimit, XENLOG_G_DEBUG is already
>rate-limited.
>
>> +        printk(XENLOG_G_DEBUG fmt);               \
>
>Any reason why vvtd_info uses gprintk and here you use printk?
>
>> +} while(0)
>> +#else
>> +#define vvtd_info(fmt...) do {} while(0)
>> +#define vvtd_debug(fmt...) do {} while(0)
>
>No need for 'fmt...' just '...' will suffice since you are discarding
>the parameters anyway.
>
>> +#endif
>> +
>> +struct vvtd *domain_vvtd(struct domain *d)
>> +{
>> +    return (d->viommu) ? d->viommu->priv : NULL;
>
>Unneeded parentheses around d->viommu.
>
>Also, it seems wring to call domain_vvtd with !d->viommu. So I think
>this helper should just be removed, and d->viommu->priv fetched
>directly.
>
>> +}
>> +
>>  static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
>>  {
>>      vtd->regs->data32[reg/sizeof(uint32_t)] = value;
>> @@ -68,6 +91,73 @@ static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
>>      return vtd->regs->data64[reg/sizeof(uint64_t)];
>>  }
>>  
>> +static int vvtd_in_range(struct vcpu *v, unsigned long addr)
>> +{
>> +    struct vvtd *vvtd = domain_vvtd(v->domain);
>> +
>> +    if ( vvtd )
>> +        return (addr >= vvtd->base_addr) &&
>> +               (addr < vvtd->base_addr + PAGE_SIZE);
>
>So the register set covers a PAGE_SIZE, but hvm_hw_vvtd_regs only
>covers from 0 to 1024B, it seems like there's something wrong here...
>
>> +    return 0;
>> +}
>> +
>> +static int vvtd_read(struct vcpu *v, unsigned long addr,
>> +                     unsigned int len, unsigned long *pval)
>> +{
>> +    struct vvtd *vvtd = domain_vvtd(v->domain);
>> +    unsigned int offset = addr - vvtd->base_addr;
>> +
>> +    vvtd_info("Read offset %x len %d\n", offset, len);
>> +
>> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
>
>What value does hardware return when performing unaligned reads or
>reads with wrong size?

According to VT-d spec section 10.2, "Software must access 64-bit and
128-bit registers as either aligned quadwords or aligned doublewords".
I am afraid there is no specific hardware action for unaligned access
information. We can treat it as undefined? Then do nothing.
But I did see windows driver has such accesses. We need to add a
workaround for windows later.

>
>Here you return with pval not set, which is dangerous.

Indeed. But I need check whether the pval is initialized by the caller.
If that, it is safe.

>
>> +        return X86EMUL_OKAY;
>> +
>> +    if ( len == 4 )
>> +        *pval = vvtd_get_reg(vvtd, offset);
>> +    else
>> +        *pval = vvtd_get_reg_quad(vvtd, offset);
>
>...yet here you don't check for offset < 1024.
>
>> +
>> +    return X86EMUL_OKAY;
>> +}
>> +
>> +static int vvtd_write(struct vcpu *v, unsigned long addr,
>> +                      unsigned int len, unsigned long val)
>> +{
>> +    struct vvtd *vvtd = domain_vvtd(v->domain);
>> +    unsigned int offset = addr - vvtd->base_addr;
>> +
>> +    vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
>> +
>> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
>> +        return X86EMUL_OKAY;
>> +
>> +    if ( len == 4 )
>> +    {
>> +        switch ( offset )
>> +        {
>> +        case DMAR_IEDATA_REG:
>> +        case DMAR_IEADDR_REG:
>> +        case DMAR_IEUADDR_REG:
>> +        case DMAR_FEDATA_REG:
>> +        case DMAR_FEADDR_REG:
>> +        case DMAR_FEUADDR_REG:
>> +            vvtd_set_reg(vvtd, offset, val);
>
>Hm, so you are using a full page when you only care for 6 4B
>registers? Seem like quite of a waste of memory.

Registers are added here when according features are introduced.

Thanks
Chao
Roger Pau Monné Oct. 20, 2017, 9:51 a.m. UTC | #3
On Fri, Oct 20, 2017 at 10:58:32AM +0800, Chao Gao wrote:
> On Thu, Oct 19, 2017 at 12:34:54PM +0100, Roger Pau Monné wrote:
> >On Thu, Sep 21, 2017 at 11:01:53PM -0400, Lan Tianyu wrote:
> >> +    return 0;
> >> +}
> >> +
> >> +static int vvtd_read(struct vcpu *v, unsigned long addr,
> >> +                     unsigned int len, unsigned long *pval)
> >> +{
> >> +    struct vvtd *vvtd = domain_vvtd(v->domain);
> >> +    unsigned int offset = addr - vvtd->base_addr;
> >> +
> >> +    vvtd_info("Read offset %x len %d\n", offset, len);
> >> +
> >> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
> >
> >What value does hardware return when performing unaligned reads or
> >reads with wrong size?
> 
> According to VT-d spec section 10.2, "Software must access 64-bit and
> 128-bit registers as either aligned quadwords or aligned doublewords".
> I am afraid there is no specific hardware action for unaligned access
> information. We can treat it as undefined? Then do nothing.
> But I did see windows driver has such accesses. We need to add a
> workaround for windows later.

I would recommend that you do *pval = ~0ul; in that case then.

> >
> >Here you return with pval not set, which is dangerous.
> 
> Indeed. But I need check whether the pval is initialized by the caller.
> If that, it is safe.

Yes, this was recently fixed as part of a XSA, but I would rather
prefer to set pval here in the error case.

> >
> >> +        return X86EMUL_OKAY;
> >> +
> >> +    if ( len == 4 )
> >> +        *pval = vvtd_get_reg(vvtd, offset);
> >> +    else
> >> +        *pval = vvtd_get_reg_quad(vvtd, offset);
> >
> >...yet here you don't check for offset < 1024.
> >
> >> +
> >> +    return X86EMUL_OKAY;
> >> +}
> >> +
> >> +static int vvtd_write(struct vcpu *v, unsigned long addr,
> >> +                      unsigned int len, unsigned long val)
> >> +{
> >> +    struct vvtd *vvtd = domain_vvtd(v->domain);
> >> +    unsigned int offset = addr - vvtd->base_addr;
> >> +
> >> +    vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
> >> +
> >> +    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
> >> +        return X86EMUL_OKAY;
> >> +
> >> +    if ( len == 4 )
> >> +    {
> >> +        switch ( offset )
> >> +        {
> >> +        case DMAR_IEDATA_REG:
> >> +        case DMAR_IEADDR_REG:
> >> +        case DMAR_IEUADDR_REG:
> >> +        case DMAR_FEDATA_REG:
> >> +        case DMAR_FEADDR_REG:
> >> +        case DMAR_FEUADDR_REG:
> >> +            vvtd_set_reg(vvtd, offset, val);
> >
> >Hm, so you are using a full page when you only care for 6 4B
> >registers? Seem like quite of a waste of memory.
> 
> Registers are added here when according features are introduced.

Even at the end of the series it doesn't seem like you are adding
support to 256 registers. From the code it seems like you allow writes
to 16 4B registers, and although you allow read access to all the
register space it seems quite dubious that you need 256 registers.
Hence the question about trying to minimize memory usage to what's
really needed.

Roger.
diff mbox

Patch

diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
index c851ec7..a3002c3 100644
--- a/xen/drivers/passthrough/vtd/vvtd.c
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -47,6 +47,29 @@  struct vvtd {
     struct page_info *regs_page;
 };
 
+/* Setting viommu_verbose enables debugging messages of vIOMMU */
+bool __read_mostly viommu_verbose;
+boolean_runtime_param("viommu_verbose", viommu_verbose);
+
+#ifndef NDEBUG
+#define vvtd_info(fmt...) do {                    \
+    if ( viommu_verbose )                         \
+        gprintk(XENLOG_G_INFO, ## fmt);           \
+} while(0)
+#define vvtd_debug(fmt...) do {                   \
+    if ( viommu_verbose && printk_ratelimit() )   \
+        printk(XENLOG_G_DEBUG fmt);               \
+} while(0)
+#else
+#define vvtd_info(fmt...) do {} while(0)
+#define vvtd_debug(fmt...) do {} while(0)
+#endif
+
+struct vvtd *domain_vvtd(struct domain *d)
+{
+    return (d->viommu) ? d->viommu->priv : NULL;
+}
+
 static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
 {
     vtd->regs->data32[reg/sizeof(uint32_t)] = value;
@@ -68,6 +91,73 @@  static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
     return vtd->regs->data64[reg/sizeof(uint64_t)];
 }
 
+static int vvtd_in_range(struct vcpu *v, unsigned long addr)
+{
+    struct vvtd *vvtd = domain_vvtd(v->domain);
+
+    if ( vvtd )
+        return (addr >= vvtd->base_addr) &&
+               (addr < vvtd->base_addr + PAGE_SIZE);
+    return 0;
+}
+
+static int vvtd_read(struct vcpu *v, unsigned long addr,
+                     unsigned int len, unsigned long *pval)
+{
+    struct vvtd *vvtd = domain_vvtd(v->domain);
+    unsigned int offset = addr - vvtd->base_addr;
+
+    vvtd_info("Read offset %x len %d\n", offset, len);
+
+    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
+        return X86EMUL_OKAY;
+
+    if ( len == 4 )
+        *pval = vvtd_get_reg(vvtd, offset);
+    else
+        *pval = vvtd_get_reg_quad(vvtd, offset);
+
+    return X86EMUL_OKAY;
+}
+
+static int vvtd_write(struct vcpu *v, unsigned long addr,
+                      unsigned int len, unsigned long val)
+{
+    struct vvtd *vvtd = domain_vvtd(v->domain);
+    unsigned int offset = addr - vvtd->base_addr;
+
+    vvtd_info("Write offset %x len %d val %lx\n", offset, len, val);
+
+    if ( (len != 4 && len != 8) || (offset & (len - 1)) )
+        return X86EMUL_OKAY;
+
+    if ( len == 4 )
+    {
+        switch ( offset )
+        {
+        case DMAR_IEDATA_REG:
+        case DMAR_IEADDR_REG:
+        case DMAR_IEUADDR_REG:
+        case DMAR_FEDATA_REG:
+        case DMAR_FEADDR_REG:
+        case DMAR_FEUADDR_REG:
+            vvtd_set_reg(vvtd, offset, val);
+            break;
+
+        default:
+            break;
+        }
+    }
+
+    return X86EMUL_OKAY;
+}
+
+static const struct hvm_mmio_ops vvtd_mmio_ops = {
+    .check = vvtd_in_range,
+    .read = vvtd_read,
+    .write = vvtd_write
+};
+
 static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
 {
     uint64_t cap = cap_set_num_fault_regs(1ULL) |
@@ -109,6 +199,7 @@  static int vvtd_create(struct domain *d, struct viommu *viommu)
     vvtd_reset(vvtd, viommu->caps);
     vvtd->base_addr = viommu->base_address;
     vvtd->domain = d;
+    register_mmio_handler(d, &vvtd_mmio_ops);
 
     viommu->priv = vvtd;