diff mbox

[V2,11/25] x86/hvm: Introduce a emulated VTD for HVM

Message ID 1502310866-10450-12-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

lan,Tianyu Aug. 9, 2017, 8:34 p.m. UTC
From: Chao Gao <chao.gao@intel.com>

This patch adds create/destroy/query function for the emulated VTD
and adapts it to the common VIOMMU abstraction.

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 xen/drivers/passthrough/vtd/Makefile |   7 +-
 xen/drivers/passthrough/vtd/iommu.h  |  99 +++++++++++++++++-----
 xen/drivers/passthrough/vtd/vvtd.c   | 158 +++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/viommu.h         |   3 +
 4 files changed, 241 insertions(+), 26 deletions(-)
 create mode 100644 xen/drivers/passthrough/vtd/vvtd.c

Comments

Roger Pau Monné Aug. 23, 2017, 7:58 a.m. UTC | #1
On Wed, Aug 09, 2017 at 04:34:12PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@intel.com>
> 
> This patch adds create/destroy/query function for the emulated VTD
> and adapts it to the common VIOMMU abstraction.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  xen/drivers/passthrough/vtd/Makefile |   7 +-
>  xen/drivers/passthrough/vtd/iommu.h  |  99 +++++++++++++++++-----
>  xen/drivers/passthrough/vtd/vvtd.c   | 158 +++++++++++++++++++++++++++++++++++
>  xen/include/asm-x86/viommu.h         |   3 +
>  4 files changed, 241 insertions(+), 26 deletions(-)
>  create mode 100644 xen/drivers/passthrough/vtd/vvtd.c
> 
> diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
> index f302653..163c7fe 100644
> --- a/xen/drivers/passthrough/vtd/Makefile
> +++ b/xen/drivers/passthrough/vtd/Makefile
> @@ -1,8 +1,9 @@
>  subdir-$(CONFIG_X86) += x86
>  
> -obj-y += iommu.o
>  obj-y += dmar.o
> -obj-y += utils.o
> -obj-y += qinval.o
>  obj-y += intremap.o
> +obj-y += iommu.o
> +obj-y += qinval.o
>  obj-y += quirks.o
> +obj-y += utils.o
> +obj-$(CONFIG_VIOMMU) += vvtd.o
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index 72c1a2e..55f3b6e 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -23,31 +23,54 @@
>  #include <asm/msi.h>
>  
>  /*
> - * Intel IOMMU register specification per version 1.0 public spec.
> + * Intel IOMMU register specification per version 2.4 public spec.
>   */
>  
> -#define    DMAR_VER_REG    0x0    /* Arch version supported by this IOMMU */
> -#define    DMAR_CAP_REG    0x8    /* Hardware supported capabilities */
> -#define    DMAR_ECAP_REG    0x10    /* Extended capabilities supported */
> -#define    DMAR_GCMD_REG    0x18    /* Global command register */
> -#define    DMAR_GSTS_REG    0x1c    /* Global status register */
> -#define    DMAR_RTADDR_REG    0x20    /* Root entry table */
> -#define    DMAR_CCMD_REG    0x28    /* Context command reg */
> -#define    DMAR_FSTS_REG    0x34    /* Fault Status register */
> -#define    DMAR_FECTL_REG    0x38    /* Fault control register */
> -#define    DMAR_FEDATA_REG    0x3c    /* Fault event interrupt data register */
> -#define    DMAR_FEADDR_REG    0x40    /* Fault event interrupt addr register */
> -#define    DMAR_FEUADDR_REG 0x44    /* Upper address register */
> -#define    DMAR_AFLOG_REG    0x58    /* Advanced Fault control */
> -#define    DMAR_PMEN_REG    0x64    /* Enable Protected Memory Region */
> -#define    DMAR_PLMBASE_REG 0x68    /* PMRR Low addr */
> -#define    DMAR_PLMLIMIT_REG 0x6c    /* PMRR low limit */
> -#define    DMAR_PHMBASE_REG 0x70    /* pmrr high base addr */
> -#define    DMAR_PHMLIMIT_REG 0x78    /* pmrr high limit */
> -#define    DMAR_IQH_REG    0x80    /* invalidation queue head */
> -#define    DMAR_IQT_REG    0x88    /* invalidation queue tail */
> -#define    DMAR_IQA_REG    0x90    /* invalidation queue addr */
> -#define    DMAR_IRTA_REG   0xB8    /* intr remap */
> +#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
> +#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
> +#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
> +#define DMAR_GCMD_REG           0x18 /* Global command register */
> +#define DMAR_GSTS_REG           0x1c /* Global status register */
> +#define DMAR_RTADDR_REG         0x20 /* Root entry table */
> +#define DMAR_CCMD_REG           0x28 /* Context command reg */
> +#define DMAR_FSTS_REG           0x34 /* Fault Status register */
> +#define DMAR_FECTL_REG          0x38 /* Fault control register */
> +#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data register */
> +#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr register */
> +#define DMAR_FEUADDR_REG        0x44 /* Upper address register */
> +#define DMAR_AFLOG_REG          0x58 /* Advanced Fault control */
> +#define DMAR_PMEN_REG           0x64 /* Enable Protected Memory Region */
> +#define DMAR_PLMBASE_REG        0x68 /* PMRR Low addr */
> +#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
> +#define DMAR_PHMBASE_REG        0x70 /* pmrr high base addr */
> +#define DMAR_PHMLIMIT_REG       0x78 /* pmrr high limit */
> +#define DMAR_IQH_REG            0x80 /* invalidation queue head */
> +#define DMAR_IQT_REG            0x88 /* invalidation queue tail */
> +#define DMAR_IQT_REG_HI         0x8c
> +#define DMAR_IQA_REG            0x90 /* invalidation queue addr */
> +#define DMAR_IQA_REG_HI         0x94
> +#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
> +#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
> +#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
> +#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
> +#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
> +#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
> +#define DMAR_IRTA_REG_HI        0xbc
> +#define DMAR_PQH_REG            0xc0 /* Page request queue head */
> +#define DMAR_PQH_REG_HI         0xc4
> +#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
> +#define DMAR_PQT_REG_HI         0xcc
> +#define DMAR_PQA_REG            0xd0 /* Page request queue address */
> +#define DMAR_PQA_REG_HI         0xd4
> +#define DMAR_PRS_REG            0xdc /* Page request status */
> +#define DMAR_PECTL_REG          0xe0 /* Page request event control */
> +#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
> +#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
> +#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
> +#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
> +#define DMAR_MTRRCAP_REG_HI     0x104
> +#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
> +#define DMAR_MTRRDEF_REG_HI     0x10c
>  
>  #define OFFSET_STRIDE        (9)
>  #define dmar_readl(dmar, reg) readl((dmar) + (reg))
> @@ -58,6 +81,30 @@
>  #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
>  #define VER_MINOR(v)        ((v) & 0x0f)
>  
> +/* CAP_REG */
> +#define DMA_DOMAIN_ID_SHIFT         16  /* 16-bit domain id for 64K domains */
> +#define DMA_DOMAIN_ID_MASK          ((1UL << DMA_DOMAIN_ID_SHIFT) - 1)
> +#define DMA_CAP_ND                  (((DMA_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
> +#define DMA_MGAW                    39  /* Maximum Guest Address Width */
> +#define DMA_CAP_MGAW                (((DMA_MGAW - 1) & 0x3fULL) << 16)
> +#define DMA_MAMV                    18ULL
> +#define DMA_CAP_MAMV                (DMA_MAMV << 48)
> +#define DMA_CAP_PSI                 (1ULL << 39)
> +#define DMA_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
> +#define DMA_FRCD_REG_NR             1ULL
> +#define DMA_CAP_NFR                 ((DMA_FRCD_REG_NR - 1) << 40)
> +#define DMA_CAP_FRO_OFFSET          0x220ULL
> +#define DMA_CAP_FRO                 (DMA_CAP_FRO_OFFSET << 20)
> +
> +/* Supported Adjusted Guest Address Widths */
> +#define DMA_CAP_SAGAW_SHIFT         8
> +#define DMA_CAP_SAGAW_MASK          (0x1fULL << DMA_CAP_SAGAW_SHIFT)
> + /* 39-bit AGAW, 3-level page-table */
> +#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
> + /* 48-bit AGAW, 4-level page-table */
> +#define DMA_CAP_SAGAW_48bit         (0x4ULL << DMA_CAP_SAGAW_SHIFT)
> +#define DMA_CAP_SAGAW               DMA_CAP_SAGAW_39bit
> +
>  /*
>   * Decoding Capability Register
>   */
> @@ -89,6 +136,12 @@
>  #define cap_afl(c)        (((c) >> 3) & 1)
>  #define cap_ndoms(c)        (1 << (4 + 2 * ((c) & 0x7)))
>  
> +/* ECAP_REG */
> +#define DMA_ECAP_QI                 (1ULL << 1)
> +#define DMA_ECAP_IR                 (1ULL << 3)
> +#define DMA_ECAP_EIM                (1ULL << 4)
> +#define DMA_ECAP_MHMV               (15ULL << 20)

Wow, what's this chunk above? The description only mentions adding
functions for the VDT IOMMU implementation, yet there seems to be some
code movement here. Please split it into a separate patch.

>  /*
>   * Extended Capability Register
>   */
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> new file mode 100644
> index 0000000..353fafe
> --- /dev/null
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -0,0 +1,158 @@
> +/*
> + * vvtd.c
> + *
> + * virtualize VTD for HVM.
> + *
> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/domain_page.h>
> +#include <xen/sched.h>
> +#include <xen/types.h>
> +#include <xen/viommu.h>
> +#include <xen/xmalloc.h>
> +#include <asm/current.h>
> +#include <asm/hvm/domain.h>
> +#include <asm/page.h>
> +
> +#include "iommu.h"
> +
> +struct hvm_hw_vvtd_regs {
> +    uint8_t data[1024];
> +};
> +
> +/* Status field of struct vvtd */
> +#define VIOMMU_STATUS_DEFAULT                   (0)
> +#define VIOMMU_STATUS_IRQ_REMAPPING_ENABLED     (1 << 0)
> +#define VIOMMU_STATUS_DMA_REMAPPING_ENABLED     (1 << 1)
> +
> +struct vvtd {
> +    /* VIOMMU_STATUS_XXX */
> +    int status;

unsigned int if it's a bitfield.

> +    /* Address range of remapping hardware register-set */
> +    uint64_t base_addr;
> +    uint64_t length;
> +    /* Point back to the owner domain */
> +    struct domain *domain;
> +    struct hvm_hw_vvtd_regs *regs;
> +    struct page_info *regs_page;
> +};
> +
> +static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg,
> +                                uint32_t value)
> +{
> +    *((uint32_t *)(&vtd->regs->data[reg])) = value;
> +}
> +
> +static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
> +{
> +    return *((uint32_t *)(&vtd->regs->data[reg]));
> +}
> +
> +static inline uint8_t vvtd_get_reg_byte(struct vvtd *vtd, uint32_t reg)
> +{
> +    return *((uint8_t *)(&vtd->regs->data[reg]));

You don't need castings here, data is already an array of
uint8_t.

> +}
> +
> +#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
> +    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
> +    (val) = (val) << 32;                        \
> +    (val) += vvtd_get_reg(vvtd, reg);           \
> +} while(0)
> +#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
> +    vvtd_set_reg(vvtd, reg, (val));             \
> +    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
> +} while(0)

You seem to need to access hvm_hw_vvtd_regs using different sizes, why
not do:

union hvm_hw_vvtd_regs {
    uint8_t  data8[1024];
    uint16_t data16[512];
    uint32_t data32[256];
    uint64_t data64[128];
};

Then the access is much more straightforward and you don't need the
complicated helpers that you have above.

> +static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
> +{
> +    uint64_t cap = DMA_CAP_NFR | DMA_CAP_SLLPS | DMA_CAP_FRO |
> +                   DMA_CAP_MGAW | DMA_CAP_SAGAW | DMA_CAP_ND;
> +    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
> +
> +    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
> +    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
> +    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
> +    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
> +    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
> +}
> +
> +static u64 vvtd_query_caps(struct domain *d)
> +{
> +    return VIOMMU_CAP_IRQ_REMAPPING;
> +}
> +
> +static int vvtd_create(struct domain *d, struct viommu *viommu)
> +{
> +    struct vvtd *vvtd;
> +    int ret;
> +
> +    if ( !is_hvm_domain(d) || (viommu->length != PAGE_SIZE) ||
> +        (~vvtd_query_caps(d) & viommu->caps) )
> +        return -EINVAL;
> +
> +    ret = -ENOMEM;
> +    vvtd = xmalloc_bytes(sizeof(struct vvtd));
> +    if ( !vvtd )
> +        return ret;
> +
> +    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
> +    if ( !vvtd->regs_page )
> +        goto out1;
> +
> +    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
> +    if ( !vvtd->regs )
> +        goto out2;
> +    clear_page(vvtd->regs);
> +
> +    vvtd_reset(vvtd, viommu->caps);
> +    vvtd->base_addr = viommu->base_address;

Don't you need to perform any checks on the base_address? It needs to
be page aligned at least.

Roger.
lan,Tianyu Aug. 24, 2017, 2:16 a.m. UTC | #2
On 2017年08月23日 15:58, Roger Pau Monné wrote:
> On Wed, Aug 09, 2017 at 04:34:12PM -0400, Lan Tianyu wrote:
>> From: Chao Gao <chao.gao@intel.com>
>>
>> This patch adds create/destroy/query function for the emulated VTD
>> and adapts it to the common VIOMMU abstraction.
>>
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>> ---
>>  xen/drivers/passthrough/vtd/Makefile |   7 +-
>>  xen/drivers/passthrough/vtd/iommu.h  |  99 +++++++++++++++++-----
>>  xen/drivers/passthrough/vtd/vvtd.c   | 158 +++++++++++++++++++++++++++++++++++
>>  xen/include/asm-x86/viommu.h         |   3 +
>>  4 files changed, 241 insertions(+), 26 deletions(-)
>>  create mode 100644 xen/drivers/passthrough/vtd/vvtd.c
>>
>> diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
>> index f302653..163c7fe 100644
>> --- a/xen/drivers/passthrough/vtd/Makefile
>> +++ b/xen/drivers/passthrough/vtd/Makefile
>> @@ -1,8 +1,9 @@
>>  subdir-$(CONFIG_X86) += x86
>>  
>> -obj-y += iommu.o
>>  obj-y += dmar.o
>> -obj-y += utils.o
>> -obj-y += qinval.o
>>  obj-y += intremap.o
>> +obj-y += iommu.o
>> +obj-y += qinval.o
>>  obj-y += quirks.o
>> +obj-y += utils.o
>> +obj-$(CONFIG_VIOMMU) += vvtd.o
>> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
>> index 72c1a2e..55f3b6e 100644
>> --- a/xen/drivers/passthrough/vtd/iommu.h
>> +++ b/xen/drivers/passthrough/vtd/iommu.h
>> @@ -23,31 +23,54 @@
>>  #include <asm/msi.h>
>>  
>>  /*
>> - * Intel IOMMU register specification per version 1.0 public spec.
>> + * Intel IOMMU register specification per version 2.4 public spec.
>>   */
>>  
>> -#define    DMAR_VER_REG    0x0    /* Arch version supported by this IOMMU */
>> -#define    DMAR_CAP_REG    0x8    /* Hardware supported capabilities */
>> -#define    DMAR_ECAP_REG    0x10    /* Extended capabilities supported */
>> -#define    DMAR_GCMD_REG    0x18    /* Global command register */
>> -#define    DMAR_GSTS_REG    0x1c    /* Global status register */
>> -#define    DMAR_RTADDR_REG    0x20    /* Root entry table */
>> -#define    DMAR_CCMD_REG    0x28    /* Context command reg */
>> -#define    DMAR_FSTS_REG    0x34    /* Fault Status register */
>> -#define    DMAR_FECTL_REG    0x38    /* Fault control register */
>> -#define    DMAR_FEDATA_REG    0x3c    /* Fault event interrupt data register */
>> -#define    DMAR_FEADDR_REG    0x40    /* Fault event interrupt addr register */
>> -#define    DMAR_FEUADDR_REG 0x44    /* Upper address register */
>> -#define    DMAR_AFLOG_REG    0x58    /* Advanced Fault control */
>> -#define    DMAR_PMEN_REG    0x64    /* Enable Protected Memory Region */
>> -#define    DMAR_PLMBASE_REG 0x68    /* PMRR Low addr */
>> -#define    DMAR_PLMLIMIT_REG 0x6c    /* PMRR low limit */
>> -#define    DMAR_PHMBASE_REG 0x70    /* pmrr high base addr */
>> -#define    DMAR_PHMLIMIT_REG 0x78    /* pmrr high limit */
>> -#define    DMAR_IQH_REG    0x80    /* invalidation queue head */
>> -#define    DMAR_IQT_REG    0x88    /* invalidation queue tail */
>> -#define    DMAR_IQA_REG    0x90    /* invalidation queue addr */
>> -#define    DMAR_IRTA_REG   0xB8    /* intr remap */
>> +#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
>> +#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
>> +#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
>> +#define DMAR_GCMD_REG           0x18 /* Global command register */
>> +#define DMAR_GSTS_REG           0x1c /* Global status register */
>> +#define DMAR_RTADDR_REG         0x20 /* Root entry table */
>> +#define DMAR_CCMD_REG           0x28 /* Context command reg */
>> +#define DMAR_FSTS_REG           0x34 /* Fault Status register */
>> +#define DMAR_FECTL_REG          0x38 /* Fault control register */
>> +#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data register */
>> +#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr register */
>> +#define DMAR_FEUADDR_REG        0x44 /* Upper address register */
>> +#define DMAR_AFLOG_REG          0x58 /* Advanced Fault control */
>> +#define DMAR_PMEN_REG           0x64 /* Enable Protected Memory Region */
>> +#define DMAR_PLMBASE_REG        0x68 /* PMRR Low addr */
>> +#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
>> +#define DMAR_PHMBASE_REG        0x70 /* pmrr high base addr */
>> +#define DMAR_PHMLIMIT_REG       0x78 /* pmrr high limit */
>> +#define DMAR_IQH_REG            0x80 /* invalidation queue head */
>> +#define DMAR_IQT_REG            0x88 /* invalidation queue tail */
>> +#define DMAR_IQT_REG_HI         0x8c
>> +#define DMAR_IQA_REG            0x90 /* invalidation queue addr */
>> +#define DMAR_IQA_REG_HI         0x94
>> +#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
>> +#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
>> +#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
>> +#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
>> +#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
>> +#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
>> +#define DMAR_IRTA_REG_HI        0xbc
>> +#define DMAR_PQH_REG            0xc0 /* Page request queue head */
>> +#define DMAR_PQH_REG_HI         0xc4
>> +#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
>> +#define DMAR_PQT_REG_HI         0xcc
>> +#define DMAR_PQA_REG            0xd0 /* Page request queue address */
>> +#define DMAR_PQA_REG_HI         0xd4
>> +#define DMAR_PRS_REG            0xdc /* Page request status */
>> +#define DMAR_PECTL_REG          0xe0 /* Page request event control */
>> +#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
>> +#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
>> +#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
>> +#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
>> +#define DMAR_MTRRCAP_REG_HI     0x104
>> +#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
>> +#define DMAR_MTRRDEF_REG_HI     0x10c
>>  
>>  #define OFFSET_STRIDE        (9)
>>  #define dmar_readl(dmar, reg) readl((dmar) + (reg))
>> @@ -58,6 +81,30 @@
>>  #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
>>  #define VER_MINOR(v)        ((v) & 0x0f)
>>  
>> +/* CAP_REG */
>> +#define DMA_DOMAIN_ID_SHIFT         16  /* 16-bit domain id for 64K domains */
>> +#define DMA_DOMAIN_ID_MASK          ((1UL << DMA_DOMAIN_ID_SHIFT) - 1)
>> +#define DMA_CAP_ND                  (((DMA_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
>> +#define DMA_MGAW                    39  /* Maximum Guest Address Width */
>> +#define DMA_CAP_MGAW                (((DMA_MGAW - 1) & 0x3fULL) << 16)
>> +#define DMA_MAMV                    18ULL
>> +#define DMA_CAP_MAMV                (DMA_MAMV << 48)
>> +#define DMA_CAP_PSI                 (1ULL << 39)
>> +#define DMA_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
>> +#define DMA_FRCD_REG_NR             1ULL
>> +#define DMA_CAP_NFR                 ((DMA_FRCD_REG_NR - 1) << 40)
>> +#define DMA_CAP_FRO_OFFSET          0x220ULL
>> +#define DMA_CAP_FRO                 (DMA_CAP_FRO_OFFSET << 20)
>> +
>> +/* Supported Adjusted Guest Address Widths */
>> +#define DMA_CAP_SAGAW_SHIFT         8
>> +#define DMA_CAP_SAGAW_MASK          (0x1fULL << DMA_CAP_SAGAW_SHIFT)
>> + /* 39-bit AGAW, 3-level page-table */
>> +#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
>> + /* 48-bit AGAW, 4-level page-table */
>> +#define DMA_CAP_SAGAW_48bit         (0x4ULL << DMA_CAP_SAGAW_SHIFT)
>> +#define DMA_CAP_SAGAW               DMA_CAP_SAGAW_39bit
>> +
>>  /*
>>   * Decoding Capability Register
>>   */
>> @@ -89,6 +136,12 @@
>>  #define cap_afl(c)        (((c) >> 3) & 1)
>>  #define cap_ndoms(c)        (1 << (4 + 2 * ((c) & 0x7)))
>>  
>> +/* ECAP_REG */
>> +#define DMA_ECAP_QI                 (1ULL << 1)
>> +#define DMA_ECAP_IR                 (1ULL << 3)
>> +#define DMA_ECAP_EIM                (1ULL << 4)
>> +#define DMA_ECAP_MHMV               (15ULL << 20)
> 
> Wow, what's this chunk above? The description only mentions adding
> functions for the VDT IOMMU implementation, yet there seems to be some
> code movement here. Please split it into a separate patch.
> 

Sure. Will split.

>>  /*
>>   * Extended Capability Register
>>   */
>> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
>> new file mode 100644
>> index 0000000..353fafe
>> --- /dev/null
>> +++ b/xen/drivers/passthrough/vtd/vvtd.c
>> @@ -0,0 +1,158 @@
>> +/*
>> + * vvtd.c
>> + *
>> + * virtualize VTD for HVM.
>> + *
>> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU General Public
>> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +
>> +#include <xen/domain_page.h>
>> +#include <xen/sched.h>
>> +#include <xen/types.h>
>> +#include <xen/viommu.h>
>> +#include <xen/xmalloc.h>
>> +#include <asm/current.h>
>> +#include <asm/hvm/domain.h>
>> +#include <asm/page.h>
>> +
>> +#include "iommu.h"
>> +
>> +struct hvm_hw_vvtd_regs {
>> +    uint8_t data[1024];
>> +};
>> +
>> +/* Status field of struct vvtd */
>> +#define VIOMMU_STATUS_DEFAULT                   (0)
>> +#define VIOMMU_STATUS_IRQ_REMAPPING_ENABLED     (1 << 0)
>> +#define VIOMMU_STATUS_DMA_REMAPPING_ENABLED     (1 << 1)
>> +
>> +struct vvtd {
>> +    /* VIOMMU_STATUS_XXX */
>> +    int status;
> 
> unsigned int if it's a bitfield.
> 
>> +    /* Address range of remapping hardware register-set */
>> +    uint64_t base_addr;
>> +    uint64_t length;
>> +    /* Point back to the owner domain */
>> +    struct domain *domain;
>> +    struct hvm_hw_vvtd_regs *regs;
>> +    struct page_info *regs_page;
>> +};
>> +
>> +static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg,
>> +                                uint32_t value)
>> +{
>> +    *((uint32_t *)(&vtd->regs->data[reg])) = value;
>> +}
>> +
>> +static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
>> +{
>> +    return *((uint32_t *)(&vtd->regs->data[reg]));
>> +}
>> +
>> +static inline uint8_t vvtd_get_reg_byte(struct vvtd *vtd, uint32_t reg)
>> +{
>> +    return *((uint8_t *)(&vtd->regs->data[reg]));
> 
> You don't need castings here, data is already an array of
> uint8_t.
> 
>> +}
>> +
>> +#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
>> +    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
>> +    (val) = (val) << 32;                        \
>> +    (val) += vvtd_get_reg(vvtd, reg);           \
>> +} while(0)
>> +#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
>> +    vvtd_set_reg(vvtd, reg, (val));             \
>> +    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
>> +} while(0)
> 
> You seem to need to access hvm_hw_vvtd_regs using different sizes, why
> not do:
> 
> union hvm_hw_vvtd_regs {
>     uint8_t  data8[1024];
>     uint16_t data16[512];
>     uint32_t data32[256];
>     uint64_t data64[128];
> };
> 
> Then the access is much more straightforward and you don't need the
> complicated helpers that you have above.

Yes, that will be simpler.

> 
>> +static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
>> +{
>> +    uint64_t cap = DMA_CAP_NFR | DMA_CAP_SLLPS | DMA_CAP_FRO |
>> +                   DMA_CAP_MGAW | DMA_CAP_SAGAW | DMA_CAP_ND;
>> +    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
>> +
>> +    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
>> +    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
>> +    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
>> +    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
>> +    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
>> +}
>> +
>> +static u64 vvtd_query_caps(struct domain *d)
>> +{
>> +    return VIOMMU_CAP_IRQ_REMAPPING;
>> +}
>> +
>> +static int vvtd_create(struct domain *d, struct viommu *viommu)
>> +{
>> +    struct vvtd *vvtd;
>> +    int ret;
>> +
>> +    if ( !is_hvm_domain(d) || (viommu->length != PAGE_SIZE) ||
>> +        (~vvtd_query_caps(d) & viommu->caps) )
>> +        return -EINVAL;
>> +
>> +    ret = -ENOMEM;
>> +    vvtd = xmalloc_bytes(sizeof(struct vvtd));
>> +    if ( !vvtd )
>> +        return ret;
>> +
>> +    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
>> +    if ( !vvtd->regs_page )
>> +        goto out1;
>> +
>> +    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
>> +    if ( !vvtd->regs )
>> +        goto out2;
>> +    clear_page(vvtd->regs);
>> +
>> +    vvtd_reset(vvtd, viommu->caps);
>> +    vvtd->base_addr = viommu->base_address;
> 
> Don't you need to perform any checks on the base_address? It needs to
> be page aligned at least.

Yes, it should be check.
Roger Pau Monné Aug. 24, 2017, 8:49 a.m. UTC | #3
On Thu, Aug 24, 2017 at 10:16:32AM +0800, Lan Tianyu wrote:
> On 2017年08月23日 15:58, Roger Pau Monné wrote:
> > On Wed, Aug 09, 2017 at 04:34:12PM -0400, Lan Tianyu wrote:
> >> From: Chao Gao <chao.gao@intel.com>
> >> +}
> >> +
> >> +#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
> >> +    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
> >> +    (val) = (val) << 32;                        \
> >> +    (val) += vvtd_get_reg(vvtd, reg);           \
> >> +} while(0)
> >> +#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
> >> +    vvtd_set_reg(vvtd, reg, (val));             \
> >> +    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
> >> +} while(0)
> > 
> > You seem to need to access hvm_hw_vvtd_regs using different sizes, why
> > not do:
> > 
> > union hvm_hw_vvtd_regs {
> >     uint8_t  data8[1024];
> >     uint16_t data16[512];
> >     uint32_t data32[256];
> >     uint64_t data64[128];
> > };
> > 
> > Then the access is much more straightforward and you don't need the
> > complicated helpers that you have above.
> 
> Yes, that will be simpler.

Keep in mind (as said in another patch) that this approach will only
work correctly as long as you force accesses to be size aligned, which
you where not doing now.

I've looked at the VT-d spec, but I cannot find any section that
explains the restrictions on access sizes and alignments.

Roger.
lan,Tianyu Aug. 24, 2017, 8:54 a.m. UTC | #4
On 2017年08月24日 16:49, Roger Pau Monné wrote:
> On Thu, Aug 24, 2017 at 10:16:32AM +0800, Lan Tianyu wrote:
>> On 2017年08月23日 15:58, Roger Pau Monné wrote:
>>> On Wed, Aug 09, 2017 at 04:34:12PM -0400, Lan Tianyu wrote:
>>>> From: Chao Gao <chao.gao@intel.com>
>>>> +}
>>>> +
>>>> +#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
>>>> +    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
>>>> +    (val) = (val) << 32;                        \
>>>> +    (val) += vvtd_get_reg(vvtd, reg);           \
>>>> +} while(0)
>>>> +#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
>>>> +    vvtd_set_reg(vvtd, reg, (val));             \
>>>> +    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
>>>> +} while(0)
>>>
>>> You seem to need to access hvm_hw_vvtd_regs using different sizes, why
>>> not do:
>>>
>>> union hvm_hw_vvtd_regs {
>>>     uint8_t  data8[1024];
>>>     uint16_t data16[512];
>>>     uint32_t data32[256];
>>>     uint64_t data64[128];
>>> };
>>>
>>> Then the access is much more straightforward and you don't need the
>>> complicated helpers that you have above.
>>
>> Yes, that will be simpler.
> 
> Keep in mind (as said in another patch) that this approach will only
> work correctly as long as you force accesses to be size aligned, which
> you where not doing now.
> 
> I've looked at the VT-d spec, but I cannot find any section that
> explains the restrictions on access sizes and alignments.
> 

10.2 Software Access to Registers?
Roger Pau Monné Aug. 24, 2017, 9:02 a.m. UTC | #5
On Thu, Aug 24, 2017 at 04:54:25PM +0800, Lan Tianyu wrote:
> On 2017年08月24日 16:49, Roger Pau Monné wrote:
> > On Thu, Aug 24, 2017 at 10:16:32AM +0800, Lan Tianyu wrote:
> >> On 2017年08月23日 15:58, Roger Pau Monné wrote:
> >>> On Wed, Aug 09, 2017 at 04:34:12PM -0400, Lan Tianyu wrote:
> >>>> From: Chao Gao <chao.gao@intel.com>
> >>>> +}
> >>>> +
> >>>> +#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
> >>>> +    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
> >>>> +    (val) = (val) << 32;                        \
> >>>> +    (val) += vvtd_get_reg(vvtd, reg);           \
> >>>> +} while(0)
> >>>> +#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
> >>>> +    vvtd_set_reg(vvtd, reg, (val));             \
> >>>> +    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
> >>>> +} while(0)
> >>>
> >>> You seem to need to access hvm_hw_vvtd_regs using different sizes, why
> >>> not do:
> >>>
> >>> union hvm_hw_vvtd_regs {
> >>>     uint8_t  data8[1024];
> >>>     uint16_t data16[512];
> >>>     uint32_t data32[256];
> >>>     uint64_t data64[128];
> >>> };
> >>>
> >>> Then the access is much more straightforward and you don't need the
> >>> complicated helpers that you have above.
> >>
> >> Yes, that will be simpler.
> > 
> > Keep in mind (as said in another patch) that this approach will only
> > work correctly as long as you force accesses to be size aligned, which
> > you where not doing now.
> > 
> > I've looked at the VT-d spec, but I cannot find any section that
> > explains the restrictions on access sizes and alignments.
> > 
> 
> 10.2 Software Access to Registers?

Oh, thanks. I was grepping for "access sizes".

So yes only size aligned accesses are allowed. You should fix the
patch where you check the access size/alignment so it checks for (addr
& (len - 1)) (IIRC you where checking addr & 3).

Roger.
diff mbox

Patch

diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
index f302653..163c7fe 100644
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -1,8 +1,9 @@ 
 subdir-$(CONFIG_X86) += x86
 
-obj-y += iommu.o
 obj-y += dmar.o
-obj-y += utils.o
-obj-y += qinval.o
 obj-y += intremap.o
+obj-y += iommu.o
+obj-y += qinval.o
 obj-y += quirks.o
+obj-y += utils.o
+obj-$(CONFIG_VIOMMU) += vvtd.o
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index 72c1a2e..55f3b6e 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -23,31 +23,54 @@ 
 #include <asm/msi.h>
 
 /*
- * Intel IOMMU register specification per version 1.0 public spec.
+ * Intel IOMMU register specification per version 2.4 public spec.
  */
 
-#define    DMAR_VER_REG    0x0    /* Arch version supported by this IOMMU */
-#define    DMAR_CAP_REG    0x8    /* Hardware supported capabilities */
-#define    DMAR_ECAP_REG    0x10    /* Extended capabilities supported */
-#define    DMAR_GCMD_REG    0x18    /* Global command register */
-#define    DMAR_GSTS_REG    0x1c    /* Global status register */
-#define    DMAR_RTADDR_REG    0x20    /* Root entry table */
-#define    DMAR_CCMD_REG    0x28    /* Context command reg */
-#define    DMAR_FSTS_REG    0x34    /* Fault Status register */
-#define    DMAR_FECTL_REG    0x38    /* Fault control register */
-#define    DMAR_FEDATA_REG    0x3c    /* Fault event interrupt data register */
-#define    DMAR_FEADDR_REG    0x40    /* Fault event interrupt addr register */
-#define    DMAR_FEUADDR_REG 0x44    /* Upper address register */
-#define    DMAR_AFLOG_REG    0x58    /* Advanced Fault control */
-#define    DMAR_PMEN_REG    0x64    /* Enable Protected Memory Region */
-#define    DMAR_PLMBASE_REG 0x68    /* PMRR Low addr */
-#define    DMAR_PLMLIMIT_REG 0x6c    /* PMRR low limit */
-#define    DMAR_PHMBASE_REG 0x70    /* pmrr high base addr */
-#define    DMAR_PHMLIMIT_REG 0x78    /* pmrr high limit */
-#define    DMAR_IQH_REG    0x80    /* invalidation queue head */
-#define    DMAR_IQT_REG    0x88    /* invalidation queue tail */
-#define    DMAR_IQA_REG    0x90    /* invalidation queue addr */
-#define    DMAR_IRTA_REG   0xB8    /* intr remap */
+#define DMAR_VER_REG            0x0  /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG            0x8  /* Hardware supported capabilities */
+#define DMAR_ECAP_REG           0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG           0x18 /* Global command register */
+#define DMAR_GSTS_REG           0x1c /* Global status register */
+#define DMAR_RTADDR_REG         0x20 /* Root entry table */
+#define DMAR_CCMD_REG           0x28 /* Context command reg */
+#define DMAR_FSTS_REG           0x34 /* Fault Status register */
+#define DMAR_FECTL_REG          0x38 /* Fault control register */
+#define DMAR_FEDATA_REG         0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG         0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG        0x44 /* Upper address register */
+#define DMAR_AFLOG_REG          0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG           0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG        0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG       0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG        0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG       0x78 /* pmrr high limit */
+#define DMAR_IQH_REG            0x80 /* invalidation queue head */
+#define DMAR_IQT_REG            0x88 /* invalidation queue tail */
+#define DMAR_IQT_REG_HI         0x8c
+#define DMAR_IQA_REG            0x90 /* invalidation queue addr */
+#define DMAR_IQA_REG_HI         0x94
+#define DMAR_ICS_REG            0x9c /* Invalidation complete status */
+#define DMAR_IECTL_REG          0xa0 /* Invalidation event control */
+#define DMAR_IEDATA_REG         0xa4 /* Invalidation event data */
+#define DMAR_IEADDR_REG         0xa8 /* Invalidation event address */
+#define DMAR_IEUADDR_REG        0xac /* Invalidation event address */
+#define DMAR_IRTA_REG           0xb8 /* Interrupt remapping table addr */
+#define DMAR_IRTA_REG_HI        0xbc
+#define DMAR_PQH_REG            0xc0 /* Page request queue head */
+#define DMAR_PQH_REG_HI         0xc4
+#define DMAR_PQT_REG            0xc8 /* Page request queue tail*/
+#define DMAR_PQT_REG_HI         0xcc
+#define DMAR_PQA_REG            0xd0 /* Page request queue address */
+#define DMAR_PQA_REG_HI         0xd4
+#define DMAR_PRS_REG            0xdc /* Page request status */
+#define DMAR_PECTL_REG          0xe0 /* Page request event control */
+#define DMAR_PEDATA_REG         0xe4 /* Page request event data */
+#define DMAR_PEADDR_REG         0xe8 /* Page request event address */
+#define DMAR_PEUADDR_REG        0xec /* Page event upper address */
+#define DMAR_MTRRCAP_REG        0x100 /* MTRR capability */
+#define DMAR_MTRRCAP_REG_HI     0x104
+#define DMAR_MTRRDEF_REG        0x108 /* MTRR default type */
+#define DMAR_MTRRDEF_REG_HI     0x10c
 
 #define OFFSET_STRIDE        (9)
 #define dmar_readl(dmar, reg) readl((dmar) + (reg))
@@ -58,6 +81,30 @@ 
 #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
 #define VER_MINOR(v)        ((v) & 0x0f)
 
+/* CAP_REG */
+#define DMA_DOMAIN_ID_SHIFT         16  /* 16-bit domain id for 64K domains */
+#define DMA_DOMAIN_ID_MASK          ((1UL << DMA_DOMAIN_ID_SHIFT) - 1)
+#define DMA_CAP_ND                  (((DMA_DOMAIN_ID_SHIFT - 4) / 2) & 7ULL)
+#define DMA_MGAW                    39  /* Maximum Guest Address Width */
+#define DMA_CAP_MGAW                (((DMA_MGAW - 1) & 0x3fULL) << 16)
+#define DMA_MAMV                    18ULL
+#define DMA_CAP_MAMV                (DMA_MAMV << 48)
+#define DMA_CAP_PSI                 (1ULL << 39)
+#define DMA_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
+#define DMA_FRCD_REG_NR             1ULL
+#define DMA_CAP_NFR                 ((DMA_FRCD_REG_NR - 1) << 40)
+#define DMA_CAP_FRO_OFFSET          0x220ULL
+#define DMA_CAP_FRO                 (DMA_CAP_FRO_OFFSET << 20)
+
+/* Supported Adjusted Guest Address Widths */
+#define DMA_CAP_SAGAW_SHIFT         8
+#define DMA_CAP_SAGAW_MASK          (0x1fULL << DMA_CAP_SAGAW_SHIFT)
+ /* 39-bit AGAW, 3-level page-table */
+#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
+ /* 48-bit AGAW, 4-level page-table */
+#define DMA_CAP_SAGAW_48bit         (0x4ULL << DMA_CAP_SAGAW_SHIFT)
+#define DMA_CAP_SAGAW               DMA_CAP_SAGAW_39bit
+
 /*
  * Decoding Capability Register
  */
@@ -89,6 +136,12 @@ 
 #define cap_afl(c)        (((c) >> 3) & 1)
 #define cap_ndoms(c)        (1 << (4 + 2 * ((c) & 0x7)))
 
+/* ECAP_REG */
+#define DMA_ECAP_QI                 (1ULL << 1)
+#define DMA_ECAP_IR                 (1ULL << 3)
+#define DMA_ECAP_EIM                (1ULL << 4)
+#define DMA_ECAP_MHMV               (15ULL << 20)
+
 /*
  * Extended Capability Register
  */
diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
new file mode 100644
index 0000000..353fafe
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -0,0 +1,158 @@ 
+/*
+ * vvtd.c
+ *
+ * virtualize VTD for HVM.
+ *
+ * Copyright (C) 2017 Chao Gao, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/viommu.h>
+#include <xen/xmalloc.h>
+#include <asm/current.h>
+#include <asm/hvm/domain.h>
+#include <asm/page.h>
+
+#include "iommu.h"
+
+struct hvm_hw_vvtd_regs {
+    uint8_t data[1024];
+};
+
+/* Status field of struct vvtd */
+#define VIOMMU_STATUS_DEFAULT                   (0)
+#define VIOMMU_STATUS_IRQ_REMAPPING_ENABLED     (1 << 0)
+#define VIOMMU_STATUS_DMA_REMAPPING_ENABLED     (1 << 1)
+
+struct vvtd {
+    /* VIOMMU_STATUS_XXX */
+    int status;
+    /* Address range of remapping hardware register-set */
+    uint64_t base_addr;
+    uint64_t length;
+    /* Point back to the owner domain */
+    struct domain *domain;
+    struct hvm_hw_vvtd_regs *regs;
+    struct page_info *regs_page;
+};
+
+static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg,
+                                uint32_t value)
+{
+    *((uint32_t *)(&vtd->regs->data[reg])) = value;
+}
+
+static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
+{
+    return *((uint32_t *)(&vtd->regs->data[reg]));
+}
+
+static inline uint8_t vvtd_get_reg_byte(struct vvtd *vtd, uint32_t reg)
+{
+    return *((uint8_t *)(&vtd->regs->data[reg]));
+}
+
+#define vvtd_get_reg_quad(vvtd, reg, val) do {  \
+    (val) = vvtd_get_reg(vvtd, (reg) + 4 );     \
+    (val) = (val) << 32;                        \
+    (val) += vvtd_get_reg(vvtd, reg);           \
+} while(0)
+#define vvtd_set_reg_quad(vvtd, reg, val) do {  \
+    vvtd_set_reg(vvtd, reg, (val));             \
+    vvtd_set_reg(vvtd, (reg) + 4, (val) >> 32); \
+} while(0)
+
+static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
+{
+    uint64_t cap = DMA_CAP_NFR | DMA_CAP_SLLPS | DMA_CAP_FRO |
+                   DMA_CAP_MGAW | DMA_CAP_SAGAW | DMA_CAP_ND;
+    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
+
+    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
+    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
+    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
+    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
+    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
+}
+
+static u64 vvtd_query_caps(struct domain *d)
+{
+    return VIOMMU_CAP_IRQ_REMAPPING;
+}
+
+static int vvtd_create(struct domain *d, struct viommu *viommu)
+{
+    struct vvtd *vvtd;
+    int ret;
+
+    if ( !is_hvm_domain(d) || (viommu->length != PAGE_SIZE) ||
+        (~vvtd_query_caps(d) & viommu->caps) )
+        return -EINVAL;
+
+    ret = -ENOMEM;
+    vvtd = xmalloc_bytes(sizeof(struct vvtd));
+    if ( !vvtd )
+        return ret;
+
+    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
+    if ( !vvtd->regs_page )
+        goto out1;
+
+    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
+    if ( !vvtd->regs )
+        goto out2;
+    clear_page(vvtd->regs);
+
+    vvtd_reset(vvtd, viommu->caps);
+    vvtd->base_addr = viommu->base_address;
+    vvtd->length = viommu->length;
+    vvtd->domain = d;
+    vvtd->status = VIOMMU_STATUS_DEFAULT;
+    return 0;
+
+ out2:
+    free_domheap_page(vvtd->regs_page);
+ out1:
+    xfree(vvtd);
+    return ret;
+}
+
+static int vvtd_destroy(struct viommu *viommu)
+{
+    struct vvtd *vvtd = viommu->priv;
+
+    if ( vvtd )
+    {
+        unmap_domain_page_global(vvtd->regs);
+        free_domheap_page(vvtd->regs_page);
+        xfree(vvtd);
+    }
+    return 0;
+}
+
+struct viommu_ops vvtd_hvm_vmx_ops = {
+    .query_caps = vvtd_query_caps,
+    .create = vvtd_create,
+    .destroy = vvtd_destroy
+};
+
+static int vvtd_register(void)
+{
+    viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
+    return 0;
+}
+__initcall(vvtd_register);
diff --git a/xen/include/asm-x86/viommu.h b/xen/include/asm-x86/viommu.h
index 1e8d4be..b730e65 100644
--- a/xen/include/asm-x86/viommu.h
+++ b/xen/include/asm-x86/viommu.h
@@ -22,6 +22,9 @@ 
 
 #include <xen/viommu.h>
 #include <asm/types.h>
+#include <asm/processor.h>
+
+extern struct viommu_ops vvtd_hvm_vmx_ops;
 
 /* IRQ request type */
 #define VIOMMU_REQUEST_IRQ_MSI          0