diff mbox

[V3,11/29] x86/hvm: Introduce a emulated VTD for HVM

Message ID 1506049330-11196-12-git-send-email-tianyu.lan@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

lan,Tianyu Sept. 22, 2017, 3:01 a.m. UTC
From: Chao Gao <chao.gao@intel.com>

This patch adds create/destroy function for the emulated VTD
and adapts it to the common VIOMMU abstraction.

Signed-off-by: Chao Gao <chao.gao@intel.com>
Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
---
 xen/drivers/passthrough/vtd/Makefile |   7 +-
 xen/drivers/passthrough/vtd/iommu.h  |  23 +++++-
 xen/drivers/passthrough/vtd/vvtd.c   | 147 +++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 7 deletions(-)
 create mode 100644 xen/drivers/passthrough/vtd/vvtd.c

Comments

Roger Pau Monné Oct. 19, 2017, 11:20 a.m. UTC | #1
On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
> From: Chao Gao <chao.gao@intel.com>
> 
> This patch adds create/destroy function for the emulated VTD
> and adapts it to the common VIOMMU abstraction.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
> ---
>  xen/drivers/passthrough/vtd/Makefile |   7 +-
>  xen/drivers/passthrough/vtd/iommu.h  |  23 +++++-
>  xen/drivers/passthrough/vtd/vvtd.c   | 147 +++++++++++++++++++++++++++++++++++
>  3 files changed, 170 insertions(+), 7 deletions(-)
>  create mode 100644 xen/drivers/passthrough/vtd/vvtd.c
> 
> diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
> index f302653..163c7fe 100644
> --- a/xen/drivers/passthrough/vtd/Makefile
> +++ b/xen/drivers/passthrough/vtd/Makefile
> @@ -1,8 +1,9 @@
>  subdir-$(CONFIG_X86) += x86
>  
> -obj-y += iommu.o
>  obj-y += dmar.o
> -obj-y += utils.o
> -obj-y += qinval.o
>  obj-y += intremap.o
> +obj-y += iommu.o
> +obj-y += qinval.o
>  obj-y += quirks.o
> +obj-y += utils.o

Why do you need to shuffle the list above?

Also I'm not sure the Intel vIOMMU implementation should live here. As
you can see the path is:

xen/drivers/passthrough/vtd/

The vIOMMU is not tied to passthrough at all, so I would rather place
it in:

xen/drivers/vvtd/

Or maybe you can create something like:

xen/drivers/viommu/

So that all vIOMMU implementations can share some code.

> +obj-$(CONFIG_VIOMMU) += vvtd.o
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index d7e433e..ef038c9 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -66,6 +66,12 @@
>  #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
>  #define VER_MINOR(v)        ((v) & 0x0f)
>  
> +/* Supported Adjusted Guest Address Widths */
> +#define DMA_CAP_SAGAW_SHIFT         8
> + /* 39-bit AGAW, 3-level page-table */
> +#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
> +#define DMA_CAP_ND_64K              6ULL
> +
>  /*
>   * Decoding Capability Register
>   */
> @@ -74,6 +80,7 @@
>  #define cap_write_drain(c)     (((c) >> 54) & 1)
>  #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
>  #define cap_num_fault_regs(c)  ((((c) >> 40) & 0xff) + 1)
> +#define cap_set_num_fault_regs(c)  ((((c) - 1) & 0xff) << 40)
>  #define cap_pgsel_inv(c)       (((c) >> 39) & 1)
>  
>  #define cap_super_page_val(c)  (((c) >> 34) & 0xf)
> @@ -85,11 +92,13 @@
>  #define cap_sps_1tb(c)         ((c >> 37) & 1)
>  
>  #define cap_fault_reg_offset(c)    ((((c) >> 24) & 0x3ff) * 16)
> +#define cap_set_fault_reg_offset(c) ((((c) / 16) & 0x3ff) << 24 )
>  
>  #define cap_isoch(c)        (((c) >> 23) & 1)
>  #define cap_qos(c)        (((c) >> 22) & 1)
>  #define cap_mgaw(c)        ((((c) >> 16) & 0x3f) + 1)
> -#define cap_sagaw(c)        (((c) >> 8) & 0x1f)
> +#define cap_set_mgaw(c)     ((((c) - 1) & 0x3f) << 16)
> +#define cap_sagaw(c)        (((c) >> DMA_CAP_SAGAW_SHIFT) & 0x1f)
>  #define cap_caching_mode(c)    (((c) >> 7) & 1)
>  #define cap_phmr(c)        (((c) >> 6) & 1)
>  #define cap_plmr(c)        (((c) >> 5) & 1)
> @@ -104,10 +113,16 @@
>  #define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
>  #define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
>  #define ecap_coherent(e)         ((e >> 0) & 0x1)
> -#define ecap_queued_inval(e)     ((e >> 1) & 0x1)
> +#define DMA_ECAP_QI_SHIFT        1
> +#define DMA_ECAP_QI              (1ULL << DMA_ECAP_QI_SHIFT)
> +#define ecap_queued_inval(e)     ((e >> DMA_ECAP_QI_SHIFT) & 0x1)

Looks like this could be based on MASK_EXTR instead, but seeing how
the file is full of open-coded mask extracts I'm not sure it's worth
it anymore.

>  #define ecap_dev_iotlb(e)        ((e >> 2) & 0x1)
> -#define ecap_intr_remap(e)       ((e >> 3) & 0x1)
> -#define ecap_eim(e)              ((e >> 4) & 0x1)
> +#define DMA_ECAP_IR_SHIFT        3
> +#define DMA_ECAP_IR              (1ULL << DMA_ECAP_IR_SHIFT)
> +#define ecap_intr_remap(e)       ((e >> DMA_ECAP_IR_SHIFT) & 0x1)
> +#define DMA_ECAP_EIM_SHIFT       4
> +#define DMA_ECAP_EIM             (1ULL << DMA_ECAP_EIM_SHIFT)
> +#define ecap_eim(e)              ((e >> DMA_ECAP_EIM_SHIFT) & 0x1)

Maybe worth placing all the DMA_ECAP_* defines in a separate section?
Seems like how it's done for other features like DMA_FSTS or
DMA_CCMD.

>  #define ecap_cache_hints(e)      ((e >> 5) & 0x1)
>  #define ecap_pass_thru(e)        ((e >> 6) & 0x1)
>  #define ecap_snp_ctl(e)          ((e >> 7) & 0x1)
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> new file mode 100644
> index 0000000..c851ec7
> --- /dev/null
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -0,0 +1,147 @@
> +/*
> + * vvtd.c
> + *
> + * virtualize VTD for HVM.
> + *
> + * Copyright (C) 2017 Chao Gao, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public
> + * License along with this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <xen/domain_page.h>
> +#include <xen/sched.h>
> +#include <xen/types.h>
> +#include <xen/viommu.h>
> +#include <xen/xmalloc.h>
> +#include <asm/current.h>
> +#include <asm/hvm/domain.h>
> +#include <asm/page.h>
> +
> +#include "iommu.h"
> +
> +/* Supported capabilities by vvtd */
> +unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;

static?

Or even better, why is this not a define like VIOMMU_MAX_CAPS or
similar.

> +
> +union hvm_hw_vvtd_regs {
> +    uint32_t data32[256];
> +    uint64_t data64[128];
> +};

Do you really need to store all the register space instead of only
storing specific registers?

> +
> +struct vvtd {
> +    /* Address range of remapping hardware register-set */
> +    uint64_t base_addr;
> +    uint64_t length;

The length field doesn't seem to be used below.

> +    /* Point back to the owner domain */
> +    struct domain *domain;
> +    union hvm_hw_vvtd_regs *regs;

Does this need to be a pointer?

> +    struct page_info *regs_page;
> +};
> +
> +static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
> +{
> +    vtd->regs->data32[reg/sizeof(uint32_t)] = value;
> +}
> +
> +static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
> +{
> +    return vtd->regs->data32[reg/sizeof(uint32_t)];
> +}
> +
> +static inline void vvtd_set_reg_quad(struct vvtd *vtd, uint32_t reg,
> +                                     uint64_t value)
> +{
> +    vtd->regs->data64[reg/sizeof(uint64_t)] = value;
> +}
> +
> +static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
> +{
> +    return vtd->regs->data64[reg/sizeof(uint64_t)];
> +}
> +
> +static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
> +{
> +    uint64_t cap = cap_set_num_fault_regs(1ULL) |
> +                   cap_set_fault_reg_offset(0x220ULL) |
> +                   cap_set_mgaw(39ULL) | DMA_CAP_SAGAW_39bit |
> +                   DMA_CAP_ND_64K;
> +    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
> +
> +    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
> +    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
> +    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
> +    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
> +    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
> +}
> +
> +static int vvtd_create(struct domain *d, struct viommu *viommu)
> +{
> +    struct vvtd *vvtd;
> +    int ret;
> +
> +    if ( !is_hvm_domain(d) || (viommu->base_address & (PAGE_SIZE - 1)) ||
> +        (~vvtd_caps & viommu->caps) )
> +        return -EINVAL;
> +
> +    ret = -ENOMEM;
> +    vvtd = xzalloc_bytes(sizeof(struct vvtd));
> +    if ( !vvtd )
> +        return ret;
> +
> +    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
> +    if ( !vvtd->regs_page )
> +        goto out1;
> +
> +    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
> +    if ( !vvtd->regs )
> +        goto out2;
> +    clear_page(vvtd->regs);

Not sure why vvtd->regs needs to be a pointer, and why it needs to use
a full page. AFAICT the size of hvm_hw_vvtd_regs is 1024B, so you are
wasting 3/4 of a page.

> +
> +    vvtd_reset(vvtd, viommu->caps);
> +    vvtd->base_addr = viommu->base_address;
> +    vvtd->domain = d;
> +
> +    viommu->priv = vvtd;
> +
> +    return 0;
> +
> + out2:
> +    free_domheap_page(vvtd->regs_page);
> + out1:
> +    xfree(vvtd);
> +    return ret;

You should try to avoid using labels. I think this can be solved by
not allocating a separate page for regs.

> +}
> +
> +static int vvtd_destroy(struct viommu *viommu)
> +{
> +    struct vvtd *vvtd = viommu->priv;
> +
> +    if ( vvtd )
> +    {
> +        unmap_domain_page_global(vvtd->regs);
> +        free_domheap_page(vvtd->regs_page);
> +        xfree(vvtd);
> +    }
> +    return 0;
> +}
> +
> +struct viommu_ops vvtd_hvm_vmx_ops = {
> +    .create = vvtd_create,
> +    .destroy = vvtd_destroy
> +};
> +
> +static int vvtd_register(void)
> +{
> +    viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
> +    return 0;
> +}
> +__initcall(vvtd_register);

As commented in another patch I think the vIOMMU types should be
registered using a method similar to REGISTER_SCHEDULER.

Thanks, Roger.
Chao Gao Oct. 20, 2017, 2:46 a.m. UTC | #2
On Thu, Oct 19, 2017 at 12:20:35PM +0100, Roger Pau Monné wrote:
>On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
>> From: Chao Gao <chao.gao@intel.com>
>> 
>> This patch adds create/destroy function for the emulated VTD
>> and adapts it to the common VIOMMU abstraction.
>> 
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>> ---
>>  
>> -obj-y += iommu.o
>>  obj-y += dmar.o
>> -obj-y += utils.o
>> -obj-y += qinval.o
>>  obj-y += intremap.o
>> +obj-y += iommu.o
>> +obj-y += qinval.o
>>  obj-y += quirks.o
>> +obj-y += utils.o
>
>Why do you need to shuffle the list above?

I placed them in alphabetic order.

>
>Also I'm not sure the Intel vIOMMU implementation should live here. As
>you can see the path is:
>
>xen/drivers/passthrough/vtd/
>
>The vIOMMU is not tied to passthrough at all, so I would rather place
>it in:
>
>xen/drivers/vvtd/
>
>Or maybe you can create something like:
>
>xen/drivers/viommu/
>
>So that all vIOMMU implementations can share some code.
>

vvtd and vtd use the same header files (i.g. vtd.h). That is why we put
it there.  If that, we shoule move the related header files to a public
directory.

>>  #define cap_isoch(c)        (((c) >> 23) & 1)
>>  #define cap_qos(c)        (((c) >> 22) & 1)
>>  #define cap_mgaw(c)        ((((c) >> 16) & 0x3f) + 1)
>> -#define cap_sagaw(c)        (((c) >> 8) & 0x1f)
>> +#define cap_set_mgaw(c)     ((((c) - 1) & 0x3f) << 16)
>> +#define cap_sagaw(c)        (((c) >> DMA_CAP_SAGAW_SHIFT) & 0x1f)
>>  #define cap_caching_mode(c)    (((c) >> 7) & 1)
>>  #define cap_phmr(c)        (((c) >> 6) & 1)
>>  #define cap_plmr(c)        (((c) >> 5) & 1)
>> @@ -104,10 +113,16 @@
>>  #define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
>>  #define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
>>  #define ecap_coherent(e)         ((e >> 0) & 0x1)
>> -#define ecap_queued_inval(e)     ((e >> 1) & 0x1)
>> +#define DMA_ECAP_QI_SHIFT        1
>> +#define DMA_ECAP_QI              (1ULL << DMA_ECAP_QI_SHIFT)
>> +#define ecap_queued_inval(e)     ((e >> DMA_ECAP_QI_SHIFT) & 0x1)
>
>Looks like this could be based on MASK_EXTR instead, but seeing how
>the file is full of open-coded mask extracts I'm not sure it's worth
>it anymore.
>
>>  #define ecap_dev_iotlb(e)        ((e >> 2) & 0x1)
>> -#define ecap_intr_remap(e)       ((e >> 3) & 0x1)
>> -#define ecap_eim(e)              ((e >> 4) & 0x1)
>> +#define DMA_ECAP_IR_SHIFT        3
>> +#define DMA_ECAP_IR              (1ULL << DMA_ECAP_IR_SHIFT)
>> +#define ecap_intr_remap(e)       ((e >> DMA_ECAP_IR_SHIFT) & 0x1)
>> +#define DMA_ECAP_EIM_SHIFT       4
>> +#define DMA_ECAP_EIM             (1ULL << DMA_ECAP_EIM_SHIFT)
>> +#define ecap_eim(e)              ((e >> DMA_ECAP_EIM_SHIFT) & 0x1)
>
>Maybe worth placing all the DMA_ECAP_* defines in a separate section?
>Seems like how it's done for other features like DMA_FSTS or
>DMA_CCMD.

Got it.

>> +
>> +/* Supported capabilities by vvtd */
>> +unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;
>
>static?
>
>Or even better, why is this not a define like VIOMMU_MAX_CAPS or
>similar.

Yeah. It should be renamed to VVTD_MAX_CAPS.

>
>> +
>> +union hvm_hw_vvtd_regs {
>> +    uint32_t data32[256];
>> +    uint64_t data64[128];
>> +};
>
>Do you really need to store all the register space instead of only
>storing specific registers?

I prefer to store all the registers for we don't need a trick to map
the real offset in hardware to the index in the array.

>
>> +
>> +struct vvtd {
>> +    /* Address range of remapping hardware register-set */
>> +    uint64_t base_addr;
>> +    uint64_t length;
>
>The length field doesn't seem to be used below.

will remove it.

>
>> +    /* Point back to the owner domain */
>> +    struct domain *domain;
>> +    union hvm_hw_vvtd_regs *regs;
>
>Does this need to be a pointer?

Seems not.
>
>> +    struct page_info *regs_page;
>> +};
>> +
>> +static int vvtd_create(struct domain *d, struct viommu *viommu)
>> +{
>> +    struct vvtd *vvtd;
>> +    int ret;
>> +
>> +    if ( !is_hvm_domain(d) || (viommu->base_address & (PAGE_SIZE - 1)) ||
>> +        (~vvtd_caps & viommu->caps) )
>> +        return -EINVAL;
>> +
>> +    ret = -ENOMEM;
>> +    vvtd = xzalloc_bytes(sizeof(struct vvtd));
>> +    if ( !vvtd )
>> +        return ret;
>> +
>> +    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
>> +    if ( !vvtd->regs_page )
>> +        goto out1;
>> +
>> +    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
>> +    if ( !vvtd->regs )
>> +        goto out2;
>> +    clear_page(vvtd->regs);
>
>Not sure why vvtd->regs needs to be a pointer, and why it needs to use
>a full page. AFAICT the size of hvm_hw_vvtd_regs is 1024B, so you are
>wasting 3/4 of a page.

I will define registers as an array directly and 
shrink the size to the number we are really used now.

>> +struct viommu_ops vvtd_hvm_vmx_ops = {
>> +    .create = vvtd_create,
>> +    .destroy = vvtd_destroy
>> +};
>> +
>> +static int vvtd_register(void)
>> +{
>> +    viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
>> +    return 0;
>> +}
>> +__initcall(vvtd_register);
>
>As commented in another patch I think the vIOMMU types should be
>registered using a method similar to REGISTER_SCHEDULER.

Both are ok to me. Will follow your suggestion.

Thanks
Chao
Chao Gao Oct. 20, 2017, 6:12 a.m. UTC | #3
On Fri, Oct 20, 2017 at 12:56:03AM -0600, Jan Beulich wrote:
>>>> On 20.10.17 at 04:46, <chao.gao@intel.com> wrote:
>> On Thu, Oct 19, 2017 at 12:20:35PM +0100, Roger Pau Monné wrote:
>>>On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
>>>> From: Chao Gao <chao.gao@intel.com>
>>>> 
>>>> This patch adds create/destroy function for the emulated VTD
>>>> and adapts it to the common VIOMMU abstraction.
>>>> 
>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>>>> ---
>>>>  
>>>> -obj-y += iommu.o
>>>>  obj-y += dmar.o
>>>> -obj-y += utils.o
>>>> -obj-y += qinval.o
>>>>  obj-y += intremap.o
>>>> +obj-y += iommu.o
>>>> +obj-y += qinval.o
>>>>  obj-y += quirks.o
>>>> +obj-y += utils.o
>>>
>>>Why do you need to shuffle the list above?
>> 
>> I placed them in alphabetic order.
>
>Which is appreciated. But this being non-essential for the patch, it
>would avoid (valid) reviewer questions if you said in the description
>this is an intended but non-essential change.

Sure. I will keep this in mind.

>
>>>Also I'm not sure the Intel vIOMMU implementation should live here. As
>>>you can see the path is:
>>>
>>>xen/drivers/passthrough/vtd/
>>>
>>>The vIOMMU is not tied to passthrough at all, so I would rather place
>>>it in:
>
>Hmm, is vIOMMU usable without an actual backing IOMMU?

I think yes. Now, All vIOMMU features are emulated.

Thanks
Chao
Jan Beulich Oct. 20, 2017, 6:56 a.m. UTC | #4
>>> On 20.10.17 at 04:46, <chao.gao@intel.com> wrote:
> On Thu, Oct 19, 2017 at 12:20:35PM +0100, Roger Pau Monné wrote:
>>On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
>>> From: Chao Gao <chao.gao@intel.com>
>>> 
>>> This patch adds create/destroy function for the emulated VTD
>>> and adapts it to the common VIOMMU abstraction.
>>> 
>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>>> ---
>>>  
>>> -obj-y += iommu.o
>>>  obj-y += dmar.o
>>> -obj-y += utils.o
>>> -obj-y += qinval.o
>>>  obj-y += intremap.o
>>> +obj-y += iommu.o
>>> +obj-y += qinval.o
>>>  obj-y += quirks.o
>>> +obj-y += utils.o
>>
>>Why do you need to shuffle the list above?
> 
> I placed them in alphabetic order.

Which is appreciated. But this being non-essential for the patch, it
would avoid (valid) reviewer questions if you said in the description
this is an intended but non-essential change.

>>Also I'm not sure the Intel vIOMMU implementation should live here. As
>>you can see the path is:
>>
>>xen/drivers/passthrough/vtd/
>>
>>The vIOMMU is not tied to passthrough at all, so I would rather place
>>it in:

Hmm, is vIOMMU usable without an actual backing IOMMU?

>>xen/drivers/vvtd/
>>
>>Or maybe you can create something like:
>>
>>xen/drivers/viommu/
>>
>>So that all vIOMMU implementations can share some code.
>>
> 
> vvtd and vtd use the same header files (i.g. vtd.h). That is why we put
> it there.  If that, we shoule move the related header files to a public
> directory.

And AMD (long ago) had placed their (still incomplete) virtual
implementation into the same directory as well. I.e. at this point
I'm not really opposed to the proposed placement here, albeit
I can see the point of Roger's argument.

Jan
lan,Tianyu Oct. 20, 2017, 8:37 a.m. UTC | #5
On 2017年10月20日 14:56, Jan Beulich wrote:
>>>> On 20.10.17 at 04:46, <chao.gao@intel.com> wrote:
>> On Thu, Oct 19, 2017 at 12:20:35PM +0100, Roger Pau Monné wrote:
>>> On Thu, Sep 21, 2017 at 11:01:52PM -0400, Lan Tianyu wrote:
>>>> From: Chao Gao <chao.gao@intel.com>
>>>>
>>>> This patch adds create/destroy function for the emulated VTD
>>>> and adapts it to the common VIOMMU abstraction.
>>>>
>>>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>>>> Signed-off-by: Lan Tianyu <tianyu.lan@intel.com>
>>>> ---
>>>>  
>>>> -obj-y += iommu.o
>>>>  obj-y += dmar.o
>>>> -obj-y += utils.o
>>>> -obj-y += qinval.o
>>>>  obj-y += intremap.o
>>>> +obj-y += iommu.o
>>>> +obj-y += qinval.o
>>>>  obj-y += quirks.o
>>>> +obj-y += utils.o
>>>
>>> Why do you need to shuffle the list above?
>>
>> I placed them in alphabetic order.
> 
> Which is appreciated. But this being non-essential for the patch, it
> would avoid (valid) reviewer questions if you said in the description
> this is an intended but non-essential change.
> 
>>> Also I'm not sure the Intel vIOMMU implementation should live here. As
>>> you can see the path is:
>>>
>>> xen/drivers/passthrough/vtd/
>>>
>>> The vIOMMU is not tied to passthrough at all, so I would rather place
>>> it in:
> 
> Hmm, is vIOMMU usable without an actual backing IOMMU?

For interrupt remapping support, we can emulate it without physical IOMMU.

> 
>>> xen/drivers/vvtd/
>>>
>>> Or maybe you can create something like:
>>>
>>> xen/drivers/viommu/
>>>
>>> So that all vIOMMU implementations can share some code.
>>>
>>
>> vvtd and vtd use the same header files (i.g. vtd.h). That is why we put
>> it there.  If that, we shoule move the related header files to a public
>> directory.
> 
> And AMD (long ago) had placed their (still incomplete) virtual
> implementation into the same directory as well. I.e. at this point
> I'm not really opposed to the proposed placement here, albeit
> I can see the point of Roger's argument.
> 
> Jan
>
diff mbox

Patch

diff --git a/xen/drivers/passthrough/vtd/Makefile b/xen/drivers/passthrough/vtd/Makefile
index f302653..163c7fe 100644
--- a/xen/drivers/passthrough/vtd/Makefile
+++ b/xen/drivers/passthrough/vtd/Makefile
@@ -1,8 +1,9 @@ 
 subdir-$(CONFIG_X86) += x86
 
-obj-y += iommu.o
 obj-y += dmar.o
-obj-y += utils.o
-obj-y += qinval.o
 obj-y += intremap.o
+obj-y += iommu.o
+obj-y += qinval.o
 obj-y += quirks.o
+obj-y += utils.o
+obj-$(CONFIG_VIOMMU) += vvtd.o
diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
index d7e433e..ef038c9 100644
--- a/xen/drivers/passthrough/vtd/iommu.h
+++ b/xen/drivers/passthrough/vtd/iommu.h
@@ -66,6 +66,12 @@ 
 #define VER_MAJOR(v)        (((v) & 0xf0) >> 4)
 #define VER_MINOR(v)        ((v) & 0x0f)
 
+/* Supported Adjusted Guest Address Widths */
+#define DMA_CAP_SAGAW_SHIFT         8
+ /* 39-bit AGAW, 3-level page-table */
+#define DMA_CAP_SAGAW_39bit         (0x2ULL << DMA_CAP_SAGAW_SHIFT)
+#define DMA_CAP_ND_64K              6ULL
+
 /*
  * Decoding Capability Register
  */
@@ -74,6 +80,7 @@ 
 #define cap_write_drain(c)     (((c) >> 54) & 1)
 #define cap_max_amask_val(c)   (((c) >> 48) & 0x3f)
 #define cap_num_fault_regs(c)  ((((c) >> 40) & 0xff) + 1)
+#define cap_set_num_fault_regs(c)  ((((c) - 1) & 0xff) << 40)
 #define cap_pgsel_inv(c)       (((c) >> 39) & 1)
 
 #define cap_super_page_val(c)  (((c) >> 34) & 0xf)
@@ -85,11 +92,13 @@ 
 #define cap_sps_1tb(c)         ((c >> 37) & 1)
 
 #define cap_fault_reg_offset(c)    ((((c) >> 24) & 0x3ff) * 16)
+#define cap_set_fault_reg_offset(c) ((((c) / 16) & 0x3ff) << 24 )
 
 #define cap_isoch(c)        (((c) >> 23) & 1)
 #define cap_qos(c)        (((c) >> 22) & 1)
 #define cap_mgaw(c)        ((((c) >> 16) & 0x3f) + 1)
-#define cap_sagaw(c)        (((c) >> 8) & 0x1f)
+#define cap_set_mgaw(c)     ((((c) - 1) & 0x3f) << 16)
+#define cap_sagaw(c)        (((c) >> DMA_CAP_SAGAW_SHIFT) & 0x1f)
 #define cap_caching_mode(c)    (((c) >> 7) & 1)
 #define cap_phmr(c)        (((c) >> 6) & 1)
 #define cap_plmr(c)        (((c) >> 5) & 1)
@@ -104,10 +113,16 @@ 
 #define ecap_niotlb_iunits(e)    ((((e) >> 24) & 0xff) + 1)
 #define ecap_iotlb_offset(e)     ((((e) >> 8) & 0x3ff) * 16)
 #define ecap_coherent(e)         ((e >> 0) & 0x1)
-#define ecap_queued_inval(e)     ((e >> 1) & 0x1)
+#define DMA_ECAP_QI_SHIFT        1
+#define DMA_ECAP_QI              (1ULL << DMA_ECAP_QI_SHIFT)
+#define ecap_queued_inval(e)     ((e >> DMA_ECAP_QI_SHIFT) & 0x1)
 #define ecap_dev_iotlb(e)        ((e >> 2) & 0x1)
-#define ecap_intr_remap(e)       ((e >> 3) & 0x1)
-#define ecap_eim(e)              ((e >> 4) & 0x1)
+#define DMA_ECAP_IR_SHIFT        3
+#define DMA_ECAP_IR              (1ULL << DMA_ECAP_IR_SHIFT)
+#define ecap_intr_remap(e)       ((e >> DMA_ECAP_IR_SHIFT) & 0x1)
+#define DMA_ECAP_EIM_SHIFT       4
+#define DMA_ECAP_EIM             (1ULL << DMA_ECAP_EIM_SHIFT)
+#define ecap_eim(e)              ((e >> DMA_ECAP_EIM_SHIFT) & 0x1)
 #define ecap_cache_hints(e)      ((e >> 5) & 0x1)
 #define ecap_pass_thru(e)        ((e >> 6) & 0x1)
 #define ecap_snp_ctl(e)          ((e >> 7) & 0x1)
diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
new file mode 100644
index 0000000..c851ec7
--- /dev/null
+++ b/xen/drivers/passthrough/vtd/vvtd.c
@@ -0,0 +1,147 @@ 
+/*
+ * vvtd.c
+ *
+ * virtualize VTD for HVM.
+ *
+ * Copyright (C) 2017 Chao Gao, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <xen/types.h>
+#include <xen/viommu.h>
+#include <xen/xmalloc.h>
+#include <asm/current.h>
+#include <asm/hvm/domain.h>
+#include <asm/page.h>
+
+#include "iommu.h"
+
+/* Supported capabilities by vvtd */
+unsigned int vvtd_caps = VIOMMU_CAP_IRQ_REMAPPING;
+
+union hvm_hw_vvtd_regs {
+    uint32_t data32[256];
+    uint64_t data64[128];
+};
+
+struct vvtd {
+    /* Address range of remapping hardware register-set */
+    uint64_t base_addr;
+    uint64_t length;
+    /* Point back to the owner domain */
+    struct domain *domain;
+    union hvm_hw_vvtd_regs *regs;
+    struct page_info *regs_page;
+};
+
+static inline void vvtd_set_reg(struct vvtd *vtd, uint32_t reg, uint32_t value)
+{
+    vtd->regs->data32[reg/sizeof(uint32_t)] = value;
+}
+
+static inline uint32_t vvtd_get_reg(struct vvtd *vtd, uint32_t reg)
+{
+    return vtd->regs->data32[reg/sizeof(uint32_t)];
+}
+
+static inline void vvtd_set_reg_quad(struct vvtd *vtd, uint32_t reg,
+                                     uint64_t value)
+{
+    vtd->regs->data64[reg/sizeof(uint64_t)] = value;
+}
+
+static inline uint64_t vvtd_get_reg_quad(struct vvtd *vtd, uint32_t reg)
+{
+    return vtd->regs->data64[reg/sizeof(uint64_t)];
+}
+
+static void vvtd_reset(struct vvtd *vvtd, uint64_t capability)
+{
+    uint64_t cap = cap_set_num_fault_regs(1ULL) |
+                   cap_set_fault_reg_offset(0x220ULL) |
+                   cap_set_mgaw(39ULL) | DMA_CAP_SAGAW_39bit |
+                   DMA_CAP_ND_64K;
+    uint64_t ecap = DMA_ECAP_IR | DMA_ECAP_EIM | DMA_ECAP_QI;
+
+    vvtd_set_reg(vvtd, DMAR_VER_REG, 0x10UL);
+    vvtd_set_reg_quad(vvtd, DMAR_CAP_REG, cap);
+    vvtd_set_reg_quad(vvtd, DMAR_ECAP_REG, ecap);
+    vvtd_set_reg(vvtd, DMAR_FECTL_REG, 0x80000000UL);
+    vvtd_set_reg(vvtd, DMAR_IECTL_REG, 0x80000000UL);
+}
+
+static int vvtd_create(struct domain *d, struct viommu *viommu)
+{
+    struct vvtd *vvtd;
+    int ret;
+
+    if ( !is_hvm_domain(d) || (viommu->base_address & (PAGE_SIZE - 1)) ||
+        (~vvtd_caps & viommu->caps) )
+        return -EINVAL;
+
+    ret = -ENOMEM;
+    vvtd = xzalloc_bytes(sizeof(struct vvtd));
+    if ( !vvtd )
+        return ret;
+
+    vvtd->regs_page = alloc_domheap_page(d, MEMF_no_owner);
+    if ( !vvtd->regs_page )
+        goto out1;
+
+    vvtd->regs = __map_domain_page_global(vvtd->regs_page);
+    if ( !vvtd->regs )
+        goto out2;
+    clear_page(vvtd->regs);
+
+    vvtd_reset(vvtd, viommu->caps);
+    vvtd->base_addr = viommu->base_address;
+    vvtd->domain = d;
+
+    viommu->priv = vvtd;
+
+    return 0;
+
+ out2:
+    free_domheap_page(vvtd->regs_page);
+ out1:
+    xfree(vvtd);
+    return ret;
+}
+
+static int vvtd_destroy(struct viommu *viommu)
+{
+    struct vvtd *vvtd = viommu->priv;
+
+    if ( vvtd )
+    {
+        unmap_domain_page_global(vvtd->regs);
+        free_domheap_page(vvtd->regs_page);
+        xfree(vvtd);
+    }
+    return 0;
+}
+
+struct viommu_ops vvtd_hvm_vmx_ops = {
+    .create = vvtd_create,
+    .destroy = vvtd_destroy
+};
+
+static int vvtd_register(void)
+{
+    viommu_register_type(VIOMMU_TYPE_INTEL_VTD, &vvtd_hvm_vmx_ops);
+    return 0;
+}
+__initcall(vvtd_register);