diff mbox

[v2,22/30] xen/x86: support PVHv2 Dom0 BAR remapping

Message ID 1474991845-27962-23-git-send-email-roger.pau@citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Roger Pau Monné Sept. 27, 2016, 3:57 p.m. UTC
Add handlers to detect attemps from a PVHv2 Dom0 to change the position of
the PCI BARs and properly remap them.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
Cc: Paul Durrant <paul.durrant@citrix.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
---
 xen/arch/x86/hvm/io.c         |   2 +
 xen/drivers/passthrough/pci.c | 307 ++++++++++++++++++++++++++++++++++++++++++
 xen/include/asm-x86/hvm/io.h  |  19 +++
 xen/include/xen/pci.h         |   3 +
 4 files changed, 331 insertions(+)

Comments

Paul Durrant Oct. 3, 2016, 10:10 a.m. UTC | #1
> -----Original Message-----

> From: Roger Pau Monne [mailto:roger.pau@citrix.com]

> Sent: 27 September 2016 16:57

> To: xen-devel@lists.xenproject.org

> Cc: konrad.wilk@oracle.com; boris.ostrovsky@oracle.com; Roger Pau Monne

> <roger.pau@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Jan

> Beulich <jbeulich@suse.com>; Andrew Cooper

> <Andrew.Cooper3@citrix.com>

> Subject: [PATCH v2 22/30] xen/x86: support PVHv2 Dom0 BAR remapping

> 

> Add handlers to detect attemps from a PVHv2 Dom0 to change the position

> of the PCI BARs and properly remap them.

> 

> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

> ---

> Cc: Paul Durrant <paul.durrant@citrix.com>

> Cc: Jan Beulich <jbeulich@suse.com>

> Cc: Andrew Cooper <andrew.cooper3@citrix.com>

> ---

>  xen/arch/x86/hvm/io.c         |   2 +

>  xen/drivers/passthrough/pci.c | 307

> ++++++++++++++++++++++++++++++++++++++++++

>  xen/include/asm-x86/hvm/io.h  |  19 +++

>  xen/include/xen/pci.h         |   3 +

>  4 files changed, 331 insertions(+)

> 

> diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c index

> 7de1de3..4db0266 100644

> --- a/xen/arch/x86/hvm/io.c

> +++ b/xen/arch/x86/hvm/io.c

> @@ -862,6 +862,8 @@ static int hvm_pt_add_register(struct hvm_pt_device

> *dev,  }

> 

>  static struct hvm_pt_handler_init *hwdom_pt_handlers[] = {

> +    &hvm_pt_bar_init,

> +    &hvm_pt_vf_bar_init,

>  };

> 

>  int hwdom_add_device(struct pci_dev *pdev) diff --git

> a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index

> 6d831dd..60c9e74 100644

> --- a/xen/drivers/passthrough/pci.c

> +++ b/xen/drivers/passthrough/pci.c

> @@ -633,6 +633,313 @@ static int pci_size_bar(unsigned int seg, unsigned

> int bus, unsigned int slot,

>      return 0;

>  }

> 

> +static bool bar_reg_is_vf(uint32_t real_offset, uint32_t

> +handler_offset) {

> +    if ( real_offset - handler_offset == PCI_SRIOV_BAR )

> +        return true;

> +    else

> +        return false;

> +}

> +


Return the bool expression rather than the if-then-else?

> +static int bar_reg_init(struct hvm_pt_device *s,

> +                        struct hvm_pt_reg_handler *handler,

> +                        uint32_t real_offset, uint32_t *data) {

> +    uint8_t seg, bus, slot, func;

> +    uint64_t addr, size;

> +    uint32_t val;

> +    unsigned int index = handler->offset / 4;

> +    bool vf = bar_reg_is_vf(real_offset, handler->offset);

> +    struct hvm_pt_bar *bars = (vf ? s->vf_bars : s->bars);

> +    int num_bars = (vf ? PCI_SRIOV_NUM_BARS : s->num_bars);

> +    int rc;

> +

> +    if ( index >= num_bars )

> +    {

> +        *data = HVM_PT_INVALID_REG;

> +        return 0;

> +    }

> +

> +    seg = s->pdev->seg;

> +    bus = s->pdev->bus;

> +    slot = PCI_SLOT(s->pdev->devfn);

> +    func = PCI_FUNC(s->pdev->devfn);

> +    val = pci_conf_read32(seg, bus, slot, func, real_offset);

> +

> +    if ( index > 0 && bars[index - 1].type == HVM_PT_BAR_MEM64_LO )

> +        bars[index].type = HVM_PT_BAR_MEM64_HI;

> +    else if ( (val & PCI_BASE_ADDRESS_SPACE) ==

> PCI_BASE_ADDRESS_SPACE_IO )

> +    {

> +        bars[index].type = HVM_PT_BAR_UNUSED;

> +    }

> +    else if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==

> +              PCI_BASE_ADDRESS_MEM_TYPE_64 )

> +        bars[index].type = HVM_PT_BAR_MEM64_LO;

> +    else

> +        bars[index].type = HVM_PT_BAR_MEM32;

> +

> +    if ( bars[index].type == HVM_PT_BAR_MEM32 ||

> +         bars[index].type == HVM_PT_BAR_MEM64_LO )

> +    {

> +        /* Size the BAR and map it. */

> +        rc = pci_size_bar(seg, bus, slot, func, real_offset - handler->offset,

> +                          num_bars, &index, &addr, &size);

> +        if ( rc )

> +        {

> +            printk_pdev(s->pdev, XENLOG_ERR, "unable to size BAR#%d\n",

> +                        index);

> +            return rc;

> +        }

> +

> +        if ( size == 0 )

> +            bars[index].type = HVM_PT_BAR_UNUSED;

> +        else

> +        {

> +            printk_pdev(s->pdev, XENLOG_DEBUG,

> +                        "Mapping BAR#%u: %#lx size: %u\n", index, addr, size);

> +            rc = modify_mmio_11(s->pdev->domain, PFN_DOWN(addr),

> +                                DIV_ROUND_UP(size, PAGE_SIZE), true);

> +            if ( rc )

> +            {

> +                printk_pdev(s->pdev, XENLOG_ERR,

> +                            "failed to map BAR#%d into memory map: %d\n",

> +                            index, rc);

> +                return rc;

> +            }

> +        }

> +    }

> +

> +    *data = bars[index].type == HVM_PT_BAR_UNUSED ?

> HVM_PT_INVALID_REG : val;

> +    return 0;

> +}

> +

> +/* Only allow writes to check the size of the BARs */ static int

> +allow_bar_write(struct hvm_pt_bar *bar, struct hvm_pt_reg *reg,

> +                           struct pci_dev *pdev, uint32_t val) {

> +    uint32_t mask;

> +

> +    if ( bar->type == HVM_PT_BAR_MEM64_HI )

> +        mask = ~0;

> +    else

> +        mask = (uint32_t) PCI_BASE_ADDRESS_MEM_MASK;

> +

> +    if ( val != ~0 && (val & mask) != (reg->val.dword & mask) )

> +    {

> +        printk_pdev(pdev, XENLOG_ERR,

> +                "changing the position of the BARs is not yet supported: %#x\n",

> +                    val);


This doesn't seem to quite tally with commit comment. Can BARs be re-programmed or not?

> +        return -EINVAL;

> +    }

> +

> +    return 0;

> +}

> +

> +static int bar_reg_write(struct hvm_pt_device *s, struct hvm_pt_reg *reg,

> +                         uint32_t *val, uint32_t dev_value, uint32_t

> +valid_mask) {

> +    int index = reg->handler->offset / 4;

> +

> +    return allow_bar_write(&s->bars[index], reg, s->pdev, *val); }

> +

> +static int vf_bar_reg_write(struct hvm_pt_device *s, struct hvm_pt_reg

> *reg,

> +                            uint32_t *val, uint32_t dev_value,

> +                            uint32_t valid_mask) {

> +    int index = reg->handler->offset / 4;

> +

> +    return allow_bar_write(&s->vf_bars[index], reg, s->pdev, *val); }

> +

> +/* BAR regs static information table */ static struct

> +hvm_pt_reg_handler bar_handler[] = {

> +    /* BAR 0 reg */

> +    /* mask of BAR need to be decided later, depends on IO/MEM type */

> +    {

> +        .offset     = 0,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* BAR 1 reg */

> +    {

> +        .offset     = 4,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* BAR 2 reg */

> +    {

> +        .offset     = 8,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* BAR 3 reg */

> +    {

> +        .offset     = 12,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* BAR 4 reg */

> +    {

> +        .offset     = 16,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* BAR 5 reg */

> +    {

> +        .offset     = 20,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = bar_reg_write,

> +    },

> +    /* TODO: we should also trap accesses to the expansion ROM base

> address. */

> +    /* End. */

> +    {

> +        .size = 0,

> +    },

> +};

> +

> +static int bar_group_init(struct hvm_pt_device *dev,

> +                          struct hvm_pt_reg_group *group) {

> +    uint8_t seg, bus, slot, func;

> +

> +    seg = dev->pdev->seg;

> +    bus = dev->pdev->bus;

> +    slot = PCI_SLOT(dev->pdev->devfn);

> +    func = PCI_FUNC(dev->pdev->devfn);

> +    dev->htype = pci_conf_read8(seg, bus, slot, func, PCI_HEADER_TYPE) &

> 0x7f;

> +    switch ( dev->htype )

> +    {

> +    case PCI_HEADER_TYPE_NORMAL:

> +        group->size = 36;

> +        dev->num_bars = 6;

> +        break;

> +    case PCI_HEADER_TYPE_BRIDGE:

> +        group->size = 44;

> +        dev->num_bars = 2;

> +        break;

> +    default:

> +        printk_pdev(dev->pdev, XENLOG_ERR, "device type %#x not

> supported\n",

> +                    dev->htype);

> +        return -EINVAL;

> +    }

> +    group->base_offset = PCI_BASE_ADDRESS_0;

> +

> +    return 0;

> +}

> +

> +struct hvm_pt_handler_init hvm_pt_bar_init = {

> +    .handlers = bar_handler,

> +    .init = bar_group_init,

> +};

> +

> +/* BAR regs static information table */ static struct

> +hvm_pt_reg_handler vf_bar_handler[] = {

> +    /* BAR 0 reg */

> +    /* mask of BAR need to be decided later, depends on IO/MEM type */

> +    {

> +        .offset     = 0,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* BAR 1 reg */

> +    {

> +        .offset     = 4,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* BAR 2 reg */

> +    {

> +        .offset     = 8,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* BAR 3 reg */

> +    {

> +        .offset     = 12,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* BAR 4 reg */

> +    {

> +        .offset     = 16,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* BAR 5 reg */

> +    {

> +        .offset     = 20,

> +        .size       = 4,

> +        .init_val   = 0x00000000,

> +        .init       = bar_reg_init,

> +        .u.dw.write = vf_bar_reg_write,

> +    },

> +    /* End. */

> +    {

> +        .size = 0,

> +    },

> +};

> +

> +static int vf_bar_group_init(struct hvm_pt_device *dev,

> +                          struct hvm_pt_reg_group *group) {

> +    uint8_t seg, bus, slot, func;

> +    struct pci_dev *pdev = dev->pdev;

> +    int sr_offset;

> +    uint16_t ctrl;

> +

> +    seg = dev->pdev->seg;

> +    bus = dev->pdev->bus;

> +    slot = PCI_SLOT(dev->pdev->devfn);

> +    func = PCI_FUNC(dev->pdev->devfn);

> +

> +    sr_offset = pci_find_ext_capability(seg, bus, pdev->devfn,

> +                                        PCI_EXT_CAP_ID_SRIOV);

> +    if ( sr_offset == 0 )

> +        return -EINVAL;

> +

> +    printk_pdev(pdev, XENLOG_DEBUG, "found SR-IOV capabilities\n");

> +    ctrl = pci_conf_read16(seg, bus, slot, func, sr_offset + PCI_SRIOV_CTRL);

> +    if ( (ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )

> +    {

> +        printk_pdev(pdev, XENLOG_ERR,

> +                    "SR-IOV functions already enabled (%#04x)\n", ctrl);

> +        return -EINVAL;

> +    }

> +

> +    group->base_offset = sr_offset + PCI_SRIOV_BAR;

> +    group->size = PCI_SRIOV_NUM_BARS * 4;

> +

> +    return 0;

> +}

> +

> +struct hvm_pt_handler_init hvm_pt_vf_bar_init = {

> +    .handlers = vf_bar_handler,

> +    .init = vf_bar_group_init,

> +};

> +

>  int pci_add_device(u16 seg, u8 bus, u8 devfn,

>                     const struct pci_dev_info *info, nodeid_t node)  { diff --git

> a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h index

> 80f830d..25af036 100644

> --- a/xen/include/asm-x86/hvm/io.h

> +++ b/xen/include/asm-x86/hvm/io.h

> @@ -19,6 +19,7 @@

>  #ifndef __ASM_X86_HVM_IO_H__

>  #define __ASM_X86_HVM_IO_H__

> 

> +#include <xen/pci_regs.h>

>  #include <asm/hvm/vpic.h>

>  #include <asm/hvm/vioapic.h>

>  #include <public/hvm/ioreq.h>

> @@ -275,6 +276,16 @@ struct hvm_pt_msi {

>      bool_t mapped;       /* when pirq is mapped */

>  };

> 

> +struct hvm_pt_bar {

> +    uint32_t val;

> +    enum bar_type {

> +        HVM_PT_BAR_UNUSED,

> +        HVM_PT_BAR_MEM32,

> +        HVM_PT_BAR_MEM64_LO,

> +        HVM_PT_BAR_MEM64_HI,

> +    } type;

> +};

> +

>  /*

>   * Guest passed-through PCI device.

>   */

> @@ -289,6 +300,14 @@ struct hvm_pt_device {

>      /* MSI status. */

>      struct hvm_pt_msi msi;

> 

> +    /* PCI header type. */

> +    uint8_t htype;

> +

> +    /* BAR tracking. */

> +    int num_bars;

> +    struct hvm_pt_bar bars[6];

> +    struct hvm_pt_bar vf_bars[PCI_SRIOV_NUM_BARS];

> +

>      struct list_head register_groups;

>  };

> 

> diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h index

> b21a891..51e0255 100644

> --- a/xen/include/xen/pci.h

> +++ b/xen/include/xen/pci.h

> @@ -174,4 +174,7 @@ int msixtbl_pt_register(struct domain *, struct pirq *,

> uint64_t gtable);  void msixtbl_pt_unregister(struct domain *, struct pirq *);

> void msixtbl_pt_cleanup(struct domain *d);

> 

> +extern struct hvm_pt_handler_init hvm_pt_bar_init; extern struct

> +hvm_pt_handler_init hvm_pt_vf_bar_init;

> +

>  #endif /* __XEN_PCI_H__ */

> --

> 2.7.4 (Apple Git-66)
Roger Pau Monné Oct. 6, 2016, 3:25 p.m. UTC | #2
On Mon, Oct 03, 2016 at 11:10:44AM +0100, Paul Durrant wrote:
> > -----Original Message-----
> > From: Roger Pau Monne [mailto:roger.pau@citrix.com]
> > Sent: 27 September 2016 16:57
> > To: xen-devel@lists.xenproject.org
> > Cc: konrad.wilk@oracle.com; boris.ostrovsky@oracle.com; Roger Pau Monne
> > <roger.pau@citrix.com>; Paul Durrant <Paul.Durrant@citrix.com>; Jan
> > Beulich <jbeulich@suse.com>; Andrew Cooper
> > <Andrew.Cooper3@citrix.com>
> > Subject: [PATCH v2 22/30] xen/x86: support PVHv2 Dom0 BAR remapping
> > 
> > Add handlers to detect attemps from a PVHv2 Dom0 to change the position
> > of the PCI BARs and properly remap them.
> > 
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> > ---
> > Cc: Paul Durrant <paul.durrant@citrix.com>
> > Cc: Jan Beulich <jbeulich@suse.com>
> > Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> > ---
> >  xen/arch/x86/hvm/io.c         |   2 +
> >  xen/drivers/passthrough/pci.c | 307
> > ++++++++++++++++++++++++++++++++++++++++++
> >  xen/include/asm-x86/hvm/io.h  |  19 +++
> >  xen/include/xen/pci.h         |   3 +
> >  4 files changed, 331 insertions(+)
> > 
> > diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c index
> > 7de1de3..4db0266 100644
> > --- a/xen/arch/x86/hvm/io.c
> > +++ b/xen/arch/x86/hvm/io.c
> > @@ -862,6 +862,8 @@ static int hvm_pt_add_register(struct hvm_pt_device
> > *dev,  }
> > 
> >  static struct hvm_pt_handler_init *hwdom_pt_handlers[] = {
> > +    &hvm_pt_bar_init,
> > +    &hvm_pt_vf_bar_init,
> >  };
> > 
> >  int hwdom_add_device(struct pci_dev *pdev) diff --git
> > a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index
> > 6d831dd..60c9e74 100644
> > --- a/xen/drivers/passthrough/pci.c
> > +++ b/xen/drivers/passthrough/pci.c
> > @@ -633,6 +633,313 @@ static int pci_size_bar(unsigned int seg, unsigned
> > int bus, unsigned int slot,
> >      return 0;
> >  }
> > 
> > +static bool bar_reg_is_vf(uint32_t real_offset, uint32_t
> > +handler_offset) {
> > +    if ( real_offset - handler_offset == PCI_SRIOV_BAR )
> > +        return true;
> > +    else
> > +        return false;
> > +}
> > +
> 
> Return the bool expression rather than the if-then-else?

Done.
 
> > +static int bar_reg_init(struct hvm_pt_device *s,
> > +                        struct hvm_pt_reg_handler *handler,
> > +                        uint32_t real_offset, uint32_t *data) {
> > +    uint8_t seg, bus, slot, func;
> > +    uint64_t addr, size;
> > +    uint32_t val;
> > +    unsigned int index = handler->offset / 4;
> > +    bool vf = bar_reg_is_vf(real_offset, handler->offset);
> > +    struct hvm_pt_bar *bars = (vf ? s->vf_bars : s->bars);
> > +    int num_bars = (vf ? PCI_SRIOV_NUM_BARS : s->num_bars);
> > +    int rc;
> > +
> > +    if ( index >= num_bars )
> > +    {
> > +        *data = HVM_PT_INVALID_REG;
> > +        return 0;
> > +    }
> > +
> > +    seg = s->pdev->seg;
> > +    bus = s->pdev->bus;
> > +    slot = PCI_SLOT(s->pdev->devfn);
> > +    func = PCI_FUNC(s->pdev->devfn);
> > +    val = pci_conf_read32(seg, bus, slot, func, real_offset);
> > +
> > +    if ( index > 0 && bars[index - 1].type == HVM_PT_BAR_MEM64_LO )
> > +        bars[index].type = HVM_PT_BAR_MEM64_HI;
> > +    else if ( (val & PCI_BASE_ADDRESS_SPACE) ==
> > PCI_BASE_ADDRESS_SPACE_IO )
> > +    {
> > +        bars[index].type = HVM_PT_BAR_UNUSED;
> > +    }
> > +    else if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
> > +              PCI_BASE_ADDRESS_MEM_TYPE_64 )
> > +        bars[index].type = HVM_PT_BAR_MEM64_LO;
> > +    else
> > +        bars[index].type = HVM_PT_BAR_MEM32;
> > +
> > +    if ( bars[index].type == HVM_PT_BAR_MEM32 ||
> > +         bars[index].type == HVM_PT_BAR_MEM64_LO )
> > +    {
> > +        /* Size the BAR and map it. */
> > +        rc = pci_size_bar(seg, bus, slot, func, real_offset - handler->offset,
> > +                          num_bars, &index, &addr, &size);
> > +        if ( rc )
> > +        {
> > +            printk_pdev(s->pdev, XENLOG_ERR, "unable to size BAR#%d\n",
> > +                        index);
> > +            return rc;
> > +        }
> > +
> > +        if ( size == 0 )
> > +            bars[index].type = HVM_PT_BAR_UNUSED;
> > +        else
> > +        {
> > +            printk_pdev(s->pdev, XENLOG_DEBUG,
> > +                        "Mapping BAR#%u: %#lx size: %u\n", index, addr, size);
> > +            rc = modify_mmio_11(s->pdev->domain, PFN_DOWN(addr),
> > +                                DIV_ROUND_UP(size, PAGE_SIZE), true);
> > +            if ( rc )
> > +            {
> > +                printk_pdev(s->pdev, XENLOG_ERR,
> > +                            "failed to map BAR#%d into memory map: %d\n",
> > +                            index, rc);
> > +                return rc;
> > +            }
> > +        }
> > +    }
> > +
> > +    *data = bars[index].type == HVM_PT_BAR_UNUSED ?
> > HVM_PT_INVALID_REG : val;
> > +    return 0;
> > +}
> > +
> > +/* Only allow writes to check the size of the BARs */ static int
> > +allow_bar_write(struct hvm_pt_bar *bar, struct hvm_pt_reg *reg,
> > +                           struct pci_dev *pdev, uint32_t val) {
> > +    uint32_t mask;
> > +
> > +    if ( bar->type == HVM_PT_BAR_MEM64_HI )
> > +        mask = ~0;
> > +    else
> > +        mask = (uint32_t) PCI_BASE_ADDRESS_MEM_MASK;
> > +
> > +    if ( val != ~0 && (val & mask) != (reg->val.dword & mask) )
> > +    {
> > +        printk_pdev(pdev, XENLOG_ERR,
> > +                "changing the position of the BARs is not yet supported: %#x\n",
> > +                    val);
> 
> This doesn't seem to quite tally with commit comment. Can BARs be re-programmed or not?

Right, I got messed up in the commit meesage. This _prevents_ remapping BARs 
(which would also not work anyway). Further code to allow remapping BARs can 
be based on top of this handlers.

Roger.
diff mbox

Patch

diff --git a/xen/arch/x86/hvm/io.c b/xen/arch/x86/hvm/io.c
index 7de1de3..4db0266 100644
--- a/xen/arch/x86/hvm/io.c
+++ b/xen/arch/x86/hvm/io.c
@@ -862,6 +862,8 @@  static int hvm_pt_add_register(struct hvm_pt_device *dev,
 }
 
 static struct hvm_pt_handler_init *hwdom_pt_handlers[] = {
+    &hvm_pt_bar_init,
+    &hvm_pt_vf_bar_init,
 };
 
 int hwdom_add_device(struct pci_dev *pdev)
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 6d831dd..60c9e74 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -633,6 +633,313 @@  static int pci_size_bar(unsigned int seg, unsigned int bus, unsigned int slot,
     return 0;
 }
 
+static bool bar_reg_is_vf(uint32_t real_offset, uint32_t handler_offset)
+{
+    if ( real_offset - handler_offset == PCI_SRIOV_BAR )
+        return true;
+    else
+        return false;
+}
+
+static int bar_reg_init(struct hvm_pt_device *s,
+                        struct hvm_pt_reg_handler *handler,
+                        uint32_t real_offset, uint32_t *data)
+{
+    uint8_t seg, bus, slot, func;
+    uint64_t addr, size;
+    uint32_t val;
+    unsigned int index = handler->offset / 4;
+    bool vf = bar_reg_is_vf(real_offset, handler->offset);
+    struct hvm_pt_bar *bars = (vf ? s->vf_bars : s->bars);
+    int num_bars = (vf ? PCI_SRIOV_NUM_BARS : s->num_bars);
+    int rc;
+
+    if ( index >= num_bars )
+    {
+        *data = HVM_PT_INVALID_REG;
+        return 0;
+    }
+
+    seg = s->pdev->seg;
+    bus = s->pdev->bus;
+    slot = PCI_SLOT(s->pdev->devfn);
+    func = PCI_FUNC(s->pdev->devfn);
+    val = pci_conf_read32(seg, bus, slot, func, real_offset);
+
+    if ( index > 0 && bars[index - 1].type == HVM_PT_BAR_MEM64_LO )
+        bars[index].type = HVM_PT_BAR_MEM64_HI;
+    else if ( (val & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO )
+    {
+        bars[index].type = HVM_PT_BAR_UNUSED;
+    }
+    else if ( (val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) ==
+              PCI_BASE_ADDRESS_MEM_TYPE_64 )
+        bars[index].type = HVM_PT_BAR_MEM64_LO;
+    else
+        bars[index].type = HVM_PT_BAR_MEM32;
+
+    if ( bars[index].type == HVM_PT_BAR_MEM32 ||
+         bars[index].type == HVM_PT_BAR_MEM64_LO )
+    {
+        /* Size the BAR and map it. */
+        rc = pci_size_bar(seg, bus, slot, func, real_offset - handler->offset,
+                          num_bars, &index, &addr, &size);
+        if ( rc )
+        {
+            printk_pdev(s->pdev, XENLOG_ERR, "unable to size BAR#%d\n",
+                        index);
+            return rc;
+        }
+
+        if ( size == 0 )
+            bars[index].type = HVM_PT_BAR_UNUSED;
+        else
+        {
+            printk_pdev(s->pdev, XENLOG_DEBUG,
+                        "Mapping BAR#%u: %#lx size: %u\n", index, addr, size);
+            rc = modify_mmio_11(s->pdev->domain, PFN_DOWN(addr),
+                                DIV_ROUND_UP(size, PAGE_SIZE), true);
+            if ( rc )
+            {
+                printk_pdev(s->pdev, XENLOG_ERR,
+                            "failed to map BAR#%d into memory map: %d\n",
+                            index, rc);
+                return rc;
+            }
+        }
+    }
+
+    *data = bars[index].type == HVM_PT_BAR_UNUSED ? HVM_PT_INVALID_REG : val;
+    return 0;
+}
+
+/* Only allow writes to check the size of the BARs */
+static int allow_bar_write(struct hvm_pt_bar *bar, struct hvm_pt_reg *reg,
+                           struct pci_dev *pdev, uint32_t val)
+{
+    uint32_t mask;
+
+    if ( bar->type == HVM_PT_BAR_MEM64_HI )
+        mask = ~0;
+    else
+        mask = (uint32_t) PCI_BASE_ADDRESS_MEM_MASK;
+
+    if ( val != ~0 && (val & mask) != (reg->val.dword & mask) )
+    {
+        printk_pdev(pdev, XENLOG_ERR,
+                "changing the position of the BARs is not yet supported: %#x\n",
+                    val);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+static int bar_reg_write(struct hvm_pt_device *s, struct hvm_pt_reg *reg,
+                         uint32_t *val, uint32_t dev_value, uint32_t valid_mask)
+{
+    int index = reg->handler->offset / 4;
+
+    return allow_bar_write(&s->bars[index], reg, s->pdev, *val);
+}
+
+static int vf_bar_reg_write(struct hvm_pt_device *s, struct hvm_pt_reg *reg,
+                            uint32_t *val, uint32_t dev_value,
+                            uint32_t valid_mask)
+{
+    int index = reg->handler->offset / 4;
+
+    return allow_bar_write(&s->vf_bars[index], reg, s->pdev, *val);
+}
+
+/* BAR regs static information table */
+static struct hvm_pt_reg_handler bar_handler[] = {
+    /* BAR 0 reg */
+    /* mask of BAR need to be decided later, depends on IO/MEM type */
+    {
+        .offset     = 0,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* BAR 1 reg */
+    {
+        .offset     = 4,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* BAR 2 reg */
+    {
+        .offset     = 8,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* BAR 3 reg */
+    {
+        .offset     = 12,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* BAR 4 reg */
+    {
+        .offset     = 16,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* BAR 5 reg */
+    {
+        .offset     = 20,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = bar_reg_write,
+    },
+    /* TODO: we should also trap accesses to the expansion ROM base address. */
+    /* End. */
+    {
+        .size = 0,
+    },
+};
+
+static int bar_group_init(struct hvm_pt_device *dev,
+                          struct hvm_pt_reg_group *group)
+{
+    uint8_t seg, bus, slot, func;
+
+    seg = dev->pdev->seg;
+    bus = dev->pdev->bus;
+    slot = PCI_SLOT(dev->pdev->devfn);
+    func = PCI_FUNC(dev->pdev->devfn);
+    dev->htype = pci_conf_read8(seg, bus, slot, func, PCI_HEADER_TYPE) & 0x7f;
+    switch ( dev->htype )
+    {
+    case PCI_HEADER_TYPE_NORMAL:
+        group->size = 36;
+        dev->num_bars = 6;
+        break;
+    case PCI_HEADER_TYPE_BRIDGE:
+        group->size = 44;
+        dev->num_bars = 2;
+        break;
+    default:
+        printk_pdev(dev->pdev, XENLOG_ERR, "device type %#x not supported\n",
+                    dev->htype);
+        return -EINVAL;
+    }
+    group->base_offset = PCI_BASE_ADDRESS_0;
+
+    return 0;
+}
+
+struct hvm_pt_handler_init hvm_pt_bar_init = {
+    .handlers = bar_handler,
+    .init = bar_group_init,
+};
+
+/* BAR regs static information table */
+static struct hvm_pt_reg_handler vf_bar_handler[] = {
+    /* BAR 0 reg */
+    /* mask of BAR need to be decided later, depends on IO/MEM type */
+    {
+        .offset     = 0,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* BAR 1 reg */
+    {
+        .offset     = 4,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* BAR 2 reg */
+    {
+        .offset     = 8,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* BAR 3 reg */
+    {
+        .offset     = 12,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* BAR 4 reg */
+    {
+        .offset     = 16,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* BAR 5 reg */
+    {
+        .offset     = 20,
+        .size       = 4,
+        .init_val   = 0x00000000,
+        .init       = bar_reg_init,
+        .u.dw.write = vf_bar_reg_write,
+    },
+    /* End. */
+    {
+        .size = 0,
+    },
+};
+
+static int vf_bar_group_init(struct hvm_pt_device *dev,
+                          struct hvm_pt_reg_group *group)
+{
+    uint8_t seg, bus, slot, func;
+    struct pci_dev *pdev = dev->pdev;
+    int sr_offset;
+    uint16_t ctrl;
+
+    seg = dev->pdev->seg;
+    bus = dev->pdev->bus;
+    slot = PCI_SLOT(dev->pdev->devfn);
+    func = PCI_FUNC(dev->pdev->devfn);
+
+    sr_offset = pci_find_ext_capability(seg, bus, pdev->devfn,
+                                        PCI_EXT_CAP_ID_SRIOV);
+    if ( sr_offset == 0 )
+        return -EINVAL;
+
+    printk_pdev(pdev, XENLOG_DEBUG, "found SR-IOV capabilities\n");
+    ctrl = pci_conf_read16(seg, bus, slot, func, sr_offset + PCI_SRIOV_CTRL);
+    if ( (ctrl & (PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE)) )
+    {
+        printk_pdev(pdev, XENLOG_ERR,
+                    "SR-IOV functions already enabled (%#04x)\n", ctrl);
+        return -EINVAL;
+    }
+
+    group->base_offset = sr_offset + PCI_SRIOV_BAR;
+    group->size = PCI_SRIOV_NUM_BARS * 4;
+
+    return 0;
+}
+
+struct hvm_pt_handler_init hvm_pt_vf_bar_init = {
+    .handlers = vf_bar_handler,
+    .init = vf_bar_group_init,
+};
+
 int pci_add_device(u16 seg, u8 bus, u8 devfn,
                    const struct pci_dev_info *info, nodeid_t node)
 {
diff --git a/xen/include/asm-x86/hvm/io.h b/xen/include/asm-x86/hvm/io.h
index 80f830d..25af036 100644
--- a/xen/include/asm-x86/hvm/io.h
+++ b/xen/include/asm-x86/hvm/io.h
@@ -19,6 +19,7 @@ 
 #ifndef __ASM_X86_HVM_IO_H__
 #define __ASM_X86_HVM_IO_H__
 
+#include <xen/pci_regs.h>
 #include <asm/hvm/vpic.h>
 #include <asm/hvm/vioapic.h>
 #include <public/hvm/ioreq.h>
@@ -275,6 +276,16 @@  struct hvm_pt_msi {
     bool_t mapped;       /* when pirq is mapped */
 };
 
+struct hvm_pt_bar {
+    uint32_t val;
+    enum bar_type {
+        HVM_PT_BAR_UNUSED,
+        HVM_PT_BAR_MEM32,
+        HVM_PT_BAR_MEM64_LO,
+        HVM_PT_BAR_MEM64_HI,
+    } type;
+};
+
 /*
  * Guest passed-through PCI device.
  */
@@ -289,6 +300,14 @@  struct hvm_pt_device {
     /* MSI status. */
     struct hvm_pt_msi msi;
 
+    /* PCI header type. */
+    uint8_t htype;
+
+    /* BAR tracking. */
+    int num_bars;
+    struct hvm_pt_bar bars[6];
+    struct hvm_pt_bar vf_bars[PCI_SRIOV_NUM_BARS];
+
     struct list_head register_groups;
 };
 
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index b21a891..51e0255 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -174,4 +174,7 @@  int msixtbl_pt_register(struct domain *, struct pirq *, uint64_t gtable);
 void msixtbl_pt_unregister(struct domain *, struct pirq *);
 void msixtbl_pt_cleanup(struct domain *d);
 
+extern struct hvm_pt_handler_init hvm_pt_bar_init;
+extern struct hvm_pt_handler_init hvm_pt_vf_bar_init;
+
 #endif /* __XEN_PCI_H__ */