@@ -43,6 +43,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector);
void msi_send_message(PCIDevice *dev, MSIMessage msg);
void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
unsigned int msi_nr_vectors_allocated(const PCIDevice *dev);
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
static inline bool msi_present(const PCIDevice *dev)
{
@@ -36,6 +36,7 @@ void msix_clr_pending(PCIDevice *dev, int vector);
int msix_vector_use(PCIDevice *dev, unsigned vector);
void msix_vector_unuse(PCIDevice *dev, unsigned vector);
void msix_unuse_all_vectors(PCIDevice *dev);
+void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
void msix_notify(PCIDevice *dev, unsigned vector);
@@ -16,6 +16,7 @@ extern bool pci_available;
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
#define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn))
+#define PCI_BDF_TO_DEVFN(x) ((x) & 0xff)
#define PCI_BUS_MAX 256
#define PCI_DEVFN_MAX 256
#define PCI_SLOT_MAX 32
@@ -127,6 +128,10 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
pcibus_t addr, pcibus_t size, int type);
typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
+typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg);
+typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector);
+typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector);
+
typedef struct PCIIORegion {
pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
#define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@@ -329,6 +334,14 @@ struct PCIDevice {
/* Space to store MSIX table & pending bit array */
uint8_t *msix_table;
uint8_t *msix_pba;
+
+ /* May be used by INTx or MSI during interrupt notification */
+ void *irq_opaque;
+
+ MSITriggerFunc *msi_trigger;
+ MSIPrepareMessageFunc *msi_prepare_message;
+ MSIxPrepareMessageFunc *msix_prepare_message;
+
/* MemoryRegion container for msix exclusive BAR setup */
MemoryRegion msix_exclusive_bar;
/* Memory Regions for MSIX table and pending bit entries. */
new file mode 100644
@@ -0,0 +1,6 @@
+#ifndef VFIO_USER_OBJ_H
+#define VFIO_USER_OBJ_H
+
+/*
+ * Install the vfio-user INTx handlers on @pci_bus.
+ *
+ * A no-op stub is linked in instead when CONFIG_VFIO_USER_SERVER is
+ * disabled.
+ */
+void vfu_object_set_bus_irq(PCIBus *pci_bus);
+
+#endif
@@ -134,7 +134,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
}
-MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
+static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
@@ -159,6 +159,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
return msg;
}
+/*
+ * Build the MSI message for @vector by delegating to the
+ * msi_prepare_message hook currently installed on @dev.
+ */
+MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
+{
+    MSIPrepareMessageFunc *prepare = dev->msi_prepare_message;
+
+    return prepare(dev, vector);
+}
+
bool msi_enabled(const PCIDevice *dev)
{
return msi_present(dev) &&
@@ -241,6 +246,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
}
+ dev->msi_prepare_message = msi_prepare_message;
+
return 0;
}
@@ -256,6 +263,7 @@ void msi_uninit(struct PCIDevice *dev)
cap_size = msi_cap_sizeof(flags);
pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
dev->cap_present &= ~QEMU_PCI_CAP_MSI;
+ dev->msi_prepare_message = NULL;
MSI_DEV_PRINTF(dev, "uninit\n");
}
@@ -307,6 +315,39 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
return mask & (1U << vector);
}
+/*
+ * Set or clear the per-vector mask bit of MSI @vector in @dev's config
+ * space.
+ *
+ * If the vector is being unmasked while its pending bit is set, the
+ * pending bit is cleared and the interrupt is delivered via msi_notify().
+ *
+ * On an out-of-range @vector, @errp is set and nothing is modified.
+ *
+ * NOTE(review): PCI_MSI_FLAGS_MASKBIT is not checked here; confirm that
+ * callers only use this on devices with per-vector masking.
+ */
+void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
+{
+    ERRP_GUARD();
+    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
+    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+    uint32_t irq_state, vector_mask, pending;
+
+    /*
+     * Valid vectors are 0 .. PCI_MSI_VECTORS_MAX - 1.  Anything else would
+     * select a bit outside the mask register in the shift below.
+     */
+    if (vector < 0 || vector >= PCI_MSI_VECTORS_MAX) {
+        error_setg(errp, "msi: vector %d not allocated. max vector is %d",
+                   vector, PCI_MSI_VECTORS_MAX - 1);
+        return;
+    }
+
+    vector_mask = (1U << vector);
+
+    irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));
+
+    if (mask) {
+        irq_state |= vector_mask;
+    } else {
+        irq_state &= ~vector_mask;
+    }
+
+    pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);
+
+    /* Deliver an interrupt that was latched while the vector was masked. */
+    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
+    if (!mask && (pending & vector_mask)) {
+        pending &= ~vector_mask;
+        pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
+        msi_notify(dev, vector);
+    }
+}
+
void msi_notify(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
@@ -334,11 +375,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
void msi_send_message(PCIDevice *dev, MSIMessage msg)
{
- MemTxAttrs attrs = {};
-
- attrs.requester_id = pci_requester_id(dev);
- address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
- attrs, NULL);
+ dev->msi_trigger(dev, msg);
}
/* Normally called by pci_default_write_config(). */
@@ -31,7 +31,7 @@
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
-MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
@@ -41,6 +41,11 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
return msg;
}
+/*
+ * Build the MSI-X message for @vector by delegating to the
+ * msix_prepare_message hook currently installed on @dev.
+ */
+MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
+{
+    MSIxPrepareMessageFunc *prepare = dev->msix_prepare_message;
+
+    return prepare(dev, vector);
+}
+
/*
* Special API for POWER to configure the vectors through
* a side channel. Should never be used by devices.
@@ -131,6 +136,31 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
}
}
+/*
+ * Set or clear the mask bit in the vector-control field of MSI-X table
+ * entry @vector, then let msix_handle_mask_update() act on the change
+ * relative to the previous masked state.
+ *
+ * On an out-of-range @vector, @errp is set and nothing is modified.
+ */
+void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
+{
+    ERRP_GUARD();
+    unsigned offset;
+    bool was_masked;
+
+    /*
+     * Valid vectors are 0 .. msix_entries_nr - 1; anything else would index
+     * past the MSI-X table.
+     */
+    if (vector < 0 || vector >= dev->msix_entries_nr) {
+        error_setg(errp, "msix: vector %d not allocated. max vector is %d",
+                   vector, dev->msix_entries_nr - 1);
+        return;
+    }
+
+    offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
+
+    /* Sample the old state first: the update handler needs the transition. */
+    was_masked = msix_is_masked(dev, vector);
+
+    if (mask) {
+        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+    } else {
+        dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
+    }
+
+    msix_handle_mask_update(dev, vector, was_masked);
+}
+
static bool msix_masked(PCIDevice *dev)
{
return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
@@ -344,6 +374,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
"msix-pba", pba_size);
memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
+ dev->msix_prepare_message = msix_prepare_message;
+
return 0;
}
@@ -429,6 +461,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
g_free(dev->msix_entry_used);
dev->msix_entry_used = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+ dev->msix_prepare_message = NULL;
}
void msix_uninit_exclusive_bar(PCIDevice *dev)
@@ -317,6 +317,15 @@ void pci_device_deassert_intx(PCIDevice *dev)
}
}
+/*
+ * Deliver an MSI by storing msg.data at msg.address in the device's
+ * bus-master address space, tagged with the device's requester ID.
+ */
+static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
+{
+    MemTxAttrs attrs = { .requester_id = pci_requester_id(dev) };
+
+    address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
+                         attrs, NULL);
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -1212,6 +1221,8 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_device_deassert_intx(pci_dev);
do_pci_unregister_device(pci_dev);
+
+ pci_dev->msi_trigger = NULL;
}
void pci_register_bar(PCIDevice *pci_dev, int region_num,
@@ -2251,6 +2262,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
pci_set_power(pci_dev, true);
+
+ pci_dev->msi_trigger = pci_msi_trigger;
}
PCIDevice *pci_new_multifunction(int devfn, bool multifunction,
@@ -23,6 +23,8 @@
#include "hw/remote/iommu.h"
#include "hw/qdev-core.h"
#include "hw/remote/iommu.h"
+#include "hw/remote/vfio-user-obj.h"
+#include "hw/pci/msi.h"
static void remote_machine_init(MachineState *machine)
{
@@ -54,12 +56,16 @@ static void remote_machine_init(MachineState *machine)
if (s->vfio_user) {
remote_iommu_setup(pci_host->bus);
- }
- remote_iohub_init(&s->iohub);
+ msi_nonbroken = true;
+
+ vfu_object_set_bus_irq(pci_host->bus);
+ } else {
+ remote_iohub_init(&s->iohub);
- pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
- &s->iohub, REMOTE_IOHUB_NB_PIRQS);
+ pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
+ &s->iohub, REMOTE_IOHUB_NB_PIRQS);
+ }
qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
}
@@ -53,6 +53,9 @@
#include "hw/pci/pci.h"
#include "qemu/timer.h"
#include "exec/memory.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
+#include "hw/remote/vfio-user-obj.h"
#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@@ -96,6 +99,10 @@ struct VfuObject {
Error *unplug_blocker;
int vfu_poll_fd;
+
+ MSITriggerFunc *default_msi_trigger;
+ MSIPrepareMessageFunc *default_msi_prepare_message;
+ MSIxPrepareMessageFunc *default_msix_prepare_message;
};
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
@@ -520,6 +527,155 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
}
}
+/*
+ * IRQ mapping hook: identify the interrupt by the device's BDF (bus number
+ * combined with devfn).  @intx is not used.
+ */
+static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
+{
+    PCIBus *parent = pci_get_bus(pci_dev);
+
+    return PCI_BUILD_BDF(pci_bus_num(parent), pci_dev->devfn);
+}
+
+/*
+ * INTx handler for the PCI bus passed as @opaque.
+ *
+ * @pirq is decoded as a BDF (see vfu_object_map_irq()).  When the line is
+ * asserted (@level != 0) the device is looked up and IRQ 0 is triggered on
+ * the vfio-user context stored in its irq_opaque.  De-assertion is ignored.
+ */
+static void vfu_object_set_irq(void *opaque, int pirq, int level)
+{
+    PCIBus *pci_bus = opaque;
+    PCIDevice *pci_dev;
+    vfu_ctx_t *vfu_ctx;
+
+    if (!level) {
+        return;
+    }
+
+    /*
+     * pci_find_device() is O(1) when the device sits on the root PCI bus.
+     * If it is attached to a secondary bus (e.g. behind a root port),
+     * locating the parent bus can take O(n).
+     */
+    pci_dev = pci_find_device(pci_bus, PCI_BUS_NUM(pirq),
+                              PCI_BDF_TO_DEVFN(pirq));
+
+    /* Fail loudly instead of dereferencing NULL if the lookup fails. */
+    g_assert(pci_dev);
+
+    vfu_ctx = pci_dev->irq_opaque;
+    g_assert(vfu_ctx);
+
+    vfu_irq_trigger(vfu_ctx, 0);
+}
+
+/*
+ * Message-preparation hook: the vector number itself is carried in the
+ * data field and the address is left at zero.  @pci_dev is not used.
+ */
+static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
+                                             unsigned int vector)
+{
+    return (MSIMessage) { .address = 0, .data = vector };
+}
+
+/*
+ * MSI trigger hook: fire the IRQ numbered msg.data on the vfio-user
+ * context stored in the device's irq_opaque.
+ */
+static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
+{
+    vfu_irq_trigger(pci_dev->irq_opaque, msg.data);
+}
+
+/*
+ * Save the device's current MSI/MSI-X hooks in @o, then replace them with
+ * the vfio-user implementations.
+ */
+static void vfu_object_setup_msi_cbs(VfuObject *o)
+{
+    PCIDevice *pdev = o->pci_dev;
+
+    /* Remember what was installed so it can be put back later. */
+    o->default_msi_trigger = pdev->msi_trigger;
+    o->default_msi_prepare_message = pdev->msi_prepare_message;
+    o->default_msix_prepare_message = pdev->msix_prepare_message;
+
+    /* MSI and MSI-X share the same message-preparation routine. */
+    pdev->msi_trigger = vfu_object_msi_trigger;
+    pdev->msi_prepare_message = vfu_object_msi_prepare_msg;
+    pdev->msix_prepare_message = vfu_object_msi_prepare_msg;
+}
+
+/*
+ * Put back the MSI/MSI-X hooks saved by vfu_object_setup_msi_cbs().
+ *
+ * This is reached from error and teardown paths that may run before the
+ * hooks were ever saved; in that case the device's hooks are left alone
+ * rather than being overwritten with NULL.  The saved values are cleared
+ * afterwards so a second call is harmless.
+ *
+ * NOTE(review): relies on the saved msi_trigger being non-NULL once setup
+ * has run, and on VfuObject starting out zeroed - confirm.
+ */
+static void vfu_object_restore_msi_cbs(VfuObject *o)
+{
+    PCIDevice *pdev = o->pci_dev;
+
+    if (!o->default_msi_trigger) {
+        return;
+    }
+
+    pdev->msi_trigger = o->default_msi_trigger;
+    pdev->msi_prepare_message = o->default_msi_prepare_message;
+    pdev->msix_prepare_message = o->default_msix_prepare_message;
+
+    o->default_msi_trigger = NULL;
+    o->default_msi_prepare_message = NULL;
+    o->default_msix_prepare_message = NULL;
+}
+
+/*
+ * IRQ state callback for MSI-X: apply @mask to @count vectors beginning at
+ * @start.  A failure on one vector is reported and the remaining vectors
+ * are still processed.
+ */
+static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
+                               uint32_t count, bool mask)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    Error *err = NULL;
+    uint32_t i;
+
+    /* @count is a length, not an end index: cover [start, start + count). */
+    for (i = 0; i < count; i++) {
+        msix_set_mask(o->pci_dev, start + i, mask, &err);
+        if (err) {
+            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
+                             error_get_pretty(err));
+            error_free(err);
+            err = NULL;
+        }
+    }
+}
+
+/*
+ * IRQ state callback for MSI: apply @mask to @count vectors beginning at
+ * @start.  A failure on one vector is reported and the remaining vectors
+ * are still processed.
+ */
+static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
+                              uint32_t count, bool mask)
+{
+    VfuObject *o = vfu_get_private(vfu_ctx);
+    Error *err = NULL;
+    uint32_t i;
+
+    /* @count is a length, not an end index: cover [start, start + count). */
+    for (i = 0; i < count; i++) {
+        msi_set_mask(o->pci_dev, start + i, mask, &err);
+        if (err) {
+            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
+                             error_get_pretty(err));
+            error_free(err);
+            err = NULL;
+        }
+    }
+}
+
+/*
+ * Describe @pci_dev's interrupts to libvfio-user and hook up delivery.
+ *
+ * One INTx IRQ is always registered.  If the device has MSI-X vectors
+ * allocated those are registered together with their mask-state callback;
+ * otherwise the same is done for MSI when MSI vectors are allocated.  On
+ * success the device's MSI hooks are redirected and its irq_opaque is set
+ * to the vfio-user context.
+ *
+ * Returns 0 on success, or the negative value returned by the failing
+ * libvfio-user call.
+ */
+static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
+{
+    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
+    int ret;
+
+    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (msix_nr_vectors_allocated(pci_dev)) {
+        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
+                                       msix_nr_vectors_allocated(pci_dev));
+        if (ret < 0) {
+            return ret;
+        }
+        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
+                                           &vfu_msix_irq_state);
+    } else if (msi_nr_vectors_allocated(pci_dev)) {
+        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
+                                       msi_nr_vectors_allocated(pci_dev));
+        if (ret < 0) {
+            return ret;
+        }
+        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
+                                           &vfu_msi_irq_state);
+    }
+
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Only touch the device once every libvfio-user call has succeeded. */
+    vfu_object_setup_msi_cbs(o);
+
+    pci_dev->irq_opaque = vfu_ctx;
+
+    return 0;
+}
+
+/*
+ * Install the vfio-user INTx handlers on @pci_bus.
+ *
+ * IRQ numbers on this bus are BDFs (see vfu_object_map_irq()), so the bus
+ * needs one IRQ slot for every value from 0 up to and including the
+ * highest BDF on this bus number.
+ */
+void vfu_object_set_bus_irq(PCIBus *pci_bus)
+{
+    int bus_num = pci_bus_num(pci_bus);
+    int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);
+
+    /* max_bdf is itself a valid IRQ number, hence the + 1. */
+    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
+                 max_bdf + 1);
+}
+
/*
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
* properties. It also depends on devices instantiated in QEMU. These
@@ -632,6 +788,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
+ ret = vfu_object_setup_irqs(o, o->pci_dev);
+ if (ret < 0) {
+ error_setg(errp, "vfu: Failed to setup interrupts for %s",
+ o->device);
+ goto fail;
+ }
+
ret = vfu_realize_ctx(o->vfu_ctx);
if (ret < 0) {
error_setg(errp, "vfu: Failed to realize device %s- %s",
@@ -657,6 +820,8 @@ fail:
o->unplug_blocker = NULL;
}
if (o->pci_dev) {
+ vfu_object_restore_msi_cbs(o);
+ o->pci_dev->irq_opaque = NULL;
object_unref(OBJECT(o->pci_dev));
o->pci_dev = NULL;
}
@@ -716,6 +881,8 @@ static void vfu_object_finalize(Object *obj)
}
if (o->pci_dev) {
+ vfu_object_restore_msi_cbs(o);
+ o->pci_dev->irq_opaque = NULL;
object_unref(OBJECT(o->pci_dev));
o->pci_dev = NULL;
}
new file mode 100644
@@ -0,0 +1,6 @@
+#include "qemu/osdep.h"
+#include "hw/remote/vfio-user-obj.h"
+
+/*
+ * Stub used when CONFIG_VFIO_USER_SERVER is not enabled: there is no
+ * vfio-user server, so nothing is installed on @pci_bus.
+ */
+void vfu_object_set_bus_irq(PCIBus *pci_bus)
+{
+}
@@ -3634,6 +3634,7 @@ F: hw/remote/iohub.c
F: include/hw/remote/iohub.h
F: subprojects/libvfio-user
F: hw/remote/vfio-user-obj.c
+F: include/hw/remote/vfio-user-obj.h
F: hw/remote/iommu.c
F: include/hw/remote/iommu.h
@@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
+vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"
@@ -60,3 +60,4 @@ if have_system
else
stub_ss.add(files('qdev.c'))
endif
+stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c'))