@@ -35,6 +35,7 @@
#define VENDOR_ERR_MR_SMALL 0x208
#define THR_NAME_LEN 16
+#define THR_POLL_TO 5000
typedef struct BackendCtx {
uint64_t req_id;
@@ -91,35 +92,82 @@ static void *comp_handler_thread(void *arg)
int rc;
struct ibv_cq *ev_cq;
void *ev_ctx;
+ int flags;
+ GPollFD pfds[1];
+
+ /* Change to non-blocking mode */
+ flags = fcntl(backend_dev->channel->fd, F_GETFL);
+ rc = fcntl(backend_dev->channel->fd, F_SETFL, flags | O_NONBLOCK);
+ if (rc < 0) {
+ pr_dbg("Fail to change to non-blocking mode\n");
+ return NULL;
+ }
pr_dbg("Starting\n");
+ pfds[0].fd = backend_dev->channel->fd;
+ pfds[0].events = G_IO_IN | G_IO_HUP | G_IO_ERR;
+
+ backend_dev->comp_thread.is_running = true;
+
while (backend_dev->comp_thread.run) {
- pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
- rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
- pr_dbg("ibv_get_cq_event=%d\n", rc);
- if (unlikely(rc)) {
- pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
- continue;
- }
+ do {
+ rc = qemu_poll_ns(pfds, 1, THR_POLL_TO * (int64_t)SCALE_MS);
+ } while (!rc && backend_dev->comp_thread.run);
+
+ if (backend_dev->comp_thread.run) {
+ pr_dbg("Waiting for completion on channel %p\n", backend_dev->channel);
+ rc = ibv_get_cq_event(backend_dev->channel, &ev_cq, &ev_ctx);
+ pr_dbg("ibv_get_cq_event=%d\n", rc);
+ if (unlikely(rc)) {
+ pr_dbg("---> ibv_get_cq_event (%d)\n", rc);
+ continue;
+ }
- rc = ibv_req_notify_cq(ev_cq, 0);
- if (unlikely(rc)) {
- pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
- }
+ rc = ibv_req_notify_cq(ev_cq, 0);
+ if (unlikely(rc)) {
+ pr_dbg("Error %d from ibv_req_notify_cq\n", rc);
+ }
- poll_cq(backend_dev->rdma_dev_res, ev_cq);
+ poll_cq(backend_dev->rdma_dev_res, ev_cq);
- ibv_ack_cq_events(ev_cq, 1);
+ ibv_ack_cq_events(ev_cq, 1);
+ }
}
pr_dbg("Going down\n");
/* TODO: Post cqe for all remaining buffs that were posted */
+ backend_dev->comp_thread.is_running = false;
+
+ qemu_thread_exit(0);
+
return NULL;
}
+static void stop_comp_thread(RdmaBackendDev *backend_dev)
+{
+ backend_dev->comp_thread.run = false;
+ while (backend_dev->comp_thread.is_running) {
+ pr_dbg("Waiting for thread to complete\n");
+ sleep(THR_POLL_TO / SCALE_US / 2);
+ }
+}
+
+static void start_comp_thread(RdmaBackendDev *backend_dev)
+{
+ char thread_name[THR_NAME_LEN] = {0};
+
+ stop_comp_thread(backend_dev);
+
+ snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
+ ibv_get_device_name(backend_dev->ib_dev));
+ backend_dev->comp_thread.run = true;
+ qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
+ comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+}
+
void rdma_backend_register_comp_handler(void (*handler)(int status,
unsigned int vendor_err, void *ctx))
{
@@ -706,7 +754,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
int i;
int ret = 0;
int num_ibv_devices;
- char thread_name[THR_NAME_LEN] = {0};
struct ibv_device **dev_list;
struct ibv_port_attr port_attr;
@@ -800,11 +847,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
pr_dbg("interface_id=0x%" PRIx64 "\n",
be64_to_cpu(backend_dev->gid.global.interface_id));
- snprintf(thread_name, sizeof(thread_name), "rdma_comp_%s",
- ibv_get_device_name(backend_dev->ib_dev));
- backend_dev->comp_thread.run = true;
- qemu_thread_create(&backend_dev->comp_thread.thread, thread_name,
- comp_handler_thread, backend_dev, QEMU_THREAD_DETACHED);
+ backend_dev->comp_thread.run = false;
+ backend_dev->comp_thread.is_running = false;
ah_cache_init();
@@ -823,8 +867,22 @@ out:
return ret;
}
+
+void rdma_backend_start(RdmaBackendDev *backend_dev)
+{
+ pr_dbg("Starting rdma_backend\n");
+ start_comp_thread(backend_dev);
+}
+
+void rdma_backend_stop(RdmaBackendDev *backend_dev)
+{
+ pr_dbg("Stopping rdma_backend\n");
+ stop_comp_thread(backend_dev);
+}
+
void rdma_backend_fini(RdmaBackendDev *backend_dev)
{
+ rdma_backend_stop(backend_dev);
g_hash_table_destroy(ah_hash);
ibv_destroy_comp_channel(backend_dev->channel);
ibv_close_device(backend_dev->context);
@@ -52,6 +52,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev,
uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
Error **errp);
void rdma_backend_fini(RdmaBackendDev *backend_dev);
+void rdma_backend_start(RdmaBackendDev *backend_dev);
+void rdma_backend_stop(RdmaBackendDev *backend_dev);
void rdma_backend_register_comp_handler(void (*handler)(int status,
unsigned int vendor_err, void *ctx));
void rdma_backend_unregister_comp_handler(void);
@@ -24,7 +24,8 @@ typedef struct RdmaDeviceResources RdmaDeviceResources;
typedef struct RdmaBackendThread {
QemuThread thread;
QemuMutex mutex;
- bool run;
+ bool run; /* Set by thread manager to let thread know it should exit */
+ bool is_running; /* Set by the thread to report its status */
} RdmaBackendThread;
typedef struct RdmaBackendDev {
@@ -286,8 +286,78 @@ static void init_ports(PVRDMADev *dev, Error **errp)
}
}
+static void uninit_msix(PCIDevice *pdev, int used_vectors)
+{
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+ int i;
+
+ for (i = 0; i < used_vectors; i++) {
+ msix_vector_unuse(pdev, i);
+ }
+
+ msix_uninit(pdev, &dev->msix, &dev->msix);
+}
+
+static int init_msix(PCIDevice *pdev, Error **errp)
+{
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+ int i;
+ int rc;
+
+ rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
+ RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
+ RDMA_MSIX_PBA, 0, NULL);
+
+ if (rc < 0) {
+ error_setg(errp, "Failed to initialize MSI-X");
+ return rc;
+ }
+
+ for (i = 0; i < RDMA_MAX_INTRS; i++) {
+ rc = msix_vector_use(PCI_DEVICE(dev), i);
+ if (rc < 0) {
+ error_setg(errp, "Fail mark MSI-X vector %d", i);
+ uninit_msix(pdev, i);
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pvrdma_fini(PCIDevice *pdev)
+{
+ PVRDMADev *dev = PVRDMA_DEV(pdev);
+
+ pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
+ PCI_FUNC(pdev->devfn));
+
+ pvrdma_qp_ops_fini();
+
+ rdma_rm_fini(&dev->rdma_dev_res);
+
+ rdma_backend_fini(&dev->backend_dev);
+
+ free_dsr(dev);
+
+ if (msix_enabled(pdev)) {
+ uninit_msix(pdev, RDMA_MAX_INTRS);
+ }
+}
+
+static void pvrdma_stop(PVRDMADev *dev)
+{
+ rdma_backend_stop(&dev->backend_dev);
+}
+
+static void pvrdma_start(PVRDMADev *dev)
+{
+ rdma_backend_start(&dev->backend_dev);
+}
+
static void activate_device(PVRDMADev *dev)
{
+ pvrdma_start(dev);
set_reg_val(dev, PVRDMA_REG_ERR, 0);
pr_dbg("Device activated\n");
}
@@ -300,7 +370,10 @@ static int unquiesce_device(PVRDMADev *dev)
static int reset_device(PVRDMADev *dev)
{
+ pvrdma_stop(dev);
+
pr_dbg("Device reset complete\n");
+
return 0;
}
@@ -469,45 +542,6 @@ static void init_regs(PCIDevice *pdev)
set_reg_val(dev, PVRDMA_REG_ERR, 0xFFFF);
}
-static void uninit_msix(PCIDevice *pdev, int used_vectors)
-{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
- int i;
-
- for (i = 0; i < used_vectors; i++) {
- msix_vector_unuse(pdev, i);
- }
-
- msix_uninit(pdev, &dev->msix, &dev->msix);
-}
-
-static int init_msix(PCIDevice *pdev, Error **errp)
-{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
- int i;
- int rc;
-
- rc = msix_init(pdev, RDMA_MAX_INTRS, &dev->msix, RDMA_MSIX_BAR_IDX,
- RDMA_MSIX_TABLE, &dev->msix, RDMA_MSIX_BAR_IDX,
- RDMA_MSIX_PBA, 0, NULL);
-
- if (rc < 0) {
- error_setg(errp, "Failed to initialize MSI-X");
- return rc;
- }
-
- for (i = 0; i < RDMA_MAX_INTRS; i++) {
- rc = msix_vector_use(PCI_DEVICE(dev), i);
- if (rc < 0) {
- error_setg(errp, "Fail mark MSI-X vercor %d", i);
- uninit_msix(pdev, i);
- return rc;
- }
- }
-
- return 0;
-}
-
static void init_dev_caps(PVRDMADev *dev)
{
size_t pg_tbl_bytes = TARGET_PAGE_SIZE *
@@ -602,22 +636,7 @@ out:
static void pvrdma_exit(PCIDevice *pdev)
{
- PVRDMADev *dev = PVRDMA_DEV(pdev);
-
- pr_dbg("Closing device %s %x.%x\n", pdev->name, PCI_SLOT(pdev->devfn),
- PCI_FUNC(pdev->devfn));
-
- pvrdma_qp_ops_fini();
-
- rdma_rm_fini(&dev->rdma_dev_res);
-
- rdma_backend_fini(&dev->backend_dev);
-
- free_dsr(dev);
-
- if (msix_enabled(pdev)) {
- uninit_msix(pdev, RDMA_MAX_INTRS);
- }
+ pvrdma_fini(pdev);
}
static void pvrdma_class_init(ObjectClass *klass, void *data)