
[2/2] vus: Introduce a vhost-user-scsi sample application

Message ID 1477495586-5508-3-git-send-email-felipe@nutanix.com (mailing list archive)
State New, archived

Commit Message

Felipe Franciosi Oct. 26, 2016, 3:26 p.m. UTC
This commit introduces a vhost-user-scsi backend sample application. It
must be linked with libiscsi and libvhost-user.

To use it, compile with:
make tests/vhost-user-scsi

And run as follows:
tests/vhost-user-scsi -u /tmp/vus.sock -i iscsi://uri_to_target/

The application is currently limited to one LUN and processes
requests synchronously (therefore only achieving QD1). The purpose of
the code is to show how a backend can be implemented and to test the
vhost-user-scsi QEMU implementation.

If another instance of this vhost-user-scsi application is running on
a remote host, a VM can be live migrated to that host.
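
A guest is then attached to the backend through a vhost-user chardev
pointing at the same socket. Exact option names depend on the QEMU side
(patch 1/2), but the invocation is expected to look roughly like:

qemu-system-x86_64 ... \
    -object memory-backend-file,id=mem,size=1G,mem-path=/dev/shm,share=on \
    -numa node,memdev=mem \
    -chardev socket,id=vus0,path=/tmp/vus.sock \
    -device vhost-user-scsi-pci,chardev=vus0

(vhost-user requires the guest memory to come from a shareable memory
backend, hence the memory-backend-file object.)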

Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
---
 tests/Makefile.include  |   2 +
 tests/vhost-user-scsi.c | 862 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 864 insertions(+)
 create mode 100644 tests/vhost-user-scsi.c

Comments

Paolo Bonzini Oct. 27, 2016, 12:16 p.m. UTC | #1
On 26/10/2016 17:26, Felipe Franciosi wrote:
> This commit introduces a vhost-user-scsi backend sample application. It
> must be linked with libiscsi and libvhost-user.
> 
> To use it, compile with:
> make tests/vhost-user-scsi
> 
> And run as follows:
> tests/vhost-user-scsi -u /tmp/vus.sock -i iscsi://uri_to_target/
> 
> The application is currently limited to one LUN and processes
> requests synchronously (therefore only achieving QD1). The purpose of
> the code is to show how a backend can be implemented and to test the
> vhost-user-scsi QEMU implementation.
> 
> If another instance of this vhost-user-scsi application is running on
> a remote host, a VM can be live migrated to that host.

Hi,

the right directory for this is contrib/.

Is it possible to use GSource and GIOChannel instead for the event loop?
 There is some dead code (for example cb2 as far as I can see) and
having the millionth implementation of an event loop distracts from the
meat of the code. :)
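
For illustration, a minimal sketch of that approach, assuming the fd
watches are moved onto GLib's default main context; vus_gsrc_cb,
vus_add_watch and watch_t below are hypothetical names, not code from
the posted patch:

    #include <glib.h>

    typedef struct watch {
        VuDev *vu_dev;
        vu_watch_cb cb;
        void *pvt;
    } watch_t;

    /* GIOFunc adapter: translate GIOCondition back into VU_WATCH_* bits */
    static gboolean vus_gsrc_cb(GIOChannel *src, GIOCondition cond,
                                gpointer data)
    {
        watch_t *w = data;
        int vu_evt = 0;

        if (cond & G_IO_IN)  { vu_evt |= VU_WATCH_IN;  }
        if (cond & G_IO_OUT) { vu_evt |= VU_WATCH_OUT; }
        if (cond & G_IO_HUP) { vu_evt |= VU_WATCH_HUP; }
        if (cond & G_IO_ERR) { vu_evt |= VU_WATCH_ERR; }

        w->cb(w->vu_dev, vu_evt, w->pvt);

        return G_SOURCE_CONTINUE; /* keep the watch installed */
    }

    /* helper the libvhost-user set_watch callback could build on */
    static guint vus_add_watch(VuDev *vu_dev, int fd, vu_watch_cb cb,
                               void *pvt)
    {
        GIOChannel *chan = g_io_channel_unix_new(fd);
        watch_t *w = g_new0(watch_t, 1);
        guint tag;

        w->vu_dev = vu_dev;
        w->cb = cb;
        w->pvt = pvt;

        tag = g_io_add_watch(chan, G_IO_IN | G_IO_HUP | G_IO_ERR,
                             vus_gsrc_cb, w);
        g_io_channel_unref(chan); /* the GSource keeps its own reference */

        /* g_source_remove(tag) then implements del_watch, and the main
         * loop itself is just g_main_loop_run() */
        return tag;
    }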

Thanks,

Paolo

> Signed-off-by: Felipe Franciosi <felipe@nutanix.com>
> ---
>  tests/Makefile.include  |   2 +
>  tests/vhost-user-scsi.c | 862 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 864 insertions(+)
>  create mode 100644 tests/vhost-user-scsi.c
> 
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 7e6fd23..e61fe54 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -685,6 +685,8 @@ tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-ob
>  tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
>  tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
>  tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
> +tests/vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
> +tests/vhost-user-scsi$(EXESUF): tests/vhost-user-scsi.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y) $(test-block-obj-y)
>  tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
>  tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
>  
> diff --git a/tests/vhost-user-scsi.c b/tests/vhost-user-scsi.c
> new file mode 100644
> index 0000000..c92b3b2
> --- /dev/null
> +++ b/tests/vhost-user-scsi.c
> @@ -0,0 +1,862 @@
> +/*
> + * vhost-user-scsi sample application
> + *
> + * Copyright (c) 2016 Nutanix Inc. All rights reserved.
> + *
> + * Author:
> + *  Felipe Franciosi <felipe@nutanix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 only.
> + * See the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "contrib/libvhost-user/libvhost-user.h"
> +#include "hw/virtio/virtio-scsi.h"
> +#include "iscsi/iscsi.h"
> +
> +#include <poll.h>
> +
> +#define VHOST_USER_SCSI_DEBUG 1
> +
> +/** Log helpers **/
> +
> +#define PPRE                                                          \
> +    struct timespec ts;                                               \
> +    char   timebuf[64];                                               \
> +    struct tm tm;                                                     \
> +    (void)clock_gettime(CLOCK_REALTIME, &ts);                         \
> +    (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
> +
> +#define PEXT(lvl, msg, ...) do {                                      \
> +    PPRE;                                                             \
> +    fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n",        \
> +            timebuf, ts.tv_nsec/1000,                                 \
> +            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__);        \
> +} while(0)
> +
> +#define PNOR(lvl, msg, ...) do {                                      \
> +    PPRE;                                                             \
> +    fprintf(stderr, "%s.%06ld " lvl ": " msg "\n",                    \
> +            timebuf, ts.tv_nsec/1000, ## __VA_ARGS__);                \
> +} while(0)
> +
> +#ifdef VHOST_USER_SCSI_DEBUG
> +#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__)
> +#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__)
> +#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__)
> +#else
> +#define PDBG(msg, ...) { }
> +#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__)
> +#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__)
> +#endif
> +
> +/** vhost-user-scsi specific definitions **/
> +
> +/* TODO: MAX is defined at 8, should be 1024 */
> +#define VUS_SCHED_MAX_FDS (1 + (2*VHOST_MAX_NR_VIRTQUEUE))
> +
> +#define VDEV_SCSI_MAX_LUNS 1 // Only 1 lun supported today
> +#define VDEV_SCSI_MAX_DEVS 1 // Only 1 device supported today
> +
> +#define ISCSI_INITIATOR "iqn.2016-10.com.nutanix:vhost-user-scsi"
> +
> +typedef void (*misc_cb) (short evt, void *pvt);
> +
> +typedef struct sched_data {
> +    vu_watch_cb cb1;
> +    misc_cb cb2;
> +    void *pvt;
> +    short evt;
> +} sched_data_t;
> +
> +typedef struct sched {
> +    VuDev *vu_dev;
> +    nfds_t nfds;
> +    struct pollfd fds[VUS_SCHED_MAX_FDS];
> +    sched_data_t data[VUS_SCHED_MAX_FDS];
> +    int quit;
> +} sched_t;
> +
> +typedef struct iscsi_lun {
> +    struct iscsi_context *iscsi_ctx;
> +    int iscsi_lun;
> +} iscsi_lun_t;
> +
> +typedef struct vhost_scsi_dev {
> +    VuDev vu_dev;
> +    int server_sock;
> +    sched_t sched;
> +    iscsi_lun_t luns[VDEV_SCSI_MAX_LUNS];
> +} vhost_scsi_dev_t;
> +
> +static vhost_scsi_dev_t *vhost_scsi_devs[VDEV_SCSI_MAX_DEVS];
> +
> +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev);
> +
> +/** poll-based scheduler for libvhost-user and misc callbacks **/
> +
> +static int sched_add(sched_t *sched, int fd, short int evt,
> +                     vu_watch_cb cb1, misc_cb cb2, void *pvt) {
> +    int i;
> +
> +    assert(sched);
> +    assert(fd >= 0);
> +    assert(evt);
> +    assert(cb1 || cb2);
> +    assert(!(cb1 && cb2)); // only one of the cbs should be used
> +
> +    for (i=0; i<sched->nfds && i<VUS_SCHED_MAX_FDS; i++) {
> +        if (sched->fds[i].fd == fd) {
> +            break;
> +        }
> +    }
> +    if (i == VUS_SCHED_MAX_FDS) {
> +        PERR("Error adding fd: max number of fds reached");
> +        return -1;
> +    }
> +
> +    sched->fds[i].fd = fd;
> +    sched->fds[i].events = evt;
> +    sched->data[i].cb1 = cb1;
> +    sched->data[i].cb2 = cb2;
> +    sched->data[i].pvt = pvt;
> +    sched->data[i].evt = evt;
> +
> +    if (sched->nfds <= i) {
> +        sched->nfds = i+1;
> +    }
> +
> +    PDBG("sched@%p: add fd %d to slot %i", sched, fd, i);
> +
> +    return 0;
> +}
> +
> +static int sched_del(sched_t *sched, int fd) {
> +    int i;
> +
> +    assert(sched);
> +    assert(fd >= 0);
> +
> +    for (i=0; i<sched->nfds; i++) {
> +        if (sched->fds[i].fd == fd) {
> +            break;
> +        }
> +    }
> +    if (sched->nfds == i) {
> +#ifdef VUS_PEDANTIC_SCHEDULER
> +        PERR("Error deleting fd %d: fd not found", fd);
> +        return -1;
> +#else
> +        return 0;
> +#endif
> +    }
> +
> +    sched->nfds--;
> +    if (sched->nfds > 0) {
> +        // Overwrite deleted entry with last entry from scheduler
> +        memcpy(&sched->fds[i],  &sched->fds[sched->nfds],
> +               sizeof(struct pollfd));
> +        memcpy(&sched->data[i], &sched->data[sched->nfds],
> +               sizeof(sched_data_t));
> +    }
> +    memset(&sched->fds[sched->nfds], 0, sizeof(struct pollfd));
> +    memset(&sched->data[sched->nfds], 0, sizeof(sched_data_t));
> +
> +    PDBG("sched@%p: del fd %d from slot %i", sched, fd, i);
> +
> +    return 0;
> +}
> +
> +static int sched_loop(sched_t *sched) {
> +    int i, n;
> +
> +    assert(sched);
> +    assert(sched->nfds > 0);
> +
> +    while (!sched->quit) {
> +        n = poll(sched->fds, sched->nfds, -1);
> +        if (n < 0) {
> +            PERR("Error polling: %s", strerror(errno));
> +            return -1;
> +        }
> +
> +        for (i=0; i<sched->nfds && n; i++) {
> +            if (sched->fds[i].revents != 0) {
> +
> +                if (sched->data[i].cb1) {
> +                    int vu_evt = 0;
> +
> +                    if (sched->fds[i].revents & POLLIN)  vu_evt |= VU_WATCH_IN;
> +                    if (sched->fds[i].revents & POLLOUT) vu_evt |= VU_WATCH_OUT;
> +                    if (sched->fds[i].revents & POLLPRI) vu_evt |= VU_WATCH_PRI;
> +                    if (sched->fds[i].revents & POLLERR) vu_evt |= VU_WATCH_ERR;
> +                    if (sched->fds[i].revents & POLLHUP) vu_evt |= VU_WATCH_HUP;
> +
> +                    PDBG("sched@%p: fd[%d] (%d): cb1(%p, %d, %p)", sched, i,
> +                         sched->fds[i].fd, sched->vu_dev, vu_evt,
> +                         sched->data[i].pvt);
> +
> +                    sched->data[i].cb1(sched->vu_dev, vu_evt,
> +                                       sched->data[i].pvt);
> +                } else {
> +                    PDBG("sched@%p: fd[%d] (%d): cbb(%hd, %p)", sched, i,
> +                         sched->fds[i].fd, sched->fds[i].revents,
> +                         sched->data[i].pvt);
> +
> +                    sched->data[i].cb2(sched->fds[i].revents,
> +                                       sched->data[i].pvt);
> +                }
> +
> +                n--;
> +            }
> +        }
> +    }
> +
> +    return 0;
> +}
> +
> +/** from libiscsi's scsi-lowlevel.h **/
> +
> +#define SCSI_CDB_MAX_SIZE           16
> +
> +struct scsi_iovector {
> +    struct scsi_iovec *iov;
> +    int niov;
> +    int nalloc;
> +    size_t offset;
> +    int consumed;
> +};
> +
> +struct scsi_allocated_memory {
> +    struct scsi_allocated_memory *next;
> +    char buf[0];
> +};
> +
> +struct scsi_data {
> +    int            size;
> +    unsigned char *data;
> +};
> +
> +enum scsi_sense_key {
> +    SCSI_SENSE_NO_SENSE            = 0x00,
> +    SCSI_SENSE_RECOVERED_ERROR     = 0x01,
> +    SCSI_SENSE_NOT_READY           = 0x02,
> +    SCSI_SENSE_MEDIUM_ERROR        = 0x03,
> +    SCSI_SENSE_HARDWARE_ERROR      = 0x04,
> +    SCSI_SENSE_ILLEGAL_REQUEST     = 0x05,
> +    SCSI_SENSE_UNIT_ATTENTION      = 0x06,
> +    SCSI_SENSE_DATA_PROTECTION     = 0x07,
> +    SCSI_SENSE_BLANK_CHECK         = 0x08,
> +    SCSI_SENSE_VENDOR_SPECIFIC     = 0x09,
> +    SCSI_SENSE_COPY_ABORTED        = 0x0a,
> +    SCSI_SENSE_COMMAND_ABORTED     = 0x0b,
> +    SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c,
> +    SCSI_SENSE_OVERFLOW_COMMAND    = 0x0d,
> +    SCSI_SENSE_MISCOMPARE          = 0x0e
> +};
> +
> +struct scsi_sense {
> +    unsigned char       error_type;
> +    enum scsi_sense_key key;
> +    int                 ascq;
> +    unsigned            sense_specific:1;
> +    unsigned            ill_param_in_cdb:1;
> +    unsigned            bit_pointer_valid:1;
> +    unsigned char       bit_pointer;
> +    uint16_t            field_pointer;
> +};
> +
> +enum scsi_residual {
> +    SCSI_RESIDUAL_NO_RESIDUAL = 0,
> +    SCSI_RESIDUAL_UNDERFLOW,
> +    SCSI_RESIDUAL_OVERFLOW
> +};
> +
> +struct scsi_task {
> +    int status;
> +    int cdb_size;
> +    int xfer_dir;
> +    int expxferlen;
> +    unsigned char cdb[SCSI_CDB_MAX_SIZE];
> +    enum scsi_residual residual_status;
> +    size_t residual;
> +    struct scsi_sense sense;
> +    struct scsi_data datain;
> +    struct scsi_allocated_memory *mem;
> +    void *ptr;
> +
> +    uint32_t itt;
> +    uint32_t cmdsn;
> +    uint32_t lun;
> +
> +    struct scsi_iovector iovector_in;
> +    struct scsi_iovector iovector_out;
> +};
> +
> +/** libiscsi integration **/
> +
> +static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri) {
> +    struct iscsi_url *iscsi_url;
> +    struct iscsi_context *iscsi_ctx;
> +    int ret = 0;
> +
> +    assert(lun);
> +
> +    iscsi_ctx = iscsi_create_context(ISCSI_INITIATOR);
> +    if (!iscsi_ctx) {
> +        PERR("Unable to create iSCSI context");
> +        return -1;
> +    }
> +
> +    iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
> +    if (!iscsi_url) {
> +        PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
> +        goto fail;
> +    }
> +
> +    iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
> +    iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
> +    if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
> +        PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx));
> +        goto fail;
> +    }
> +
> +    lun->iscsi_ctx = iscsi_ctx;
> +    lun->iscsi_lun = iscsi_url->lun;
> +
> +    PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
> +
> +out:
> +    if (iscsi_url) {
> +        iscsi_destroy_url(iscsi_url);
> +    }
> +    return ret;
> +
> +fail:
> +    (void)iscsi_destroy_context(iscsi_ctx);
> +    ret = -1;
> +    goto out;
> +}
> +
> +static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
> +                                       int xfer_len) {
> +    struct scsi_task *task;
> +
> +    assert(cdb_len > 0);
> +    assert(cdb);
> +
> +    task = calloc(1, sizeof(struct scsi_task));
> +    if (!task) {
> +        PERR("Error allocating task: %s", strerror(errno));
> +        return NULL;
> +    }
> +
> +    memcpy(task->cdb, cdb, cdb_len);
> +    task->cdb_size = cdb_len;
> +    task->xfer_dir = dir;
> +    task->expxferlen = xfer_len;
> +
> +    return task;
> +}
> +
> +static int get_cdb_len(uint8_t *cdb) {
> +    switch(cdb[0] >> 5){
> +    case 0:
> +        return 6;
> +    case 1:
> +    case 2:
> +        return 10;
> +    case 4:
> +        return 16;
> +    case 5:
> +        return 12;
> +    }
> +    PERR("Unable to determine cdb len (0x%02hhX)", cdb[0]>>5);
> +    return -1;
> +}
> +
> +static int handle_cmd_sync(struct iscsi_context *ctx, 
> +                           VirtIOSCSICmdReq *req,
> +                           struct iovec *out, unsigned int out_len,
> +                           VirtIOSCSICmdResp *rsp,
> +                           struct iovec *in, unsigned int in_len) {
> +    struct scsi_task *task;
> +    uint32_t dir;
> +    uint32_t len;
> +    int cdb_len;
> +    int i;
> +
> +    if (!((!req->lun[1]) && (req->lun[2] == 0x40) && (!req->lun[3]))) {
> +        // Ignore anything different than target=0, lun=0
> +        PDBG("Ignoring unconnected lun (0x%hhX, 0x%hhX)",
> +             req->lun[1], req->lun[3]);
> +        rsp->status = SCSI_STATUS_CHECK_CONDITION;
> +        memset(rsp->sense, 0, sizeof(rsp->sense));
> +        rsp->sense_len = 18;
> +        rsp->sense[0] = 0x70;
> +        rsp->sense[2] = 0x05; // ILLEGAL_REQUEST
> +        rsp->sense[7] = 10;
> +        rsp->sense[12] = 0x24;
> +
> +        return 0;
> +    }
> +
> +    cdb_len = get_cdb_len(req->cdb);
> +    if (cdb_len == -1) {
> +        return -1;
> +    }
> +
> +    len = 0;
> +    if (!out_len && !in_len) {
> +        dir = SCSI_XFER_NONE;
> +    } else if (out_len) {
> +        dir = SCSI_XFER_TO_DEV;
> +        for (i=0; i<out_len; i++) {
> +            len += out[i].iov_len;
> +        }
> +    } else {
> +        dir = SCSI_XFER_FROM_DEV;
> +        for (i=0; i<in_len; i++) {
> +            len += in[i].iov_len;
> +        }
> +    }
> +
> +    task = scsi_task_new(cdb_len, req->cdb, dir, len);
> +    if (!task) {
> +        PERR("Unable to create iscsi task");
> +        return -1;
> +    }
> +
> +    if (dir == SCSI_XFER_TO_DEV) {
> +        task->iovector_out.iov = (struct scsi_iovec *)out;
> +        task->iovector_out.niov = out_len;
> +    } else if (dir == SCSI_XFER_FROM_DEV) {
> +        task->iovector_in.iov = (struct scsi_iovec *)in;
> +        task->iovector_in.niov = in_len;
> +    }
> +
> +    PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
> +         cdb_len, dir, task);
> +    if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
> +        PERR("Error serving SCSI command");
> +        free(task);
> +        return -1;
> +    }
> +
> +    memset(rsp, 0, sizeof(*rsp));
> +
> +    rsp->status = task->status;
> +    rsp->resid  = task->residual;
> +
> +    if (task->status == SCSI_STATUS_CHECK_CONDITION) {
> +        rsp->response = VIRTIO_SCSI_S_FAILURE;
> +        rsp->sense_len = task->datain.size - 2;
> +        memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
> +    }
> +
> +    free(task);
> +
> +    PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
> +         rsp->status, rsp->resid, rsp->response, rsp->sense_len);
> +
> +    return 0;
> +}
> +
> +/** libvhost-user callbacks **/
> +
> +static void vus_panic_cb(VuDev *vu_dev, const char *buf) {
> +    vhost_scsi_dev_t *vdev_scsi;
> +
> +    assert(vu_dev);
> +
> +    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
> +
> +    if (buf) {
> +        PERR("vu_panic: %s", buf);
> +    }
> +
> +    if (vdev_scsi) {
> +        vdev_scsi->sched.quit = 1;
> +    }
> +}
> +
> +static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb,
> +                             void *pvt) {
> +    vhost_scsi_dev_t *vdev_scsi;
> +    int poll_evt = 0;
> +
> +    assert(vu_dev);
> +    assert(fd >= 0);
> +    assert(cb);
> +
> +    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
> +    if (!vdev_scsi) {
> +        vus_panic_cb(vu_dev, NULL);
> +    }
> +
> +    /* TODO: VU_WATCH_* should match POLL*, check it */
> +    if (vu_evt & VU_WATCH_IN)  poll_evt |= POLLIN;
> +    if (vu_evt & VU_WATCH_OUT) poll_evt |= POLLOUT;
> +    if (vu_evt & VU_WATCH_PRI) poll_evt |= POLLPRI;
> +    if (vu_evt & VU_WATCH_ERR) poll_evt |= POLLERR;
> +    if (vu_evt & VU_WATCH_HUP) poll_evt |= POLLHUP;
> +
> +    if (sched_add(&vdev_scsi->sched, fd, poll_evt, cb, NULL, pvt)) {
> +        vus_panic_cb(vu_dev, NULL);
> +    }
> +}
> +
> +static void vus_del_watch_cb(VuDev *vu_dev, int fd) {
> +    vhost_scsi_dev_t *vdev_scsi;
> +
> +    assert(vu_dev);
> +    assert(fd >= 0);
> +
> +    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
> +    if (!vdev_scsi) {
> +        vus_panic_cb(vu_dev, NULL);
> +        return;
> +    }
> +
> +    if (sched_del(&vdev_scsi->sched, fd)) {
> +        vus_panic_cb(vu_dev, NULL);
> +    }
> +}
> +
> +static void vus_proc_ctl(VuDev *vu_dev, int idx) {
> +    /* Control VQ not implemented */
> +}
> +
> +static void vus_proc_evt(VuDev *vu_dev, int idx) {
> +    /* Event VQ not implemented */
> +}
> +
> +static void vus_proc_req(VuDev *vu_dev, int idx) {
> +    vhost_scsi_dev_t *vdev_scsi;
> +    VuVirtq *vq;
> +
> +    assert(vu_dev);
> +
> +    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
> +    if (!vdev_scsi) {
> +        vus_panic_cb(vu_dev, NULL);
> +        return;
> +    }
> +
> +    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
> +        PERR("VQ Index out of range: %d", idx);
> +        vus_panic_cb(vu_dev, NULL);
> +        return;
> +    }
> +
> +    vq = vu_get_queue(vu_dev, idx);
> +    if (!vq) {
> +        PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
> +        vus_panic_cb(vu_dev, NULL);
> +        return;
> +    }
> +
> +    PDBG("Got kicked on vq[%d]@%p", idx, vq);
> +
> +    while(1) {
> +        VuVirtqElement *elem;
> +        VirtIOSCSICmdReq *req;
> +        VirtIOSCSICmdResp *rsp;
> +
> +        elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
> +        if (!elem) {
> +            PDBG("No more elements pending on vq[%d]@%p", idx, vq);
> +            break;
> +        }
> +        PDBG("Popped elem@%p", elem);
> +
> +        assert(!((elem->out_num > 1) && (elem->in_num > 1)));
> +        assert((elem->out_num > 0) && (elem->in_num > 0));
> +
> +        if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
> +            PERR("Invalid virtio-scsi req header");
> +            vus_panic_cb(vu_dev, NULL);
> +            break;
> +        }
> +        req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
> +
> +        if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
> +            PERR("Invalid virtio-scsi rsp header");
> +            vus_panic_cb(vu_dev, NULL);
> +            break;
> +        }
> +        rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
> +
> +        if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx,
> +                            req, &elem->out_sg[1], elem->out_num-1,
> +                            rsp, &elem->in_sg[1], elem->in_num-1) != 0) {
> +            vus_panic_cb(vu_dev, NULL);
> +            break;
> +        }
> +
> +        vu_queue_push(vu_dev, vq, elem, 0);
> +        vu_queue_notify(vu_dev, vq);
> +
> +        free(elem);
> +    }
> +
> +}
> +
> +static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) {
> +    VuVirtq *vq;
> +
> +    assert(vu_dev);
> +
> +    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
> +        PERR("VQ Index out of range: %d", idx);
> +        vus_panic_cb(vu_dev, NULL);
> +        return;
> +    }
> +
> +    vq = vu_get_queue(vu_dev, idx);
> +
> +    switch(idx) {
> +    case 0:
> +        vu_set_queue_handler(vu_dev, vq, started?vus_proc_ctl:NULL);
> +        break;
> +    case 1:
> +        vu_set_queue_handler(vu_dev, vq, started?vus_proc_evt:NULL);
> +        break;
> +    default:
> +        vu_set_queue_handler(vu_dev, vq, started?vus_proc_req:NULL);
> +    }
> +}
> +
> +static const VuDevIface vus_iface = { 
> +    .queue_set_started = vus_queue_set_started,
> +};
> +
> +static void vus_vhost_cb(VuDev *vu_dev, int vu_evt, void *data) {
> +    assert(vu_dev);
> +
> +    if (!vu_dispatch(vu_dev)) {
> +        PERR("Error processing vhost message");
> +        vus_panic_cb(vu_dev, NULL);
> +    }
> +}
> +
> +/** util **/
> +
> +static int unix_sock_new(char *unix_fn) {
> +    int sock;
> +    struct sockaddr_un un;
> +    size_t len;
> +
> +    assert(unix_fn);
> +
> +    sock = socket(AF_UNIX, SOCK_STREAM, 0);
> +    if (sock <= 0) {
> +        perror("socket");
> +        return -1;
> +    }
> +
> +    un.sun_family = AF_UNIX;
> +    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
> +    len = sizeof(un.sun_family) + strlen(un.sun_path);
> +
> +    (void)unlink(unix_fn);
> +    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
> +        perror("bind");
> +        goto fail;
> +    }
> +
> +    if (listen(sock, 1) < 0) {
> +        perror("listen");
> +        goto fail;
> +    }
> +
> +    return sock;
> +
> +fail:
> +    (void)close(sock);
> +
> +    return -1;
> +}
> +
> +/** vhost-user-scsi **/
> +
> +static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev) {
> +    int i;
> +
> +    assert(vu_dev);
> +
> +    for (i=0; i<VDEV_SCSI_MAX_DEVS; i++) {
> +        if (&vhost_scsi_devs[i]->vu_dev == vu_dev) {
> +            return vhost_scsi_devs[i];
> +        }
> +    }
> +
> +    PERR("Unknown VuDev %p", vu_dev);
> +    return NULL;
> +}
> +
> +static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi) {
> +    if (!vdev_scsi) {
> +        return;
> +    }
> +
> +    if (vdev_scsi->server_sock >= 0) {
> +        struct sockaddr_storage ss;
> +        socklen_t sslen = sizeof(ss);
> +
> +        if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss,
> +                        &sslen) == 0) {
> +            struct sockaddr_un *su = (struct sockaddr_un *)&ss;
> +            (void)unlink(su->sun_path);
> +        }
> +
> +        (void)close(vdev_scsi->server_sock);
> +    }
> +}
> +
> +static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn) {
> +    vhost_scsi_dev_t *vdev_scsi;
> +
> +    assert(unix_fn);
> +
> +    vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t));
> +    if (!vdev_scsi) {
> +        perror("calloc");
> +        return NULL;
> +    }
> +
> +    vdev_scsi->server_sock = unix_sock_new(unix_fn);
> +    if (vdev_scsi->server_sock < 0) {
> +        free(vdev_scsi);
> +        return NULL;
> +    }
> +
> +    vdev_scsi->sched.vu_dev = &vdev_scsi->vu_dev;
> +
> +    return vdev_scsi;
> +}
> +
> +static int vdev_scsi_iscsi_add_lun(vhost_scsi_dev_t *vdev_scsi,
> +                                   char *iscsi_uri, uint32_t lun) {
> +    assert(vdev_scsi);
> +    assert(iscsi_uri);
> +    assert(lun < VDEV_SCSI_MAX_LUNS);
> +
> +    if (vdev_scsi->luns[lun].iscsi_ctx) {
> +        PERR("Lun %d already configured", lun);
> +        return -1;
> +    }
> +
> +    if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi) {
> +    int cli_sock;
> +    int ret = 0;
> +
> +    assert(vdev_scsi);
> +    assert(vdev_scsi->server_sock >= 0);
> +
> +    cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0);
> +    if (cli_sock < 0) {
> +        perror("accept");
> +        return -1;
> +    }
> +
> +    vu_init(&vdev_scsi->vu_dev,
> +            cli_sock,
> +            vus_panic_cb,
> +            vus_add_watch_cb,
> +            vus_del_watch_cb,
> +            &vus_iface);
> +
> +    ret = sched_add(&vdev_scsi->sched, cli_sock, POLLIN, vus_vhost_cb, NULL, 0);
> +    if (ret) {
> +        goto fail;
> +    }
> +
> +    if (sched_loop(&vdev_scsi->sched) != 0) {
> +        goto fail;
> +    }
> +
> +out:
> +    vu_deinit(&vdev_scsi->vu_dev);
> +
> +    return ret;
> +
> +fail:
> +    ret = -1;
> +    goto out;
> +}
> +
> +int main(int argc, char **argv)
> +{
> +    vhost_scsi_dev_t *vdev_scsi = NULL;
> +    char *unix_fn = NULL;
> +    char *iscsi_uri = NULL;
> +    int opt, err = EXIT_SUCCESS;
> +
> +    while ((opt = getopt(argc, argv, "u:i:h")) != -1) {
> +        switch (opt) {
> +        case 'h':
> +            goto help;
> +        case 'u':
> +            unix_fn = strdup(optarg);
> +            break;
> +        case 'i':
> +            iscsi_uri = strdup(optarg);
> +            break;
> +        default:
> +            goto help;
> +        }
> +    }
> +    if (!unix_fn || !iscsi_uri) {
> +        goto help;
> +    }
> +
> +    vdev_scsi = vdev_scsi_new(unix_fn);
> +    if (!vdev_scsi) {
> +        goto err;
> +    }
> +    vhost_scsi_devs[0] = vdev_scsi;
> +
> +    if (vdev_scsi_iscsi_add_lun(vdev_scsi, iscsi_uri, 0) != 0) {
> +        goto err;
> +    }
> +
> +    if (vdev_scsi_run(vdev_scsi) != 0) {
> +        goto err;
> +    }
> +
> +out:
> +    if (vdev_scsi) {
> +        vdev_scsi_deinit(vdev_scsi);
> +        free(vdev_scsi);
> +    }
> +    if (unix_fn) {
> +        free(unix_fn);
> +    }
> +    if (iscsi_uri) {
> +        free(iscsi_uri);
> +    }
> +
> +    return err;
> +
> +err:
> +    err = EXIT_FAILURE;
> +    goto out;
> +
> +help:
> +    fprintf(stderr, "Usage: %s [ -u unix_sock_path -i iscsi_uri ] | [ -h ]\n",
> +            argv[0]);
> +    fprintf(stderr, "          -u path to unix socket\n");
> +    fprintf(stderr, "          -i iscsi uri for lun 0\n");
> +    fprintf(stderr, "          -h print help and quit\n");
> +
> +    goto err;
> +}
>
Felipe Franciosi Oct. 27, 2016, 12:48 p.m. UTC | #2
Hello,

> On 27 Oct 2016, at 13:16, Paolo Bonzini <pbonzini@redhat.com> wrote:
> 
> 
> 
> On 26/10/2016 17:26, Felipe Franciosi wrote:
>> This commit introduces a vhost-user-scsi backend sample application. It
>> must be linked with libiscsi and libvhost-user.
>> 
>> To use it, compile with:
>> make tests/vhost-user-scsi
>> 
>> And run as follows:
>> tests/vhost-user-scsi -u /tmp/vus.sock -i iscsi://uri_to_target/
>> 
>> The application is currently limited to one LUN and processes
>> requests synchronously (therefore only achieving QD1). The purpose of
>> the code is to show how a backend can be implemented and to test the
>> vhost-user-scsi QEMU implementation.
>> 
>> If another instance of this vhost-user-scsi application is running on
>> a remote host, a VM can be live migrated to that host.
> 
> Hi,
> 
> the right directory for this is contrib/.

Cool. I was following suit from vhost-user-bridge which lives in tests/ today. To me, it makes more sense for these to be in contrib/. I'll place my sample application there for v2 and perhaps we should move vhost-user-bridge later?

> 
> Is it possible to use GSource and GIOChannel instead for the event loop?
> There is some dead code (for example cb2 as far as I can see) and
> having the millionth implementation of an event loop distracts from the
> meat of the code. :)

That's true. I'll have a stab at using glib's event loop. The cb2 was meant to be used for libiscsi's async submission, but I ended up with QD1 for simplicity. You're right, it looks pretty dead at the minute. :)
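
For the record, a rough sketch of what that cb2 path could look like,
using libiscsi's fd integration (iscsi_get_fd()/iscsi_which_events()/
iscsi_service()) and the async submission API; apart from those libiscsi
calls and the helpers already in the patch, the names below are
hypothetical:

    /* cb2: called by the poll scheduler whenever the iSCSI fd is ready */
    static void iscsi_evt_cb(short revents, void *pvt)
    {
        struct iscsi_context *ctx = pvt;

        if (iscsi_service(ctx, revents) < 0) {
            PERR("iscsi_service failed: %s", iscsi_get_error(ctx));
        }
    }

    /* called once after iscsi_add_lun(): watch the context's fd via cb2 */
    static int iscsi_arm_events(vhost_scsi_dev_t *vdev_scsi,
                                struct iscsi_context *ctx)
    {
        return sched_add(&vdev_scsi->sched, iscsi_get_fd(ctx),
                         iscsi_which_events(ctx), NULL, iscsi_evt_cb, ctx);
    }

    /* completion callback, invoked from within iscsi_service() */
    static void iscsi_cmd_done(struct iscsi_context *ctx, int status,
                               void *command_data, void *private_data)
    {
        struct scsi_task *task = command_data;
        VuVirtqElement *elem = private_data;

        /* fill in the VirtIOSCSICmdResp from task->status / task->datain,
         * then vu_queue_push() + vu_queue_notify() and free task and elem */
        (void)ctx; (void)status; (void)task; (void)elem;
    }

    /* submission side, replacing the blocking iscsi_scsi_command_sync() */
    static int handle_cmd_async(struct iscsi_context *ctx, int lun,
                                struct scsi_task *task, VuVirtqElement *elem)
    {
        if (iscsi_scsi_command_async(ctx, lun, task, iscsi_cmd_done,
                                     NULL, elem) != 0) {
            PERR("Failed to queue SCSI command: %s", iscsi_get_error(ctx));
            return -1;
        }
        return 0; /* the response is pushed later, from iscsi_cmd_done() */
    }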

Cheers,
Felipe

Paolo Bonzini Oct. 27, 2016, 12:58 p.m. UTC | #3
On 27/10/2016 14:48, Felipe Franciosi wrote:
> Hello,
> 
>> On 27 Oct 2016, at 13:16, Paolo Bonzini <pbonzini@redhat.com> wrote:
>>
>>
>>
>> On 26/10/2016 17:26, Felipe Franciosi wrote:
>>> This commit introduces a vhost-user-scsi backend sample application. It
>>> must be linked with libiscsi and libvhost-user.
>>>
>>> To use it, compile with:
>>> make tests/vhost-user-scsi
>>>
>>> And run as follows:
>>> tests/vhost-user-scsi -u /tmp/vus.sock -i iscsi://uri_to_target/
>>>
>>> The application is currently limited at one LUN only and it processes
>>> requests synchronously (therefore only achieving QD1). The purpose of
>>> the code is to show how a backend can be implemented and to test the
>>> vhost-user-scsi Qemu implementation.
>>>
>>> If a different instance of this vhost-user-scsi application is executed
>>> at a remote host, a VM can be live migrated to such a host.
>>
>> Hi,
>>
>> the right directory for this is contrib/.
> 
> Cool. I was following suit from vhost-user-bridge which lives in
> tests/ today. To me, it makes more sense for these to be in contrib/.
> I'll place my sample application there for v2 and perhaps we should move
> vhost-user-bridge later?

Yes, that would make sense.  Adding Victor in Cc.

Paolo
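
The patch below hand-rolls a poll()-based scheduler (sched_add/sched_del/sched_loop). For comparison only, a minimal sketch of equivalent add/remove-watch helpers on top of GLib's main loop could look roughly like this; the watch_t type and the vus_gsrc_* names are illustrative and not part of the patch:

#include <glib.h>

typedef struct watch {
    GIOChannel *chan;
    guint       tag;
} watch_t;

/* Register fd with the default GMainContext; cb fires when fd is readable. */
static watch_t *vus_gsrc_add(int fd, GIOFunc cb, gpointer data)
{
    watch_t *w = g_new0(watch_t, 1);

    w->chan = g_io_channel_unix_new(fd);
    w->tag  = g_io_add_watch(w->chan, G_IO_IN | G_IO_HUP, cb, data);

    return w;
}

/* Remove the watch and release the channel. */
static void vus_gsrc_del(watch_t *w)
{
    g_source_remove(w->tag);
    g_io_channel_unref(w->chan);
    g_free(w);
}

/* The scheduler loop then reduces to running the GLib main loop. */
static void vus_gsrc_run(void)
{
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);

    g_main_loop_run(loop);      /* runs until g_main_loop_quit() is called */
    g_main_loop_unref(loop);
}

With helpers along these lines, the add/remove-watch callbacks handed to libvhost-user would wrap vus_gsrc_add()/vus_gsrc_del(), and the panic path would call g_main_loop_quit() instead of setting sched.quit.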
diff mbox

Patch

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 7e6fd23..e61fe54 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -685,6 +685,8 @@  tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-ob
 tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
 tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
 tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
+tests/vhost-user-scsi.o-cflags := $(LIBISCSI_CFLAGS)
+tests/vhost-user-scsi$(EXESUF): tests/vhost-user-scsi.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y) $(test-block-obj-y)
 tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
 tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
 
diff --git a/tests/vhost-user-scsi.c b/tests/vhost-user-scsi.c
new file mode 100644
index 0000000..c92b3b2
--- /dev/null
+++ b/tests/vhost-user-scsi.c
@@ -0,0 +1,862 @@ 
+/*
+ * vhost-user-scsi sample application
+ *
+ * Copyright (c) 2016 Nutanix Inc. All rights reserved.
+ *
+ * Author:
+ *  Felipe Franciosi <felipe@nutanix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 only.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "contrib/libvhost-user/libvhost-user.h"
+#include "hw/virtio/virtio-scsi.h"
+#include "iscsi/iscsi.h"
+
+#include <poll.h>
+
+#define VHOST_USER_SCSI_DEBUG 1
+
+/** Log helpers **/
+
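+/*
+ * PPRE declares and fills in a UTC timestamp for the current log line.
+ * PEXT prints it with file/function/line context, PNOR without.  With
+ * VHOST_USER_SCSI_DEBUG defined, all levels use the extended form.
+ */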
+#define PPRE                                                          \
+    struct timespec ts;                                               \
+    char   timebuf[64];                                               \
+    struct tm tm;                                                     \
+    (void)clock_gettime(CLOCK_REALTIME, &ts);                         \
+    (void)strftime(timebuf, 64, "%Y%m%d %T", gmtime_r(&ts.tv_sec, &tm))
+
+#define PEXT(lvl, msg, ...) do {                                      \
+    PPRE;                                                             \
+    fprintf(stderr, "%s.%06ld " lvl ": %s:%s():%d: " msg "\n",        \
+            timebuf, ts.tv_nsec/1000,                                 \
+            __FILE__, __func__, __LINE__, ## __VA_ARGS__);            \
+} while(0)
+
+#define PNOR(lvl, msg, ...) do {                                      \
+    PPRE;                                                             \
+    fprintf(stderr, "%s.%06ld " lvl ": " msg "\n",                    \
+            timebuf, ts.tv_nsec/1000, ## __VA_ARGS__);                \
+} while (0)
+
+#ifdef VHOST_USER_SCSI_DEBUG
+#define PDBG(msg, ...) PEXT("DBG", msg, ## __VA_ARGS__)
+#define PERR(msg, ...) PEXT("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PEXT("LOG", msg, ## __VA_ARGS__)
+#else
+#define PDBG(msg, ...) do { } while (0)
+#define PERR(msg, ...) PNOR("ERR", msg, ## __VA_ARGS__)
+#define PLOG(msg, ...) PNOR("LOG", msg, ## __VA_ARGS__)
+#endif
+
+/** vhost-user-scsi specific definitions **/
+
+/* TODO: VHOST_MAX_NR_VIRTQUEUE is currently 8; it should be 1024 */
+#define VUS_SCHED_MAX_FDS (1 + (2*VHOST_MAX_NR_VIRTQUEUE))
+
+#define VDEV_SCSI_MAX_LUNS 1 // Only 1 LUN supported today
+#define VDEV_SCSI_MAX_DEVS 1 // Only 1 device supported today
+
+#define ISCSI_INITIATOR "iqn.2016-10.com.nutanix:vhost-user-scsi"
+
+typedef void (*misc_cb) (short evt, void *pvt);
+
+typedef struct sched_data {
+    vu_watch_cb cb1;
+    misc_cb cb2;
+    void *pvt;
+    short evt;
+} sched_data_t;
+
+typedef struct sched {
+    VuDev *vu_dev;
+    nfds_t nfds;
+    struct pollfd fds[VUS_SCHED_MAX_FDS];
+    sched_data_t data[VUS_SCHED_MAX_FDS];
+    int quit;
+} sched_t;
+
+typedef struct iscsi_lun {
+    struct iscsi_context *iscsi_ctx;
+    int iscsi_lun;
+} iscsi_lun_t;
+
+typedef struct vhost_scsi_dev {
+    VuDev vu_dev;
+    int server_sock;
+    sched_t sched;
+    iscsi_lun_t luns[VDEV_SCSI_MAX_LUNS];
+} vhost_scsi_dev_t;
+
+static vhost_scsi_dev_t *vhost_scsi_devs[VDEV_SCSI_MAX_DEVS];
+
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev);
+
+/** poll-based scheduler for libvhost-user and misc callbacks **/
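+/*
+ * The scheduler keeps parallel arrays of pollfds and per-fd callback data.
+ * A slot holds either a libvhost-user watch callback (cb1) or a generic
+ * callback (cb2), never both.  sched_del() compacts the arrays by moving
+ * the last entry into the freed slot, so fds[0..nfds) stays dense and can
+ * be handed to poll() as-is.
+ */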
+
+static int sched_add(sched_t *sched, int fd, short int evt,
+                     vu_watch_cb cb1, misc_cb cb2, void *pvt) {
+    int i;
+
+    assert(sched);
+    assert(fd >= 0);
+    assert(evt);
+    assert(cb1 || cb2);
+    assert(!(cb1 && cb2)); // only one of the cbs should be used
+
+    for (i=0; i<sched->nfds && i<VUS_SCHED_MAX_FDS; i++) {
+        if (sched->fds[i].fd == fd) {
+            break;
+        }
+    }
+    if (i == VUS_SCHED_MAX_FDS) {
+        PERR("Error adding fd: max number of fds reached");
+        return -1;
+    }
+
+    sched->fds[i].fd = fd;
+    sched->fds[i].events = evt;
+    sched->data[i].cb1 = cb1;
+    sched->data[i].cb2 = cb2;
+    sched->data[i].pvt = pvt;
+    sched->data[i].evt = evt;
+
+    if (sched->nfds <= i) {
+        sched->nfds = i+1;
+    }
+
+    PDBG("sched@%p: add fd %d to slot %i", sched, fd, i);
+
+    return 0;
+}
+
+static int sched_del(sched_t *sched, int fd) {
+    int i;
+
+    assert(sched);
+    assert(fd >= 0);
+
+    for (i=0; i<sched->nfds; i++) {
+        if (sched->fds[i].fd == fd) {
+            break;
+        }
+    }
+    if (sched->nfds == i) {
+#ifdef VUS_PEDANTIC_SCHEDULER
+        PERR("Error deleting fd %d: fd not found", fd);
+        return -1;
+#else
+        return 0;
+#endif
+    }
+
+    sched->nfds--;
+    if (sched->nfds > 0) {
+        // Overwrite deleted entry with last entry from scheduler
+        memcpy(&sched->fds[i],  &sched->fds[sched->nfds],
+               sizeof(struct pollfd));
+        memcpy(&sched->data[i], &sched->data[sched->nfds],
+               sizeof(sched_data_t));
+    }
+    memset(&sched->fds[sched->nfds], 0, sizeof(struct pollfd));
+    memset(&sched->data[sched->nfds], 0, sizeof(sched_data_t));
+
+    PDBG("sched@%p: del fd %d from slot %i", sched, fd, i);
+
+    return 0;
+}
+
+static int sched_loop(sched_t *sched) {
+    int i, n;
+
+    assert(sched);
+    assert(sched->nfds > 0);
+
+    while (!sched->quit) {
+        n = poll(sched->fds, sched->nfds, -1);
+        if (n < 0) {
+            PERR("Error polling: %s", strerror(errno));
+            return -1;
+        }
+
+        for (i=0; i<sched->nfds && n; i++) {
+            if (sched->fds[i].revents != 0) {
+
+                if (sched->data[i].cb1) {
+                    int vu_evt = 0;
+
+                    if (sched->fds[i].revents & POLLIN)  vu_evt |= VU_WATCH_IN;
+                    if (sched->fds[i].revents & POLLOUT) vu_evt |= VU_WATCH_OUT;
+                    if (sched->fds[i].revents & POLLPRI) vu_evt |= VU_WATCH_PRI;
+                    if (sched->fds[i].revents & POLLERR) vu_evt |= VU_WATCH_ERR;
+                    if (sched->fds[i].revents & POLLHUP) vu_evt |= VU_WATCH_HUP;
+
+                    PDBG("sched@%p: fd[%d] (%d): cb1(%p, %d, %p)", sched, i,
+                         sched->fds[i].fd, sched->vu_dev, vu_evt,
+                         sched->data[i].pvt);
+
+                    sched->data[i].cb1(sched->vu_dev, vu_evt,
+                                       sched->data[i].pvt);
+                } else {
+                    PDBG("sched@%p: fd[%d] (%d): cb2(%hd, %p)", sched, i,
+                         sched->fds[i].fd, sched->fds[i].revents,
+                         sched->data[i].pvt);
+
+                    sched->data[i].cb2(sched->fds[i].revents,
+                                       sched->data[i].pvt);
+                }
+
+                n--;
+            }
+        }
+    }
+
+    return 0;
+}
+
+/** from libiscsi's scsi-lowlevel.h **/
+
+#define SCSI_CDB_MAX_SIZE           16
+
+struct scsi_iovector {
+    struct scsi_iovec *iov;
+    int niov;
+    int nalloc;
+    size_t offset;
+    int consumed;
+};
+
+struct scsi_allocated_memory {
+    struct scsi_allocated_memory *next;
+    char buf[0];
+};
+
+struct scsi_data {
+    int            size;
+    unsigned char *data;
+};
+
+enum scsi_sense_key {
+    SCSI_SENSE_NO_SENSE            = 0x00,
+    SCSI_SENSE_RECOVERED_ERROR     = 0x01,
+    SCSI_SENSE_NOT_READY           = 0x02,
+    SCSI_SENSE_MEDIUM_ERROR        = 0x03,
+    SCSI_SENSE_HARDWARE_ERROR      = 0x04,
+    SCSI_SENSE_ILLEGAL_REQUEST     = 0x05,
+    SCSI_SENSE_UNIT_ATTENTION      = 0x06,
+    SCSI_SENSE_DATA_PROTECTION     = 0x07,
+    SCSI_SENSE_BLANK_CHECK         = 0x08,
+    SCSI_SENSE_VENDOR_SPECIFIC     = 0x09,
+    SCSI_SENSE_COPY_ABORTED        = 0x0a,
+    SCSI_SENSE_COMMAND_ABORTED     = 0x0b,
+    SCSI_SENSE_OBSOLETE_ERROR_CODE = 0x0c,
+    SCSI_SENSE_OVERFLOW_COMMAND    = 0x0d,
+    SCSI_SENSE_MISCOMPARE          = 0x0e
+};
+
+struct scsi_sense {
+    unsigned char       error_type;
+    enum scsi_sense_key key;
+    int                 ascq;
+    unsigned            sense_specific:1;
+    unsigned            ill_param_in_cdb:1;
+    unsigned            bit_pointer_valid:1;
+    unsigned char       bit_pointer;
+    uint16_t            field_pointer;
+};
+
+enum scsi_residual {
+    SCSI_RESIDUAL_NO_RESIDUAL = 0,
+    SCSI_RESIDUAL_UNDERFLOW,
+    SCSI_RESIDUAL_OVERFLOW
+};
+
+struct scsi_task {
+    int status;
+    int cdb_size;
+    int xfer_dir;
+    int expxferlen;
+    unsigned char cdb[SCSI_CDB_MAX_SIZE];
+    enum scsi_residual residual_status;
+    size_t residual;
+    struct scsi_sense sense;
+    struct scsi_data datain;
+    struct scsi_allocated_memory *mem;
+    void *ptr;
+
+    uint32_t itt;
+    uint32_t cmdsn;
+    uint32_t lun;
+
+    struct scsi_iovector iovector_in;
+    struct scsi_iovector iovector_out;
+};
+
+/** libiscsi integration **/
+
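+/*
+ * Parse the iSCSI URI and log in to the portal synchronously.  On success
+ * the context and LUN number are stored in *lun; on failure the context
+ * is destroyed and -1 is returned.
+ */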
+static int iscsi_add_lun(iscsi_lun_t *lun, char *iscsi_uri) {
+    struct iscsi_url *iscsi_url;
+    struct iscsi_context *iscsi_ctx;
+    int ret = 0;
+
+    assert(lun);
+
+    iscsi_ctx = iscsi_create_context(ISCSI_INITIATOR);
+    if (!iscsi_ctx) {
+        PERR("Unable to create iSCSI context");
+        return -1;
+    }
+
+    iscsi_url = iscsi_parse_full_url(iscsi_ctx, iscsi_uri);
+    if (!iscsi_url) {
+        PERR("Unable to parse iSCSI URL: %s", iscsi_get_error(iscsi_ctx));
+        goto fail;
+    }
+
+    iscsi_set_session_type(iscsi_ctx, ISCSI_SESSION_NORMAL);
+    iscsi_set_header_digest(iscsi_ctx, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+    if (iscsi_full_connect_sync(iscsi_ctx, iscsi_url->portal, iscsi_url->lun)) {
+        PERR("Unable to login to iSCSI portal: %s", iscsi_get_error(iscsi_ctx));
+        goto fail;
+    }
+
+    lun->iscsi_ctx = iscsi_ctx;
+    lun->iscsi_lun = iscsi_url->lun;
+
+    PDBG("Context %p created for lun 0: %s", iscsi_ctx, iscsi_uri);
+
+out:
+    if (iscsi_url) {
+        iscsi_destroy_url(iscsi_url);
+    }
+    return ret;
+
+fail:
+    (void)iscsi_destroy_context(iscsi_ctx);
+    ret = -1;
+    goto out;
+}
+
+static struct scsi_task *scsi_task_new(int cdb_len, uint8_t *cdb, int dir,
+                                       int xfer_len) {
+    struct scsi_task *task;
+
+    assert(cdb_len > 0);
+    assert(cdb);
+
+    task = calloc(1, sizeof(struct scsi_task));
+    if (!task) {
+        PERR("Error allocating task: %s", strerror(errno));
+        return NULL;
+    }
+
+    memcpy(task->cdb, cdb, cdb_len);
+    task->cdb_size = cdb_len;
+    task->xfer_dir = dir;
+    task->expxferlen = xfer_len;
+
+    return task;
+}
+
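+/*
+ * The top three bits of the first CDB byte encode the command group, which
+ * fixes the CDB length: group 0 is 6 bytes, groups 1 and 2 are 10 bytes,
+ * group 4 is 16 bytes and group 5 is 12 bytes.
+ */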
+static int get_cdb_len(uint8_t *cdb) {
+    switch(cdb[0] >> 5){
+    case 0:
+        return 6;
+    case 1:
+    case 2:
+        return 10;
+    case 4:
+        return 16;
+    case 5:
+        return 12;
+    }
+    PERR("Unable to determine cdb len (0x%02hhX)", cdb[0]>>5);
+    return -1;
+}
+
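+/*
+ * Serve one virtio-scsi command synchronously.  "out"/"in" carry only the
+ * data buffers: the caller strips out_sg[0] (VirtIOSCSICmdReq) and
+ * in_sg[0] (VirtIOSCSICmdResp) before calling, so a request has either
+ * write data (out) or read data (in), never both.
+ */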
+static int handle_cmd_sync(struct iscsi_context *ctx,
+                           VirtIOSCSICmdReq *req,
+                           struct iovec *out, unsigned int out_len,
+                           VirtIOSCSICmdResp *rsp,
+                           struct iovec *in, unsigned int in_len) {
+    struct scsi_task *task;
+    uint32_t dir;
+    uint32_t len;
+    int cdb_len;
+    int i;
+
+    if (!((!req->lun[1]) && (req->lun[2] == 0x40) && (!req->lun[3]))) {
+        // Reject anything other than target=0, lun=0 with CHECK_CONDITION
+        PDBG("Ignoring request to unconnected lun (0x%hhX, 0x%hhX)",
+             req->lun[1], req->lun[3]);
+        rsp->status = SCSI_STATUS_CHECK_CONDITION;
+        memset(rsp->sense, 0, sizeof(rsp->sense));
+        rsp->sense_len = 18;
+        rsp->sense[0] = 0x70;  // fixed format, current error
+        rsp->sense[2] = 0x05;  // sense key: ILLEGAL_REQUEST
+        rsp->sense[7] = 10;    // additional sense length
+        rsp->sense[12] = 0x24; // ASC: INVALID FIELD IN CDB
+
+        return 0;
+    }
+
+    cdb_len = get_cdb_len(req->cdb);
+    if (cdb_len == -1) {
+        return -1;
+    }
+
+    len = 0;
+    if (!out_len && !in_len) {
+        dir = SCSI_XFER_NONE;
+    } else if (out_len) {
+        dir = SCSI_XFER_TO_DEV;
+        for (i=0; i<out_len; i++) {
+            len += out[i].iov_len;
+        }
+    } else {
+        dir = SCSI_XFER_FROM_DEV;
+        for (i=0; i<in_len; i++) {
+            len += in[i].iov_len;
+        }
+    }
+
+    task = scsi_task_new(cdb_len, req->cdb, dir, len);
+    if (!task) {
+        PERR("Unable to create iscsi task");
+        return -1;
+    }
+
+    if (dir == SCSI_XFER_TO_DEV) {
+        task->iovector_out.iov = (struct scsi_iovec *)out;
+        task->iovector_out.niov = out_len;
+    } else if (dir == SCSI_XFER_FROM_DEV) {
+        task->iovector_in.iov = (struct scsi_iovec *)in;
+        task->iovector_in.niov = in_len;
+    }
+
+    PDBG("Sending iscsi cmd (cdb_len=%d, dir=%d, task=%p)",
+         cdb_len, dir, task);
+    /* The iSCSI LUN is fixed at 0; only a single LUN is supported. */
+    if (!iscsi_scsi_command_sync(ctx, 0, task, NULL)) {
+        PERR("Error serving SCSI command");
+        free(task);
+        return -1;
+    }
+
+    memset(rsp, 0, sizeof(*rsp));
+
+    rsp->status = task->status;
+    rsp->resid  = task->residual;
+
+    if (task->status == SCSI_STATUS_CHECK_CONDITION) {
+        rsp->response = VIRTIO_SCSI_S_FAILURE;
+        rsp->sense_len = task->datain.size - 2;
+        memcpy(rsp->sense, &task->datain.data[2], rsp->sense_len);
+    }
+
+    free(task);
+
+    PDBG("Filled in rsp: status=%hhX, resid=%u, response=%hhX, sense_len=%u",
+         rsp->status, rsp->resid, rsp->response, rsp->sense_len);
+
+    return 0;
+}
+
+/** libvhost-user callbacks **/
+
+static void vus_panic_cb(VuDev *vu_dev, const char *buf) {
+    vhost_scsi_dev_t *vdev_scsi;
+
+    assert(vu_dev);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+
+    if (buf) {
+        PERR("vu_panic: %s", buf);
+    }
+
+    if (vdev_scsi) {
+        vdev_scsi->sched.quit = 1;
+    }
+}
+
+static void vus_add_watch_cb(VuDev *vu_dev, int fd, int vu_evt, vu_watch_cb cb,
+                             void *pvt) {
+    vhost_scsi_dev_t *vdev_scsi;
+    int poll_evt = 0;
+
+    assert(vu_dev);
+    assert(fd >= 0);
+    assert(cb);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    /* TODO: VU_WATCH_* should match POLL*, check it */
+    if (vu_evt & VU_WATCH_IN)  poll_evt |= POLLIN;
+    if (vu_evt & VU_WATCH_OUT) poll_evt |= POLLOUT;
+    if (vu_evt & VU_WATCH_PRI) poll_evt |= POLLPRI;
+    if (vu_evt & VU_WATCH_ERR) poll_evt |= POLLERR;
+    if (vu_evt & VU_WATCH_HUP) poll_evt |= POLLHUP;
+
+    if (sched_add(&vdev_scsi->sched, fd, poll_evt, cb, NULL, pvt)) {
+        vus_panic_cb(vu_dev, NULL);
+    }
+}
+
+static void vus_del_watch_cb(VuDev *vu_dev, int fd) {
+    vhost_scsi_dev_t *vdev_scsi;
+
+    assert(vu_dev);
+    assert(fd >= 0);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    if (sched_del(&vdev_scsi->sched, fd)) {
+        vus_panic_cb(vu_dev, NULL);
+    }
+}
+
+static void vus_proc_ctl(VuDev *vu_dev, int idx) {
+    /* Control VQ not implemented */
+}
+
+static void vus_proc_evt(VuDev *vu_dev, int idx) {
+    /* Event VQ not implemented */
+}
+
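+/*
+ * Request VQ handler: pop and serve elements one at a time.  Commands are
+ * handled synchronously, so the backend never has more than one request in
+ * flight (QD1).
+ */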
+static void vus_proc_req(VuDev *vu_dev, int idx) {
+    vhost_scsi_dev_t *vdev_scsi;
+    VuVirtq *vq;
+
+    assert(vu_dev);
+
+    vdev_scsi = vdev_scsi_find_by_vu(vu_dev);
+    if (!vdev_scsi) {
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+        PERR("VQ Index out of range: %d", idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    vq = vu_get_queue(vu_dev, idx);
+    if (!vq) {
+        PERR("Error fetching VQ (dev=%p, idx=%d)", vu_dev, idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    PDBG("Got kicked on vq[%d]@%p", idx, vq);
+
+    while(1) {
+        VuVirtqElement *elem;
+        VirtIOSCSICmdReq *req;
+        VirtIOSCSICmdResp *rsp;
+
+        elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement));
+        if (!elem) {
+            PDBG("No more elements pending on vq[%d]@%p", idx, vq);
+            break;
+        }
+        PDBG("Popped elem@%p", elem);
+
+        // out_sg[0] holds the request header and in_sg[0] the response
+        // header; data buffers may follow on one side only, never both.
+        assert(!((elem->out_num > 1) && (elem->in_num > 1)));
+        assert((elem->out_num > 0) && (elem->in_num > 0));
+
+        if (elem->out_sg[0].iov_len < sizeof(VirtIOSCSICmdReq)) {
+            PERR("Invalid virtio-scsi req header");
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+        req = (VirtIOSCSICmdReq *)elem->out_sg[0].iov_base;
+
+        if (elem->in_sg[0].iov_len < sizeof(VirtIOSCSICmdResp)) {
+            PERR("Invalid virtio-scsi rsp header");
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+        rsp = (VirtIOSCSICmdResp *)elem->in_sg[0].iov_base;
+
+        if (handle_cmd_sync(vdev_scsi->luns[0].iscsi_ctx,
+                            req, &elem->out_sg[1], elem->out_num-1,
+                            rsp, &elem->in_sg[1], elem->in_num-1) != 0) {
+            vus_panic_cb(vu_dev, NULL);
+            break;
+        }
+
+        vu_queue_push(vu_dev, vq, elem, 0);
+        vu_queue_notify(vu_dev, vq);
+
+        free(elem);
+    }
+
+}
+
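+/*
+ * Per the virtio-scsi queue layout: vq 0 is the control queue, vq 1 the
+ * event queue, and vq 2 onwards are request queues.
+ */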
+static void vus_queue_set_started(VuDev *vu_dev, int idx, bool started) {
+    VuVirtq *vq;
+
+    assert(vu_dev);
+
+    if ((idx < 0) || (idx >= VHOST_MAX_NR_VIRTQUEUE)) {
+        PERR("VQ Index out of range: %d", idx);
+        vus_panic_cb(vu_dev, NULL);
+        return;
+    }
+
+    vq = vu_get_queue(vu_dev, idx);
+
+    switch(idx) {
+    case 0:
+        vu_set_queue_handler(vu_dev, vq, started?vus_proc_ctl:NULL);
+        break;
+    case 1:
+        vu_set_queue_handler(vu_dev, vq, started?vus_proc_evt:NULL);
+        break;
+    default:
+        vu_set_queue_handler(vu_dev, vq, started?vus_proc_req:NULL);
+    }
+}
+
+static const VuDevIface vus_iface = {
+    .queue_set_started = vus_queue_set_started,
+};
+
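+/* Scheduler callback for the vhost-user socket: dispatch one message. */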
+static void vus_vhost_cb(VuDev *vu_dev, int vu_evt, void *data) {
+    assert(vu_dev);
+
+    if (!vu_dispatch(vu_dev)) {
+        PERR("Error processing vhost message");
+        vus_panic_cb(vu_dev, NULL);
+    }
+}
+
+/** util **/
+
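+/*
+ * Create a listening UNIX domain socket at unix_fn, removing any stale
+ * socket file first.
+ */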
+static int unix_sock_new(char *unix_fn) {
+    int sock;
+    struct sockaddr_un un;
+    size_t len;
+
+    assert(unix_fn);
+
+    sock = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (sock < 0) {
+        perror("socket");
+        return -1;
+    }
+
+    un.sun_family = AF_UNIX;
+    (void)snprintf(un.sun_path, sizeof(un.sun_path), "%s", unix_fn);
+    len = sizeof(un.sun_family) + strlen(un.sun_path);
+
+    (void)unlink(unix_fn);
+    if (bind(sock, (struct sockaddr *)&un, len) < 0) {
+        perror("bind");
+        goto fail;
+    }
+
+    if (listen(sock, 1) < 0) {
+        perror("listen");
+        goto fail;
+    }
+
+    return sock;
+
+fail:
+    (void)close(sock);
+
+    return -1;
+}
+
+/** vhost-user-scsi **/
+
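+/* Map a VuDev pointer back to its enclosing vhost_scsi_dev_t. */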
+static vhost_scsi_dev_t *vdev_scsi_find_by_vu(VuDev *vu_dev) {
+    int i;
+
+    assert(vu_dev);
+
+    for (i=0; i<VDEV_SCSI_MAX_DEVS; i++) {
+        if (&vhost_scsi_devs[i]->vu_dev == vu_dev) {
+            return vhost_scsi_devs[i];
+        }
+    }
+
+    PERR("Unknown VuDev %p", vu_dev);
+    return NULL;
+}
+
+static void vdev_scsi_deinit(vhost_scsi_dev_t *vdev_scsi) {
+    if (!vdev_scsi) {
+        return;
+    }
+
+    if (vdev_scsi->server_sock >= 0) {
+        struct sockaddr_storage ss;
+        socklen_t sslen = sizeof(ss);
+
+        if (getsockname(vdev_scsi->server_sock, (struct sockaddr *)&ss,
+                        &sslen) == 0) {
+            struct sockaddr_un *su = (struct sockaddr_un *)&ss;
+            (void)unlink(su->sun_path);
+        }
+
+        (void)close(vdev_scsi->server_sock);
+    }
+}
+
+static vhost_scsi_dev_t *vdev_scsi_new(char *unix_fn) {
+    vhost_scsi_dev_t *vdev_scsi;
+
+    assert(unix_fn);
+
+    vdev_scsi = calloc(1, sizeof(vhost_scsi_dev_t));
+    if (!vdev_scsi) {
+        perror("calloc");
+        return NULL;
+    }
+
+    vdev_scsi->server_sock = unix_sock_new(unix_fn);
+    if (vdev_scsi->server_sock < 0) {
+        free(vdev_scsi);
+        return NULL;
+    }
+
+    vdev_scsi->sched.vu_dev = &vdev_scsi->vu_dev;
+
+    return vdev_scsi;
+}
+
+static int vdev_scsi_iscsi_add_lun(vhost_scsi_dev_t *vdev_scsi,
+                                   char *iscsi_uri, uint32_t lun) {
+    assert(vdev_scsi);
+    assert(iscsi_uri);
+    assert(lun < VDEV_SCSI_MAX_LUNS);
+
+    if (vdev_scsi->luns[lun].iscsi_ctx) {
+        PERR("Lun %d already configured", lun);
+        return -1;
+    }
+
+    if (iscsi_add_lun(&vdev_scsi->luns[lun], iscsi_uri) != 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
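+/*
+ * Accept a single vhost-user connection, register it with the scheduler
+ * and run the event loop until the device panics or is torn down.
+ */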
+static int vdev_scsi_run(vhost_scsi_dev_t *vdev_scsi) {
+    int cli_sock;
+    int ret = 0;
+
+    assert(vdev_scsi);
+    assert(vdev_scsi->server_sock >= 0);
+
+    cli_sock = accept(vdev_scsi->server_sock, (void *)0, (void *)0);
+    if (cli_sock < 0) {
+        perror("accept");
+        return -1;
+    }
+
+    vu_init(&vdev_scsi->vu_dev,
+            cli_sock,
+            vus_panic_cb,
+            vus_add_watch_cb,
+            vus_del_watch_cb,
+            &vus_iface);
+
+    ret = sched_add(&vdev_scsi->sched, cli_sock, POLLIN, vus_vhost_cb, NULL, 0);
+    if (ret) {
+        goto fail;
+    }
+
+    if (sched_loop(&vdev_scsi->sched) != 0) {
+        goto fail;
+    }
+
+out:
+    vu_deinit(&vdev_scsi->vu_dev);
+
+    return ret;
+
+fail:
+    ret = -1;
+    goto out;
+}
+
+int main(int argc, char **argv)
+{
+    vhost_scsi_dev_t *vdev_scsi = NULL;
+    char *unix_fn = NULL;
+    char *iscsi_uri = NULL;
+    int opt, err = EXIT_SUCCESS;
+
+    while ((opt = getopt(argc, argv, "hu:i:")) != -1) {
+        switch (opt) {
+        case 'h':
+            goto help;
+        case 'u':
+            unix_fn = strdup(optarg);
+            break;
+        case 'i':
+            iscsi_uri = strdup(optarg);
+            break;
+        default:
+            goto help;
+        }
+    }
+    if (!unix_fn || !iscsi_uri) {
+        goto help;
+    }
+
+    vdev_scsi = vdev_scsi_new(unix_fn);
+    if (!vdev_scsi) {
+        goto err;
+    }
+    vhost_scsi_devs[0] = vdev_scsi;
+
+    if (vdev_scsi_iscsi_add_lun(vdev_scsi, iscsi_uri, 0) != 0) {
+        goto err;
+    }
+
+    if (vdev_scsi_run(vdev_scsi) != 0) {
+        goto err;
+    }
+
+out:
+    if (vdev_scsi) {
+        vdev_scsi_deinit(vdev_scsi);
+        free(vdev_scsi);
+    }
+    if (unix_fn) {
+        free(unix_fn);
+    }
+    if (iscsi_uri) {
+        free(iscsi_uri);
+    }
+
+    return err;
+
+err:
+    err = EXIT_FAILURE;
+    goto out;
+
+help:
+    fprintf(stderr, "Usage: %s -u unix_sock_path -i iscsi_uri | -h\n",
+            argv[0]);
+    fprintf(stderr, "          -u unix domain socket to listen on\n");
+    fprintf(stderr, "          -i iSCSI URI to use for LUN 0\n");
+    fprintf(stderr, "          -h print this help and exit\n");
+
+    goto err;
+}