@@ -293,12 +293,31 @@ static int srp_new_cm_id(struct srp_target_port *target)
return 0;
}
+static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
+{
+ struct srp_device *dev = target->srp_host->srp_dev;
+ struct ib_fmr_pool_param fmr_param;
+
+ memset(&fmr_param, 0, sizeof(fmr_param));
+ fmr_param.pool_size = target->scsi_host->can_queue;
+ fmr_param.dirty_watermark = fmr_param.pool_size / 4;
+ fmr_param.cache = 1;
+ fmr_param.max_pages_per_fmr = dev->max_pages_per_fmr;
+ fmr_param.page_shift = ilog2(dev->fmr_page_size);
+ fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ return ib_create_fmr_pool(dev->pd, &fmr_param);
+}
+
static int srp_create_target_ib(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_qp_init_attr *init_attr;
struct ib_cq *recv_cq, *send_cq;
struct ib_qp *qp;
+ struct ib_fmr_pool *fmr_pool = NULL;
int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
@@ -341,6 +360,21 @@ static int srp_create_target_ib(struct srp_target_port *target)
if (ret)
goto err_qp;
+ if (!target->qp || target->fmr_pool) {
+ fmr_pool = srp_alloc_fmr_pool(target);
+ if (IS_ERR(fmr_pool)) {
+ ret = PTR_ERR(fmr_pool);
+ shost_printk(KERN_WARNING, target->scsi_host, PFX
+ "FMR pool allocation failed (%d)\n", ret);
+ if (target->qp)
+ goto err_qp;
+ fmr_pool = NULL;
+ }
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
+ target->fmr_pool = fmr_pool;
+ }
+
if (target->qp)
ib_destroy_qp(target->qp);
if (target->recv_cq)
@@ -377,6 +411,8 @@ static void srp_free_target_ib(struct srp_target_port *target)
{
int i;
+ if (target->fmr_pool)
+ ib_destroy_fmr_pool(target->fmr_pool);
ib_destroy_qp(target->qp);
ib_destroy_cq(target->send_cq);
ib_destroy_cq(target->recv_cq);
@@ -936,11 +972,10 @@ static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
static int srp_map_finish_fmr(struct srp_map_state *state,
struct srp_target_port *target)
{
- struct srp_device *dev = target->srp_host->srp_dev;
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- fmr = ib_fmr_pool_map_phys(dev->fmr_pool, state->pages,
+ fmr = ib_fmr_pool_map_phys(target->fmr_pool, state->pages,
state->npages, io_addr);
if (IS_ERR(fmr))
return PTR_ERR(fmr);
@@ -1077,7 +1112,7 @@ static void srp_map_fmr(struct srp_map_state *state,
state->pages = req->map_page;
state->next_fmr = req->fmr_list;
- use_fmr = dev->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
+ use_fmr = target->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR;
for_each_sg(scat, sg, count, i) {
if (srp_map_sg_entry(state, target, sg, i, use_fmr)) {
@@ -1555,7 +1590,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
struct srp_cmd *cmd;
struct ib_device *dev;
unsigned long flags;
- int len, result;
+ int len, ret;
const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
/*
@@ -1567,12 +1602,9 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
if (in_scsi_eh)
mutex_lock(&rport->mutex);
- result = srp_chkready(target->rport);
- if (unlikely(result)) {
- scmnd->result = result;
- scmnd->scsi_done(scmnd);
- goto unlock_rport;
- }
+ scmnd->result = srp_chkready(target->rport);
+ if (unlikely(scmnd->result))
+ goto err;
spin_lock_irqsave(&target->lock, flags);
iu = __srp_get_tx_iu(target, SRP_IU_CMD);
@@ -1587,7 +1619,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
DMA_TO_DEVICE);
- scmnd->result = 0;
scmnd->host_scribble = (void *) req;
cmd = iu->buf;
@@ -1604,7 +1635,15 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
len = srp_map_data(scmnd, target, req);
if (len < 0) {
shost_printk(KERN_ERR, target->scsi_host,
- PFX "Failed to map data\n");
+ PFX "Failed to map data (%d)\n", len);
+ /*
+ * If we ran out of memory descriptors (-ENOMEM) because an
+ * application is queuing many requests with more than
+ * max_pages_per_fmr sg-list elements, tell the SCSI mid-layer
+ * to reduce queue depth temporarily.
+ */
+ scmnd->result = len == -ENOMEM ?
+ DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
goto err_iu;
}
@@ -1616,11 +1655,13 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_unmap;
}
+ ret = 0;
+
unlock_rport:
if (in_scsi_eh)
mutex_unlock(&rport->mutex);
- return 0;
+ return ret;
err_unmap:
srp_unmap_data(scmnd, target, req);
@@ -1640,10 +1681,15 @@ err_iu:
err_unlock:
spin_unlock_irqrestore(&target->lock, flags);
- if (in_scsi_eh)
- mutex_unlock(&rport->mutex);
+err:
+ if (scmnd->result) {
+ scmnd->scsi_done(scmnd);
+ ret = 0;
+ } else {
+ ret = SCSI_MLQUEUE_HOST_BUSY;
+ }
- return SCSI_MLQUEUE_HOST_BUSY;
+ goto unlock_rport;
}
/*
@@ -2692,39 +2738,39 @@ static ssize_t srp_create_target(struct device *dev,
goto err;
}
- if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
- target->cmd_sg_cnt < target->sg_tablesize) {
- pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
- target->sg_tablesize = target->cmd_sg_cnt;
- }
-
- target_host->sg_tablesize = target->sg_tablesize;
- target->indirect_size = target->sg_tablesize *
- sizeof (struct srp_direct_buf);
- target->max_iu_len = sizeof (struct srp_cmd) +
- sizeof (struct srp_indirect_buf) +
- target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
-
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
INIT_LIST_HEAD(&target->free_tx);
- ret = srp_alloc_req_data(target);
- if (ret)
- goto err_free_mem;
-
ret = ib_query_gid(ibdev, host->port, 0, &target->path.sgid);
if (ret)
- goto err_free_mem;
+ goto err;
ret = srp_create_target_ib(target);
if (ret)
- goto err_free_mem;
+ goto err;
- ret = srp_new_cm_id(target);
+ if (!target->fmr_pool && !target->allow_ext_sg &&
+ target->cmd_sg_cnt < target->sg_tablesize) {
+ pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
+ target->sg_tablesize = target->cmd_sg_cnt;
+ }
+
+ target_host->sg_tablesize = target->sg_tablesize;
+ target->indirect_size = target->sg_tablesize *
+ sizeof(struct srp_direct_buf);
+ target->max_iu_len = sizeof(struct srp_cmd) +
+ sizeof(struct srp_indirect_buf) +
+ target->cmd_sg_cnt * sizeof(struct srp_direct_buf);
+
+ ret = srp_alloc_req_data(target);
if (ret)
goto err_free_ib;
+ ret = srp_new_cm_id(target);
+ if (ret)
+ goto err_free_mem;
+
ret = srp_connect_target(target);
if (ret) {
shost_printk(KERN_ERR, target->scsi_host,
@@ -2756,12 +2802,12 @@ err_disconnect:
err_cm_id:
ib_destroy_cm_id(target->cm_id);
-err_free_ib:
- srp_free_target_ib(target);
-
err_free_mem:
srp_free_req_data(target);
+err_free_ib:
+ srp_free_target_ib(target);
+
err:
scsi_host_put(target_host);
goto out;
@@ -2832,9 +2878,8 @@ static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
struct ib_device_attr *dev_attr;
- struct ib_fmr_pool_param fmr_param;
struct srp_host *host;
- int max_pages_per_fmr, fmr_page_shift, s, e, p;
+ int fmr_page_shift, s, e, p;
dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
if (!dev_attr)
@@ -2857,7 +2902,10 @@ static void srp_add_one(struct ib_device *device)
fmr_page_shift = max(12, ffs(dev_attr->page_size_cap) - 1);
srp_dev->fmr_page_size = 1 << fmr_page_shift;
srp_dev->fmr_page_mask = ~((u64) srp_dev->fmr_page_size - 1);
- srp_dev->fmr_max_size = srp_dev->fmr_page_size * SRP_FMR_SIZE;
+ srp_dev->max_pages_per_fmr = min_t(u64, SRP_FMR_SIZE,
+ dev_attr->max_mr_size / srp_dev->fmr_page_size);
+ srp_dev->fmr_max_size = srp_dev->fmr_page_size *
+ srp_dev->max_pages_per_fmr;
INIT_LIST_HEAD(&srp_dev->dev_list);
@@ -2873,27 +2921,6 @@ static void srp_add_one(struct ib_device *device)
if (IS_ERR(srp_dev->mr))
goto err_pd;
- for (max_pages_per_fmr = SRP_FMR_SIZE;
- max_pages_per_fmr >= SRP_FMR_MIN_SIZE;
- max_pages_per_fmr /= 2, srp_dev->fmr_max_size /= 2) {
- memset(&fmr_param, 0, sizeof fmr_param);
- fmr_param.pool_size = SRP_FMR_POOL_SIZE;
- fmr_param.dirty_watermark = SRP_FMR_DIRTY_SIZE;
- fmr_param.cache = 1;
- fmr_param.max_pages_per_fmr = max_pages_per_fmr;
- fmr_param.page_shift = fmr_page_shift;
- fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_REMOTE_WRITE |
- IB_ACCESS_REMOTE_READ);
-
- srp_dev->fmr_pool = ib_create_fmr_pool(srp_dev->pd, &fmr_param);
- if (!IS_ERR(srp_dev->fmr_pool))
- break;
- }
-
- if (IS_ERR(srp_dev->fmr_pool))
- srp_dev->fmr_pool = NULL;
-
if (device->node_type == RDMA_NODE_IB_SWITCH) {
s = 0;
e = 0;
@@ -2956,8 +2983,6 @@ static void srp_remove_one(struct ib_device *device)
kfree(host);
}
- if (srp_dev->fmr_pool)
- ib_destroy_fmr_pool(srp_dev->fmr_pool);
ib_dereg_mr(srp_dev->mr);
ib_dealloc_pd(srp_dev->pd);
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -67,9 +67,6 @@ enum {
SRP_TAG_TSK_MGMT = 1U << 31,
SRP_FMR_SIZE = 512,
- SRP_FMR_MIN_SIZE = 128,
- SRP_FMR_POOL_SIZE = 1024,
- SRP_FMR_DIRTY_SIZE = SRP_FMR_POOL_SIZE / 4,
SRP_MAP_ALLOW_FMR = 0,
SRP_MAP_NO_FMR = 1,
@@ -91,10 +88,10 @@ struct srp_device {
struct ib_device *dev;
struct ib_pd *pd;
struct ib_mr *mr;
- struct ib_fmr_pool *fmr_pool;
u64 fmr_page_mask;
int fmr_page_size;
int fmr_max_size;
+ int max_pages_per_fmr;
};
struct srp_host {
@@ -131,6 +128,7 @@ struct srp_target_port {
struct ib_cq *send_cq ____cacheline_aligned_in_smp;
struct ib_cq *recv_cq;
struct ib_qp *qp;
+ struct ib_fmr_pool *fmr_pool;
u32 lkey;
u32 rkey;
enum srp_target_state state;

Allocate one FMR pool per SRP connection instead of one FMR pool per
HCA. This improves the scalability of the SRP initiator.

Request that the SCSI mid-layer retry a SCSI command only after a
temporary mapping failure (-ENOMEM), not after a permanent mapping
failure. This prevents SCSI commands from being retried indefinitely
when a permanent memory mapping failure occurs.

Tell the SCSI mid-layer to reduce queue depth temporarily in the
unlikely case where an application is queuing many requests with more
than max_pages_per_fmr sg-list elements.

For FMR pool allocation, base the max_pages_per_fmr parameter on the
HCA memory registration limit.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: Sagi Grimberg <sagig@mellanox.com>
Cc: Roland Dreier <roland@purestorage.com>
Cc: David Dillow <dave@thedillows.org>
Cc: Vu Pham <vu@mellanox.com>
Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 155 +++++++++++++++++++++---------------
 drivers/infiniband/ulp/srp/ib_srp.h |   6 +-
 2 files changed, 92 insertions(+), 69 deletions(-)
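Two standalone sketches follow for reviewers; neither is part of the
patch itself.

The first shows what the result word built in srp_queuecommand()'s new
-ENOMEM path evaluates to. It is plain user-space C; the DID_* and
QUEUE_FULL constants are copied from the classic <scsi/scsi.h> so no
kernel headers are needed. QUEUE_FULL is a status_byte() value, i.e.
the SAM status shifted right by one, so QUEUE_FULL << 1 yields 0x28
(TASK SET FULL), which is what makes the mid-layer lower the queue
depth temporarily.

	#include <errno.h>
	#include <stdio.h>

	#define DID_OK		0x00	/* host byte: no transport error */
	#define DID_ERROR	0x07	/* host byte: unrecoverable error */
	#define QUEUE_FULL	0x14	/* status_byte(): TASK SET FULL (0x28) >> 1 */

	/* Same expression as in the patch; len < 0 means srp_map_data() failed. */
	static unsigned int map_len_to_result(int len)
	{
		return len == -ENOMEM ?
			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
	}

	int main(void)
	{
		/* 0x00028: host byte clear, TASK SET FULL status; the
		 * mid-layer reduces queue depth and retries. */
		printf("-ENOMEM -> result 0x%05x\n", map_len_to_result(-ENOMEM));

		/* 0x70000: DID_ERROR; the command fails instead of being
		 * retried indefinitely. */
		printf("-EIO    -> result 0x%05x\n", map_len_to_result(-EIO));
		return 0;
	}

The second sketch illustrates the new max_pages_per_fmr computation in
srp_add_one(). The max_mr_size value is hypothetical and deliberately
small (1 MB) so that the HCA limit visibly clamps the result; real HCAs
report far larger limits, in which case SRP_FMR_SIZE (512 pages) wins
and fmr_max_size stays at 2 MB for 4 KiB pages.

	#include <stdio.h>

	enum { SRP_FMR_SIZE = 512 };

	int main(void)
	{
		unsigned long long max_mr_size = 1ULL << 20;	/* hypothetical HCA limit */
		int fmr_page_size = 1 << 12;			/* fmr_page_shift == 12 */
		unsigned long long hca_pages = max_mr_size / fmr_page_size;	/* 256 */

		/* Equivalent of min_t(u64, SRP_FMR_SIZE, max_mr_size / fmr_page_size). */
		int max_pages_per_fmr = hca_pages < SRP_FMR_SIZE ?
			(int)hca_pages : SRP_FMR_SIZE;
		int fmr_max_size = fmr_page_size * max_pages_per_fmr;

		/* Prints: max_pages_per_fmr=256 fmr_max_size=1048576 */
		printf("max_pages_per_fmr=%d fmr_max_size=%d\n",
		       max_pages_per_fmr, fmr_max_size);
		return 0;
	}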
Allocate one FMR pool per SRP connection instead of one SRP pool per HCA. This improves scalability of the SRP initiator. Only request the SCSI mid-layer to retry a SCSI command after a temporary mapping failure (-ENOMEM) but not after a permanent mapping failure. This avoids that SCSI commands are retried indefinitely if a permanent memory mapping failure occurs. Tell the SCSI mid-layer to reduce queue depth temporarily in the unlikely case where an application is queuing many requests with more than max_pages_per_fmr sg-list elements. For FMR pool allocation, base the max_pages_per_fmr parameter on the HCA memory registration limit. Signed-off-by: Bart Van Assche <bvanassche@acm.org> Cc: Sagi Grimberg <sagig@mellanox.com> Cc: Roland Dreier <roland@purestorage.com> Cc: David Dillow <dave@thedillows.org> Cc: Vu Pham <vu@mellanox.com> Cc: Sebastian Parschauer <sebastian.riemer@profitbricks.com> --- drivers/infiniband/ulp/srp/ib_srp.c | 155 +++++++++++++++++++++--------------- drivers/infiniband/ulp/srp/ib_srp.h | 6 +- 2 files changed, 92 insertions(+), 69 deletions(-)