===================================================================
@@ -78,6 +77,13 @@
MODULE_PARM_DESC(mellanox_workarounds,
"Enable workarounds for Mellanox SRP target bugs if != 0");
+static int srp_dev_loss_tmo = 60;
+
+module_param(srp_dev_loss_tmo, int, 0444);
+MODULE_PARM_DESC(srp_dev_loss_tmo,
+ "Default number of seconds that srp transport should \
+ insulate the lost of a remote port (default is 60 secs");
+
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device);
static void srp_completion(struct ib_cq *cq, void *target_ptr);
@@ -898,6 +926,48 @@
DMA_FROM_DEVICE);
}
+static void srp_reconnect_work(struct work_struct *work)
+{
+ struct srp_target_port *target =
+ container_of(work, struct srp_target_port, work);
+
+ srp_reconnect_target(target);
+ target->work_in_progress = 0;
+}
+
+static void srp_qp_in_err_timer(unsigned long data)
+{
+ struct srp_target_port *target = (struct srp_target_port *)data;
+ struct srp_request *req, *tmp;
+
+ if (target->state != SRP_TARGET_LIVE)
+ return;
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+ srp_reset_req(target, req);
+ spin_unlock_irq(target->scsi_host->host_lock);
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (!target->work_in_progress) {
+ target->work_in_progress = 1;
+ INIT_WORK(&target->work, srp_reconnect_work);
+ schedule_work(&target->work);
+ }
+ spin_unlock_irq(target->scsi_host->host_lock);
+}
+
+static void srp_qp_err_add_timer(struct srp_target_port *target, int time)
+{
+ if (!timer_pending(&target->qp_err_timer)) {
+ setup_timer(&target->qp_err_timer,
+ srp_qp_in_err_timer,
+ (unsigned long)target);
+ target->qp_err_timer.expires = time * HZ + jiffies;
+ add_timer(&target->qp_err_timer);
+ }
+}
+
static void srp_completion(struct ib_cq *cq, void *target_ptr)
{
struct srp_target_port *target = target_ptr;
@@ -960,11 +980,20 @@
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(cq, 1, &wc) > 0) {
if (wc.status) {
+ unsigned long flags;
+
shost_printk(KERN_ERR, target->scsi_host,
PFX "failed %s status %d\n",
wc.wr_id & SRP_OP_RECV ? "receive" : "send",
wc.status);
- target->qp_in_error = 1;
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (!target->qp_in_error &&
+ target->state == SRP_TARGET_LIVE) {
+ target->qp_in_error = 1;
+ srp_qp_err_add_timer(target,
+ srp_dev_loss_tmo - 55);
+ }
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
break;
}
@@ -1274,5 +1299,6 @@
int attr_mask = 0;
int comp = 0;
int opcode = 0;
+ unsigned long flags;
switch (event->event) {
@@ -1301,6 +1381,14 @@
shost_printk(KERN_ERR, target->scsi_host,
PFX "connection closed\n");
+ spin_lock_irqsave(target->scsi_host->host_lock, flags);
+ if (!target->qp_in_error &&
+ target->state == SRP_TARGET_LIVE) {
+ target->qp_in_error = 1;
+ srp_qp_err_add_timer(target,
+ srp_dev_loss_tmo - 55);
+ }
+ spin_unlock_irqrestore(target->scsi_host->host_lock, flags);
target->status = 0;
break;
@@ -1443,9 +1529,22 @@
static int srp_reset_host(struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(scmnd->device->host);
+ struct srp_request *req, *tmp;
int ret = FAILED;
- shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
+ shost_printk(KERN_ERR, target->scsi_host,
+ PFX "SRP reset_host called state %d qp_err %d\n",
+ target->state, target->qp_in_error);
+
+ spin_lock_irq(target->scsi_host->host_lock);
+ if (timer_pending(&target->qp_err_timer) || target->qp_in_error ||
+ target->state != SRP_TARGET_LIVE) {
+ list_for_each_entry_safe(req, tmp, &target->req_queue, list)
+ srp_reset_req(target, req);
+ spin_unlock_irq(target->scsi_host->host_lock);
+ return SUCCESS;
+ }
+ spin_unlock_irq(target->scsi_host->host_lock);
if (!srp_reconnect_target(target))
ret = SUCCESS;
@@ -2150,6 +2342,9 @@
sizeof (struct srp_indirect_buf) +
srp_sg_tablesize * 16);
+ if (srp_dev_loss_tmo < 60)
+ srp_dev_loss_tmo = 60;
+
ret = class_register(&srp_class);
if (ret) {
printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
===================================================================
@@ -153,12 +159,14 @@
struct srp_request req_ring[SRP_SQ_SIZE];
struct work_struct work;
+ int work_in_progress;
struct list_head list;
struct completion done;
int status;
enum srp_target_state state;
int qp_in_error;
+ struct timer_list qp_err_timer;
};
struct srp_iu {