diff mbox

[17/23] scsi_dh_alua: use unique device id

Message ID 1440679281-13234-18-git-send-email-hare@suse.de (mailing list archive)
State New, archived
Headers show

Commit Message

Hannes Reinecke Aug. 27, 2015, 12:41 p.m. UTC
Use scsi_vpd_lun_id() to assign a unique device identification
to the alua port group structure.

Signed-off-by: Hannes Reinecke <hare@suse.de>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 70 +++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 5 deletions(-)

Comments

Christoph Hellwig Sept. 1, 2015, 10:25 a.m. UTC | #1
> +		 * Internal error: TPGS supported by no
> +		 * device identifcation found.
> +		 * Disable ALUA support.

s/by/but/

Also I think the comment could fit into two lines if you try :)

> +	spin_lock(&port_group_lock);
> +	list_for_each_entry(tmp_pg, &port_group_list, node) {
> +		if (tmp_pg->group_id != group_id)
> +			continue;
> +		if (tmp_pg->device_id_size != device_id_size)
> +			continue;
> +		if (strncmp(tmp_pg->device_id_str, device_id_str,
> +			    device_id_size))
> +			continue;
> +		h->pg = tmp_pg;
> +		kref_get(&tmp_pg->kref);
> +		break;
> +	}
> +	spin_unlock(&port_group_lock);

This exact code appears twice in this patch, please factor it into
a helper.

> +	if (device_id_size)
> +		strncpy(pg->device_id_str, device_id_str, 256);
> +	else
> +		pg->device_id_str[0] = '\0';
> +

How could we end up with a zero device ID length?  Shouldn't we
have errored out earlier on that?

> +	 * Re-check list again to catch
> +	 * concurrent updates
> +	 */

Could fit onto one line..
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ewan Milne Sept. 22, 2015, 7:31 p.m. UTC | #2
On Thu, 2015-08-27 at 14:41 +0200, Hannes Reinecke wrote:
> Use scsi_vpd_lun_id() to assign a unique device identification
> to the alua port group structure.
> 
> Signed-off-by: Hannes Reinecke <hare@suse.de>
> ---
>  drivers/scsi/device_handler/scsi_dh_alua.c | 70 +++++++++++++++++++++++++++---
>  1 file changed, 65 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
> index dbe9ff2..c2b2100b 100644
> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
> @@ -70,6 +70,8 @@ static DEFINE_SPINLOCK(port_group_lock);
>  struct alua_port_group {
>  	struct kref		kref;
>  	struct list_head	node;
> +	unsigned char		device_id_str[256];
> +	int			device_id_size;

I prefer _len instead of _size; _size should refer to the size of the buffer,
not the current length of the data in it.

>  	int			group_id;
>  	int			tpgs;
>  	int			state;
> @@ -229,7 +231,9 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>  {
>  	unsigned char *d;
>  	int group_id = -1;
> -	struct alua_port_group *pg = NULL;
> +	char device_id_str[256];
> +	int device_id_size;
> +	struct alua_port_group *tmp_pg, *pg = NULL;
>  
>  	if (!sdev->vpd_pg83)
>  		return SCSI_DH_DEV_UNSUPP;
> @@ -266,9 +270,39 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>  		h->tpgs = TPGS_MODE_NONE;
>  		return SCSI_DH_DEV_UNSUPP;
>  	}
> +	device_id_size = scsi_vpd_lun_id(sdev, device_id_str, 256);

should be sizeof(device_id_str) not hardcoded 256

> +	if (device_id_size <= 0) {
> +		/*
> +		 * Internal error: TPGS supported by no

"Internal error: TPGS supported by no"  should be "but no"

> +		 * device identifcation found.
> +		 * Disable ALUA support.
> +		 */
> +		sdev_printk(KERN_INFO, sdev,
> +			    "%s: No device descriptors found\n",
> +			    ALUA_DH_NAME);
> +		h->tpgs = TPGS_MODE_NONE;
> +		return SCSI_DH_DEV_UNSUPP;
> +	}
>  	sdev_printk(KERN_INFO, sdev,
> -		    "%s: port group %02x rel port %02x\n",
> -		    ALUA_DH_NAME, group_id, h->rel_port);
> +		    "%s: device %s port group %02x "
> +		    "rel port %02x\n", ALUA_DH_NAME,
> +		    device_id_str, group_id, h->rel_port);
> +	spin_lock(&port_group_lock);
> +	list_for_each_entry(tmp_pg, &port_group_list, node) {
> +		if (tmp_pg->group_id != group_id)
> +			continue;
> +		if (tmp_pg->device_id_size != device_id_size)
> +			continue;
> +		if (strncmp(tmp_pg->device_id_str, device_id_str,
> +			    device_id_size))
> +			continue;
> +		h->pg = tmp_pg;
> +		kref_get(&tmp_pg->kref);
> +		break;
> +	}
> +	spin_unlock(&port_group_lock);
> +	if (h->pg)
> +		return SCSI_DH_OK;

The lookup checks whether h->pg == NULL but the function never
explicitly sets it to NULL before iterating.

>  
>  	pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
>  	if (!pg) {
> @@ -278,13 +312,39 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>  		/* Temporary failure, bypass */
>  		return SCSI_DH_DEV_TEMP_BUSY;
>  	}
> +	if (device_id_size)
> +		strncpy(pg->device_id_str, device_id_str, 256);

should be sizeof(device_id_str) not hardcoded 256

> +	else
> +		pg->device_id_str[0] = '\0';
> +
> +	pg->device_id_size = device_id_size;
>  	pg->group_id = group_id;
>  	pg->tpgs = h->tpgs;
>  	pg->state = TPGS_STATE_OPTIMIZED;
>  	kref_init(&pg->kref);
>  	spin_lock(&port_group_lock);
> -	list_add(&pg->node, &port_group_list);
> -	h->pg = pg;
> +	/*
> +	 * Re-check list again to catch
> +	 * concurrent updates
> +	 */
> +	list_for_each_entry(tmp_pg, &port_group_list, node) {
> +		if (tmp_pg->group_id != pg->group_id)
> +			continue;
> +		if (tmp_pg->device_id_size != pg->device_id_size)
> +			continue;
> +		if (strncmp(tmp_pg->device_id_str, pg->device_id_str,
> +			    device_id_size))
> +			continue;
> +		h->pg = tmp_pg;
> +		kref_get(&tmp_pg->kref);
> +		kfree(pg);

With the added check for an existing alua_port_group object, and the kfree() of
the alua_port_group that had been allocated if an existing one is found, the code does not
do a destroy_workqueue() on pg->work_q.  

> +		pg = NULL;
> +		break;
> +	}
> +	if (pg) {
> +		list_add(&pg->node, &port_group_list);
> +		h->pg = pg;
> +	}
>  	spin_unlock(&port_group_lock);
>  
>  	return SCSI_DH_OK;
 
An explanation in the comments about the ALUA topology and what the device_id vs.
the group_id represents might be helpful.  It occurred to me that if someone doesn't
understand that you can have the same device_id behind different port groups they
won't understand this code.

HOST  ---->     STORAGE CTRL PG 1   ---->    LUN X
      ---->                         ---->    LUN X
      ---->     STORAGE CTRL PG 2   ---->    LUN X
      ---->                         ---->    LUN X

Reviewed-by: Ewan D. Milne <emilne@redhat.com>


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Hannes Reinecke Sept. 28, 2015, 7:41 a.m. UTC | #3
On 09/22/2015 09:31 PM, Ewan Milne wrote:
> On Thu, 2015-08-27 at 14:41 +0200, Hannes Reinecke wrote:
>> Use scsi_vpd_lun_id() to assign a unique device identification
>> to the alua port group structure.
>>
>> Signed-off-by: Hannes Reinecke <hare@suse.de>
>> ---
>>  drivers/scsi/device_handler/scsi_dh_alua.c | 70 +++++++++++++++++++++++++++---
>>  1 file changed, 65 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
>> index dbe9ff2..c2b2100b 100644
>> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
>> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
>> @@ -70,6 +70,8 @@ static DEFINE_SPINLOCK(port_group_lock);
>>  struct alua_port_group {
>>  	struct kref		kref;
>>  	struct list_head	node;
>> +	unsigned char		device_id_str[256];
>> +	int			device_id_size;
> 
> I prefer _len instead of _size, _size should refer to the size of the buffer,
> not the current length of the data in it.
> 
>>  	int			group_id;
>>  	int			tpgs;
>>  	int			state;
>> @@ -229,7 +231,9 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>>  {
>>  	unsigned char *d;
>>  	int group_id = -1;
>> -	struct alua_port_group *pg = NULL;
>> +	char device_id_str[256];
>> +	int device_id_size;
>> +	struct alua_port_group *tmp_pg, *pg = NULL;
>>  
>>  	if (!sdev->vpd_pg83)
>>  		return SCSI_DH_DEV_UNSUPP;
>> @@ -266,9 +270,39 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>>  		h->tpgs = TPGS_MODE_NONE;
>>  		return SCSI_DH_DEV_UNSUPP;
>>  	}
>> +	device_id_size = scsi_vpd_lun_id(sdev, device_id_str, 256);
> 
> should be sizeof(device_id_str) not hardcoded 256
> 
Okay.
>> +	if (device_id_size <= 0) {
>> +		/*
>> +		 * Internal error: TPGS supported by no
> 
> "Internal error: TPGS supported by no"  should be "but no"
> 
Fixed now.

>> +		 * device identifcation found.
>> +		 * Disable ALUA support.
>> +		 */
>> +		sdev_printk(KERN_INFO, sdev,
>> +			    "%s: No device descriptors found\n",
>> +			    ALUA_DH_NAME);
>> +		h->tpgs = TPGS_MODE_NONE;
>> +		return SCSI_DH_DEV_UNSUPP;
>> +	}
>>  	sdev_printk(KERN_INFO, sdev,
>> -		    "%s: port group %02x rel port %02x\n",
>> -		    ALUA_DH_NAME, group_id, h->rel_port);
>> +		    "%s: device %s port group %02x "
>> +		    "rel port %02x\n", ALUA_DH_NAME,
>> +		    device_id_str, group_id, h->rel_port);
>> +	spin_lock(&port_group_lock);
>> +	list_for_each_entry(tmp_pg, &port_group_list, node) {
>> +		if (tmp_pg->group_id != group_id)
>> +			continue;
>> +		if (tmp_pg->device_id_size != device_id_size)
>> +			continue;
>> +		if (strncmp(tmp_pg->device_id_str, device_id_str,
>> +			    device_id_size))
>> +			continue;
>> +		h->pg = tmp_pg;
>> +		kref_get(&tmp_pg->kref);
>> +		break;
>> +	}
>> +	spin_unlock(&port_group_lock);
>> +	if (h->pg)
>> +		return SCSI_DH_OK;
> 
> The lookup checks whether h->pg == NULL but the function never
> explicitly sets it to NULL before iterating.
> 
For my next iteration I've reworked this so that h->pg is explicitly
set.

>>  
>>  	pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
>>  	if (!pg) {
>> @@ -278,13 +312,39 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
>>  		/* Temporary failure, bypass */
>>  		return SCSI_DH_DEV_TEMP_BUSY;
>>  	}
>> +	if (device_id_size)
>> +		strncpy(pg->device_id_str, device_id_str, 256);
> 
> should be sizeof(device_id_str) not hardcoded 256
> 
Okay.

>> +	else
>> +		pg->device_id_str[0] = '\0';
>> +
>> +	pg->device_id_size = device_id_size;
>>  	pg->group_id = group_id;
>>  	pg->tpgs = h->tpgs;
>>  	pg->state = TPGS_STATE_OPTIMIZED;
>>  	kref_init(&pg->kref);
>>  	spin_lock(&port_group_lock);
>> -	list_add(&pg->node, &port_group_list);
>> -	h->pg = pg;
>> +	/*
>> +	 * Re-check list again to catch
>> +	 * concurrent updates
>> +	 */
>> +	list_for_each_entry(tmp_pg, &port_group_list, node) {
>> +		if (tmp_pg->group_id != pg->group_id)
>> +			continue;
>> +		if (tmp_pg->device_id_size != pg->device_id_size)
>> +			continue;
>> +		if (strncmp(tmp_pg->device_id_str, pg->device_id_str,
>> +			    device_id_size))
>> +			continue;
>> +		h->pg = tmp_pg;
>> +		kref_get(&tmp_pg->kref);
>> +		kfree(pg);
> 
> With the added check for an existing alua_port_group object, and the kfree() of
> the alua_port_group that had been allocated if an existing one is found, the code does not
> do a destroy_workqueue() on pg->work_q.  
> 
With the current rework I've removed the per-pg workqueues, so that
issue doesn't occur anymore.

>> +		pg = NULL;
>> +		break;
>> +	}
>> +	if (pg) {
>> +		list_add(&pg->node, &port_group_list);
>> +		h->pg = pg;
>> +	}
>>  	spin_unlock(&port_group_lock);
>>  
>>  	return SCSI_DH_OK;
>  
> An explanation in the comments about the ALUA topology and what the device_id vs.
> the group_id represents might be helpful.  It occurred to me that if someone doesn't
> understand that you can have the same device_id behind different port groups they
> won't understand this code.
> 
> HOST  ---->     STORAGE CTRL PG 1   ---->    LUN X
>       ---->                         ---->    LUN X
>       ---->     STORAGE CTRL PG 2   ---->    LUN X
>       ---->                         ---->    LUN X
> 
Hmm. Someone willing to understand this code should be reasonably
familiar with SPC, so I doubt that'll be an issue.

Cheers,

Hannes
diff mbox

Patch

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index dbe9ff2..c2b2100b 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -70,6 +70,8 @@  static DEFINE_SPINLOCK(port_group_lock);
 struct alua_port_group {
 	struct kref		kref;
 	struct list_head	node;
+	unsigned char		device_id_str[256];
+	int			device_id_size;
 	int			group_id;
 	int			tpgs;
 	int			state;
@@ -229,7 +231,9 @@  static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
 {
 	unsigned char *d;
 	int group_id = -1;
-	struct alua_port_group *pg = NULL;
+	char device_id_str[256];
+	int device_id_size;
+	struct alua_port_group *tmp_pg, *pg = NULL;
 
 	if (!sdev->vpd_pg83)
 		return SCSI_DH_DEV_UNSUPP;
@@ -266,9 +270,39 @@  static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
 		h->tpgs = TPGS_MODE_NONE;
 		return SCSI_DH_DEV_UNSUPP;
 	}
+	device_id_size = scsi_vpd_lun_id(sdev, device_id_str, 256);
+	if (device_id_size <= 0) {
+		/*
+		 * Internal error: TPGS supported by no
+		 * device identifcation found.
+		 * Disable ALUA support.
+		 */
+		sdev_printk(KERN_INFO, sdev,
+			    "%s: No device descriptors found\n",
+			    ALUA_DH_NAME);
+		h->tpgs = TPGS_MODE_NONE;
+		return SCSI_DH_DEV_UNSUPP;
+	}
 	sdev_printk(KERN_INFO, sdev,
-		    "%s: port group %02x rel port %02x\n",
-		    ALUA_DH_NAME, group_id, h->rel_port);
+		    "%s: device %s port group %02x "
+		    "rel port %02x\n", ALUA_DH_NAME,
+		    device_id_str, group_id, h->rel_port);
+	spin_lock(&port_group_lock);
+	list_for_each_entry(tmp_pg, &port_group_list, node) {
+		if (tmp_pg->group_id != group_id)
+			continue;
+		if (tmp_pg->device_id_size != device_id_size)
+			continue;
+		if (strncmp(tmp_pg->device_id_str, device_id_str,
+			    device_id_size))
+			continue;
+		h->pg = tmp_pg;
+		kref_get(&tmp_pg->kref);
+		break;
+	}
+	spin_unlock(&port_group_lock);
+	if (h->pg)
+		return SCSI_DH_OK;
 
 	pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
 	if (!pg) {
@@ -278,13 +312,39 @@  static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h)
 		/* Temporary failure, bypass */
 		return SCSI_DH_DEV_TEMP_BUSY;
 	}
+	if (device_id_size)
+		strncpy(pg->device_id_str, device_id_str, 256);
+	else
+		pg->device_id_str[0] = '\0';
+
+	pg->device_id_size = device_id_size;
 	pg->group_id = group_id;
 	pg->tpgs = h->tpgs;
 	pg->state = TPGS_STATE_OPTIMIZED;
 	kref_init(&pg->kref);
 	spin_lock(&port_group_lock);
-	list_add(&pg->node, &port_group_list);
-	h->pg = pg;
+	/*
+	 * Re-check list again to catch
+	 * concurrent updates
+	 */
+	list_for_each_entry(tmp_pg, &port_group_list, node) {
+		if (tmp_pg->group_id != pg->group_id)
+			continue;
+		if (tmp_pg->device_id_size != pg->device_id_size)
+			continue;
+		if (strncmp(tmp_pg->device_id_str, pg->device_id_str,
+			    device_id_size))
+			continue;
+		h->pg = tmp_pg;
+		kref_get(&tmp_pg->kref);
+		kfree(pg);
+		pg = NULL;
+		break;
+	}
+	if (pg) {
+		list_add(&pg->node, &port_group_list);
+		h->pg = pg;
+	}
 	spin_unlock(&port_group_lock);
 
 	return SCSI_DH_OK;