diff mbox series

[v2] scsi: libsas: Fix disk not being scanned in after being removed

Message ID 20240227090149.29039-1-yangxingui@huawei.com (mailing list archive)
State Superseded
Headers show
Series [v2] scsi: libsas: Fix disk not being scanned in after being removed | expand

Commit Message

yangxingui Feb. 27, 2024, 9:01 a.m. UTC
As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
update PHY info"), do discovery will send a new SMP_DISCOVER and update
phy->phy_change_count. We found that if the disk is reconnected and phy
change_count changes at this time, the disk scanning process will not be
triggered.

So update the PHY info with the last query results.

Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
Signed-off-by: Xingui Yang <yangxingui@huawei.com>
---
v1 -> v2:
Use sas_get_phy_discover() instead of sas_get_phy_attached_dev() in
sas_rediscover_dev() and use disc_resp to update phy info.
---
 drivers/scsi/libsas/sas_expander.c | 37 ++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 9 deletions(-)

Comments

Jason Yan March 5, 2024, 3 a.m. UTC | #1
On 2024/2/27 17:01, Xingui Yang wrote:
> As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
> update PHY info"), do discovery will send a new SMP_DISCOVER and update
> phy->phy_change_count. We found that if the disk is reconnected and phy
> change_count changes at this time, the disk scanning process will not be
> triggered.
> 
> So update the PHY info with the last query results.
> 
> Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
> Signed-off-by: Xingui Yang<yangxingui@huawei.com>
> ---
> v1 -> v2:
> Use sas_get_phy_discover() instead of sas_get_phy_attached_dev() in
> sas_rediscover_dev() and use disc_resp to update phy info.
> ---
>   drivers/scsi/libsas/sas_expander.c | 37 ++++++++++++++++++++++--------
>   1 file changed, 28 insertions(+), 9 deletions(-)

Looks good to me:

Reviewed-by: Jason Yan <yanaijie@huawei.com>
John Garry March 6, 2024, 6:43 p.m. UTC | #2
On 27/02/2024 09:01, Xingui Yang wrote:
> As of commit d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to
> update PHY info"), do discovery will send a new SMP_DISCOVER and update
> phy->phy_change_count. We found that if the disk is reconnected and phy
> change_count changes at this time, the disk scanning process will not be
> triggered.
> 
> So update the PHY info with the last query results.

Please mention that sas_unregister_devs_sas_addr() should be called 
before sas_set_ex_phy() for this case.

> 
> Fixes: d8649fc1c5e4 ("scsi: libsas: Do discovery on empty PHY to update PHY info")
> Signed-off-by: Xingui Yang <yangxingui@huawei.com>
> ---
> v1 -> v2:
> Use sas_get_phy_discover() instead of sas_get_phy_attached_dev() in
> sas_rediscover_dev() and use disc_resp to update phy info.
> ---
>   drivers/scsi/libsas/sas_expander.c | 37 ++++++++++++++++++++++--------
>   1 file changed, 28 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
> index a2204674b680..a75dcce7a9ba 100644
> --- a/drivers/scsi/libsas/sas_expander.c
> +++ b/drivers/scsi/libsas/sas_expander.c
> @@ -1940,6 +1940,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
>   	struct expander_device *ex = &dev->ex_dev;
>   	struct ex_phy *phy = &ex->ex_phy[phy_id];
>   	enum sas_device_type type = SAS_PHY_UNUSED;
> +	struct smp_disc_resp *disc_resp;
>   	u8 sas_addr[SAS_ADDR_SIZE];
>   	char msg[80] = "";
>   	int res;
> @@ -1951,33 +1952,47 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
>   		 SAS_ADDR(dev->sas_addr), phy_id, msg);
>   
>   	memset(sas_addr, 0, SAS_ADDR_SIZE);
> -	res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
> +	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
> +	if (!disc_resp)
> +		return -ENOMEM;
> +
> +	res = sas_get_phy_discover(dev, phy_id, disc_resp);
>   	switch (res) {
>   	case SMP_RESP_NO_PHY:
>   		phy->phy_state = PHY_NOT_PRESENT;
>   		sas_unregister_devs_sas_addr(dev, phy_id, last);
> -		return res;
> +		goto out;
>   	case SMP_RESP_PHY_VACANT:
>   		phy->phy_state = PHY_VACANT;
>   		sas_unregister_devs_sas_addr(dev, phy_id, last);
> -		return res;
> +		goto out;
>   	case SMP_RESP_FUNC_ACC:
>   		break;
>   	case -ECOMM:
>   		break;
>   	default:
> -		return res;
> +		goto out;
> +	}
> +
> +	if (res == 0) {
> +		struct discover_resp *dr = &disc_resp->disc;
> +
> +		memcpy(sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE);
> +		type = to_dev_type(dr);
> +		if (type == 0)

Please use SAS_PHY_UNUSED


> +			memset(sas_addr, 0, SAS_ADDR_SIZE);
>   	}

Any chance we can factor out some of this code with 
sas_get_phy_attached_dev() (where it has been copied from)?

>   
>   	if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
>   		phy->phy_state = PHY_EMPTY;
>   		sas_unregister_devs_sas_addr(dev, phy_id, last);
>   		/*
> -		 * Even though the PHY is empty, for convenience we discover
> -		 * the PHY to update the PHY info, like negotiated linkrate.
> +		 * Even though the PHY is empty, for convenience we update
> +		 * the PHY info, like negotiated linkrate.
>   		 */
> -		sas_ex_phy_discover(dev, phy_id);
> -		return res;
> +		if (res == 0)
> +			sas_set_ex_phy(dev, phy_id, disc_resp);
> +		goto out;
>   	} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
>   		   dev_type_flutter(type, phy->attached_dev_type)) {
>   		struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
> @@ -1989,7 +2004,7 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
>   			action = ", needs recovery";
>   		pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
>   			 SAS_ADDR(dev->sas_addr), phy_id, action);
> -		return res;
> +		goto out;

is there any way to get rid of all the goto's? Ideally we would have 
just one location to call kfree(disc_resp)

>   	}
>   
>   	/* we always have to delete the old device when we went here */
> @@ -1998,7 +2013,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
>   		SAS_ADDR(phy->attached_sas_addr));
>   	sas_unregister_devs_sas_addr(dev, phy_id, last);
>   
> +	kfree(disc_resp);
>   	return sas_discover_new(dev, phy_id);
> +out:

Calling this simply "out" is odd, as we exit in many places

> +	kfree(disc_resp);
> +	return res;
>   }
>   
>   /**


As an aside, could libsas - and your changes here - be simpler if we 
changed smp_execute_task() like this:

static int smp_execute_task(struct domain_device *dev, void *req /* can 
be on the stack */, int req_size,
			    void *resp /* can be on the stack */, int resp_size)
{
	struct scatterlist req_sg;
	struct scatterlist resp_sg;
	int ret;
	void *_req = kmemdup(req, req_size, GFP_KERNEL);
	void *_resp = alloc_smp_resp(resp_size);
	if (!_req || !resp)
		return -ENOMEM;

	sg_init_one(&req_sg, _req, req_size);
	sg_init_one(&resp_sg, _resp, resp_size);
	ret = smp_execute_task_sg(dev, &req_sg, &resp_sg);
	memcpy(resp, _resp, resp_size);
	kfree(_req);
	kfree(_resp);
	return ret;
}

We need to use alloc_smp_resp() and alloc_smp_req() as we can't allocate 
these memories on the stack for calling sg_init_one(). But if we changed 
smp_execute_task() to memcpy from/to data on the stack, it might make 
callers simpler. I'm not sure.

Thanks,
John
Jason Yan March 7, 2024, 2:13 a.m. UTC | #3
On 2024/3/7 2:43, John Garry wrote:
> As an aside, could libsas - and your changes here - be simpler if we 
> changed smp_execute_task() like this:
> 
> static int smp_execute_task(struct domain_device *dev, void *req /* can 
> be on the stack */, int req_size,
>                  void *resp /* can be on the stack */, int resp_size)
> {
>      struct scatterlist req_sg;
>      struct scatterlist resp_sg;
>      int ret;
>      void *_req = kmemdup(req, req_size, GFP_KERNEL);
>      void *_resp = alloc_smp_resp(resp_size);
>      if (!_req || !resp)
>          return -ENOMEM;
> 
>      sg_init_one(&req_sg, _req, req_size);
>      sg_init_one(&resp_sg, _resp, resp_size);
>      ret = smp_execute_task_sg(dev, &req_sg, &resp_sg);
>      memcpy(resp, _resp, resp_size);
>      kfree(_req);
>      kfree(_resp);
>      return ret;
> }
> 
> We need to use alloc_smp_resp() and alloc_smp_req() as we can't allocate 
> these memories on the stack for calling sg_init_one(). But if we changed 
> smp_execute_task() to memcpy from/to data on the stack, it might make 
> callers simpler. I'm not sure.

Maybe simpler. I have not check all the length of these buffers, but 
there is still a risk of stack overflow if the buffer on stack is too 
large.

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index a2204674b680..a75dcce7a9ba 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1940,6 +1940,7 @@  static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
 	struct expander_device *ex = &dev->ex_dev;
 	struct ex_phy *phy = &ex->ex_phy[phy_id];
 	enum sas_device_type type = SAS_PHY_UNUSED;
+	struct smp_disc_resp *disc_resp;
 	u8 sas_addr[SAS_ADDR_SIZE];
 	char msg[80] = "";
 	int res;
@@ -1951,33 +1952,47 @@  static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
 		 SAS_ADDR(dev->sas_addr), phy_id, msg);
 
 	memset(sas_addr, 0, SAS_ADDR_SIZE);
-	res = sas_get_phy_attached_dev(dev, phy_id, sas_addr, &type);
+	disc_resp = alloc_smp_resp(DISCOVER_RESP_SIZE);
+	if (!disc_resp)
+		return -ENOMEM;
+
+	res = sas_get_phy_discover(dev, phy_id, disc_resp);
 	switch (res) {
 	case SMP_RESP_NO_PHY:
 		phy->phy_state = PHY_NOT_PRESENT;
 		sas_unregister_devs_sas_addr(dev, phy_id, last);
-		return res;
+		goto out;
 	case SMP_RESP_PHY_VACANT:
 		phy->phy_state = PHY_VACANT;
 		sas_unregister_devs_sas_addr(dev, phy_id, last);
-		return res;
+		goto out;
 	case SMP_RESP_FUNC_ACC:
 		break;
 	case -ECOMM:
 		break;
 	default:
-		return res;
+		goto out;
+	}
+
+	if (res == 0) {
+		struct discover_resp *dr = &disc_resp->disc;
+
+		memcpy(sas_addr, dr->attached_sas_addr, SAS_ADDR_SIZE);
+		type = to_dev_type(dr);
+		if (type == 0)
+			memset(sas_addr, 0, SAS_ADDR_SIZE);
 	}
 
 	if ((SAS_ADDR(sas_addr) == 0) || (res == -ECOMM)) {
 		phy->phy_state = PHY_EMPTY;
 		sas_unregister_devs_sas_addr(dev, phy_id, last);
 		/*
-		 * Even though the PHY is empty, for convenience we discover
-		 * the PHY to update the PHY info, like negotiated linkrate.
+		 * Even though the PHY is empty, for convenience we update
+		 * the PHY info, like negotiated linkrate.
 		 */
-		sas_ex_phy_discover(dev, phy_id);
-		return res;
+		if (res == 0)
+			sas_set_ex_phy(dev, phy_id, disc_resp);
+		goto out;
 	} else if (SAS_ADDR(sas_addr) == SAS_ADDR(phy->attached_sas_addr) &&
 		   dev_type_flutter(type, phy->attached_dev_type)) {
 		struct domain_device *ata_dev = sas_ex_to_ata(dev, phy_id);
@@ -1989,7 +2004,7 @@  static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
 			action = ", needs recovery";
 		pr_debug("ex %016llx phy%02d broadcast flutter%s\n",
 			 SAS_ADDR(dev->sas_addr), phy_id, action);
-		return res;
+		goto out;
 	}
 
 	/* we always have to delete the old device when we went here */
@@ -1998,7 +2013,11 @@  static int sas_rediscover_dev(struct domain_device *dev, int phy_id,
 		SAS_ADDR(phy->attached_sas_addr));
 	sas_unregister_devs_sas_addr(dev, phy_id, last);
 
+	kfree(disc_resp);
 	return sas_discover_new(dev, phy_id);
+out:
+	kfree(disc_resp);
+	return res;
 }
 
 /**