diff mbox

[1/3] IB/vmw_pvrdma: Defer activating device until vmxnet3 link is up

Message ID 4b515ded56300f12cedc67253d42ab8fbc52134e.1484075557.git.aditr@vmware.com (mailing list archive)
State Rejected
Headers show

Commit Message

Adit Ranadive Jan. 10, 2017, 7:15 p.m. UTC
From: Aditya Sarwade <asarwade@vmware.com>

Currently on bootup, ethernet drivers seem to load before RDMA ones
in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
the vmxnet3 link state may not necessarily be enabled by the stack
before pvrdma is loaded. This is a problem because if the pvrdma
module is loaded on bootup (by installing it in /lib/modules/*),
the pvrdma device comes up in a port down state.

Since this is the most common use case scenario, defer the activation
of the device till the paired vmxnet3 link actually comes up. One
downside of doing this is, if a user doesn't have the vmxnet3 link
up when the pvrdma driver is loaded, they may not see any output
for ibv_devinfo until the paired vmxnet3 link is enabled too. The
users somehow need to be aware of this.

This only changes how the device is activated the first time. Once
enabled if the link goes down, a pvrdma driver reload is still required
after link up.

Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
Reviewed-by: Bryan Tan <bryantan@vmware.com>
Signed-off-by: Adit Ranadive <aditr@vmware.com>
---
 drivers/infiniband/hw/vmw_pvrdma/pvrdma.h      |  1 +
 drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 99 +++++++++++++++++---------
 2 files changed, 65 insertions(+), 35 deletions(-)

Comments

Yuval Shaia Jan. 11, 2017, 8:31 a.m. UTC | #1
On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> From: Aditya Sarwade <asarwade@vmware.com>
> 
> Currently on bootup, ethernet drivers seem to load before RDMA ones
> in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> the vmxnet3 link state may not necessarily be enabled by the stack
> before pvrdma is loaded. This is a problem because if the pvrdma
> module is loaded on bootup (by installing it in /lib/modules/*),
> the pvrdma device comes up in a port down state.
> 
> Since this is the most common use case scenario, defer the activation
> of the device till the paired vmxnet3 link actually comes up. One
> downside of doing this is, if a user doesn't have the vmxnet3 link
> up when the pvrdma driver is loaded, they may not see any output
> for ibv_devinfo until the paired vmxnet3 link is enabled too. The
> users somehow need to be aware of this.
> 
> This only changes how the device is activated the first time. Once
> enabled if the link goes down, a pvrdma driver reload is still required
> after link up.
> 
> Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
> Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
> Reviewed-by: Bryan Tan <bryantan@vmware.com>
> Signed-off-by: Adit Ranadive <aditr@vmware.com>
> ---
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h      |  1 +
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 99 +++++++++++++++++---------
>  2 files changed, 65 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> index 71e1d55..540a54b 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> @@ -221,6 +221,7 @@ struct pvrdma_dev {
>  	u32 port_cap_mask;
>  	struct mutex port_mutex; /* Port modification mutex. */
>  	bool ib_active;
> +	bool enabled;
>  	atomic_t num_qps;
>  	atomic_t num_cqs;
>  	atomic_t num_pds;
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> index 231a1ce..b57132f 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> @@ -74,6 +74,8 @@ static int pvrdma_del_gid(struct ib_device *ibdev,
>  			  void **context);
>  
>  
> +static int pvrdma_enable_dev(struct pvrdma_dev *dev);
> +
>  static ssize_t show_hca(struct device *device, struct device_attribute *attr,
>  			char *buf)
>  {
> @@ -755,6 +757,10 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
>  		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
>  		break;
>  	case NETDEV_UP:
> +		if (!dev->enabled && pvrdma_enable_dev(dev)) {
> +			dev_err(&dev->pdev->dev, "failed to enable device\n");
> +			break;
> +		}
>  		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
>  		break;
>  	default:
> @@ -801,6 +807,48 @@ static int pvrdma_netdevice_event(struct notifier_block *this,
>  	return NOTIFY_DONE;
>  }
>  
> +static void pvrdma_disable_dev(struct pvrdma_dev *dev)
> +{
> +	if (dev->enabled) {
> +		ib_unregister_device(&dev->ib_dev);
> +		dev->enabled = false;
> +	}
> +}
> +
> +static int pvrdma_enable_dev(struct pvrdma_dev *dev)
> +{
> +	int ret;
> +	pvrdma_enable_intrs(dev);
> +
> +	/* Activate pvrdma device */
> +	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
> +
> +	/* Make sure the write is complete before reading status. */
> +	mb();
> +
> +	/* Check if device was successfully activated */
> +	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
> +	if (ret) {
> +		dev_err(&dev->pdev->dev, "failed to activate device\n");
> +		ret = -EFAULT;
> +		goto err_disable_intrs;
> +	}
> +
> +	/* Register IB device */
> +	ret = pvrdma_register_device(dev);
> +	if (ret) {
> +		dev_err(&dev->pdev->dev, "failed to register IB device\n");
> +		goto err_disable_intrs;
> +	}
> +
> +	dev->enabled = true;
> +	return 0;
> +
> +err_disable_intrs:
> +	pvrdma_disable_intrs(dev);
> +	return ret;
> +}
> +
>  static int pvrdma_pci_probe(struct pci_dev *pdev,
>  			    const struct pci_device_id *id)
>  {
> @@ -867,14 +915,14 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	/* Enable 64-Bit DMA */
>  	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
>  		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> -		if (ret != 0) {
> +		if (ret) {
>  			dev_err(&pdev->dev,
>  				"pci_set_consistent_dma_mask failed\n");
>  			goto err_free_resource;
>  		}
>  	} else {
>  		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> -		if (ret != 0) {
> +		if (ret) {
>  			dev_err(&pdev->dev,
>  				"pci_set_dma_mask failed\n");
>  			goto err_free_resource;
> @@ -1029,7 +1077,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	if (ret) {
>  		dev_err(&pdev->dev, "failed to allocate interrupts\n");
>  		ret = -ENOMEM;
> -		goto err_netdevice;
> +		goto err_free_cq_ring;

This fix seems to be true regardless of $subject, right?

>  	}
>  
>  	/* Allocate UAR table. */
> @@ -1049,51 +1097,35 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	}
>  	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
>  
> -	pvrdma_enable_intrs(dev);
> -
> -	/* Activate pvrdma device */
> -	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
> -
> -	/* Make sure the write is complete before reading status. */
> -	mb();
> -
> -	/* Check if device was successfully activated */
> -	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
> -	if (ret != 0) {
> -		dev_err(&pdev->dev, "failed to activate device\n");
> -		ret = -EFAULT;
> -		goto err_disable_intr;
> -	}
> -
> -	/* Register IB device */
> -	ret = pvrdma_register_device(dev);
> -	if (ret) {
> -		dev_err(&pdev->dev, "failed to register IB device\n");
> -		goto err_disable_intr;
> +	if (netif_running(dev->netdev) && netif_carrier_ok(dev->netdev)) {
> +		ret = pvrdma_enable_dev(dev);
> +		if (ret) {
> +			dev_err(&pdev->dev, "failed to enable device\n");
> +			goto err_free_sgid_tbl;
> +		}
> +	} else {
> +		dev_info(&pdev->dev, "pvrdma netdev link is down\n");
>  	}
>  
>  	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
>  	ret = register_netdevice_notifier(&dev->nb_netdev);
>  	if (ret) {
>  		dev_err(&pdev->dev, "failed to register netdevice events\n");
> -		goto err_unreg_ibdev;
> +		goto err_disable_dev;
>  	}
>  
>  	dev_info(&pdev->dev, "attached to device\n");
>  	return 0;
>  
> -err_unreg_ibdev:
> -	ib_unregister_device(&dev->ib_dev);
> -err_disable_intr:
> -	pvrdma_disable_intrs(dev);
> +err_disable_dev:
> +	pvrdma_disable_dev(dev);
> +err_free_sgid_tbl:
>  	kfree(dev->sgid_tbl);
>  err_free_uar_table:
>  	pvrdma_uar_table_cleanup(dev);
>  err_free_intrs:
>  	pvrdma_free_irq(dev);
>  	pvrdma_disable_msi_all(dev);
> -err_netdevice:
> -	unregister_netdevice_notifier(&dev->nb_netdev);
>  err_free_cq_ring:
>  	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
>  err_free_async_ring:
> @@ -1132,10 +1164,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
>  	unregister_netdevice_notifier(&dev->nb_netdev);
>  	dev->nb_netdev.notifier_call = NULL;
>  
> -	flush_workqueue(event_wq);
> -
> -	/* Unregister ib device */
> -	ib_unregister_device(&dev->ib_dev);
> +	pvrdma_disable_dev(dev);
>  
>  	mutex_lock(&pvrdma_device_list_lock);
>  	list_del(&dev->device_link);

Reviewed-by: Yuval Shaia <yuval.shaia@oracle.com>

> -- 
> 2.7.4
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 11, 2017, 10:19 p.m. UTC | #2
On Wed, Jan 11, 2017 at 10:31:32AM +0200, Yuval Shaia wrote:
> On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> > From: Aditya Sarwade <asarwade@vmware.com>
> > 
> > Currently on bootup, ethernet drivers seem to load before RDMA ones
> > in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> > the vmxnet3 link state may not necessarily be enabled by the stack
> > before pvrdma is loaded. This is a problem because if the pvrdma
> > module is loaded on bootup (by installing it in /lib/modules/*),
> > the pvrdma device comes up in a port down state.
> > 
> > Since this is the most common use case scenario, defer the activation
> > of the device till the paired vmxnet3 link actually comes up. One
> > downside of doing this is, if a user doesn't have the vmxnet3 link
> > up when the pvrdma driver is loaded, they may not see any output
> > for ibv_devinfo until the paired vmxnet3 link is enabled too. The
> > users somehow need to be aware of this.
> > 
> > This only changes how the device is activated the first time. Once
> > enabled if the link goes down, a pvrdma driver reload is still required
> > after link up.
> > 
> > Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
> > Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
> > Reviewed-by: Bryan Tan <bryantan@vmware.com>
> > Signed-off-by: Adit Ranadive <aditr@vmware.com>
> > ---
> >  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h      |  1 +
> >  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 99 +++++++++++++++++---------
> >  2 files changed, 65 insertions(+), 35 deletions(-)
> > 

<...>

> >  static int pvrdma_pci_probe(struct pci_dev *pdev,
> >  			    const struct pci_device_id *id)
> >  {
> > @@ -867,14 +915,14 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> >  	/* Enable 64-Bit DMA */
> >  	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
> >  		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> > -		if (ret != 0) {
> > +		if (ret) {
> >  			dev_err(&pdev->dev,
> >  				"pci_set_consistent_dma_mask failed\n");
> >  			goto err_free_resource;
> >  		}
> >  	} else {
> >  		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> > -		if (ret != 0) {
> > +		if (ret) {
> >  			dev_err(&pdev->dev,
> >  				"pci_set_dma_mask failed\n");
> >  			goto err_free_resource;
> > @@ -1029,7 +1077,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
> >  	if (ret) {
> >  		dev_err(&pdev->dev, "failed to allocate interrupts\n");
> >  		ret = -ENOMEM;
> > -		goto err_netdevice;
> > +		goto err_free_cq_ring;
> 
> This fix seems to be true regardless of $subject, right?

Correct. Thanks for the review!
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Leon Romanovsky Jan. 16, 2017, 7:38 a.m. UTC | #3
On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> From: Aditya Sarwade <asarwade@vmware.com>
>
> Currently on bootup, ethernet drivers seem to load before RDMA ones
> in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> the vmxnet3 link state may not necessarily be enabled by the stack
> before pvrdma is loaded. This is a problem because if the pvrdma
> module is loaded on bootup (by installing it in /lib/modules/*),
> the pvrdma device comes up in a port down state.
>
> Since this is the most common use case scenario, defer the activation
> of the device till the paired vmxnet3 link actually comes up. One
> downside of doing this is, if a user doesn't have the vmxnet3 link
> up when the pvrdma driver is loaded, they may not see any output
> for ibv_devinfo until the paired vmxnet3 link is enabled too. The
> users somehow need to be aware of this.
>

I wouldn't call it "fix", the more appropriate name is "hack".
It doesn't look right that users should be aware of such non-intuitive
driver behavior.


> This only changes how the device is activated the first time. Once
> enabled if the link goes down, a pvrdma driver reload is still required
> after link up.
>
> Fixes: 29c8d9eba550 ("IB: Add vmw_pvrdma driver")
> Signed-off-by: Aditya Sarwade <asarwade@vmware.com>
> Reviewed-by: Bryan Tan <bryantan@vmware.com>
> Signed-off-by: Adit Ranadive <aditr@vmware.com>
> ---
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma.h      |  1 +
>  drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c | 99 +++++++++++++++++---------
>  2 files changed, 65 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> index 71e1d55..540a54b 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
> @@ -221,6 +221,7 @@ struct pvrdma_dev {
>  	u32 port_cap_mask;
>  	struct mutex port_mutex; /* Port modification mutex. */
>  	bool ib_active;
> +	bool enabled;
>  	atomic_t num_qps;
>  	atomic_t num_cqs;
>  	atomic_t num_pds;
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> index 231a1ce..b57132f 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
> @@ -74,6 +74,8 @@ static int pvrdma_del_gid(struct ib_device *ibdev,
>  			  void **context);
>
>
> +static int pvrdma_enable_dev(struct pvrdma_dev *dev);
> +
>  static ssize_t show_hca(struct device *device, struct device_attribute *attr,
>  			char *buf)
>  {
> @@ -755,6 +757,10 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
>  		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
>  		break;
>  	case NETDEV_UP:
> +		if (!dev->enabled && pvrdma_enable_dev(dev)) {
> +			dev_err(&dev->pdev->dev, "failed to enable device\n");
> +			break;
> +		}
>  		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
>  		break;
>  	default:
> @@ -801,6 +807,48 @@ static int pvrdma_netdevice_event(struct notifier_block *this,
>  	return NOTIFY_DONE;
>  }
>
> +static void pvrdma_disable_dev(struct pvrdma_dev *dev)
> +{
> +	if (dev->enabled) {
> +		ib_unregister_device(&dev->ib_dev);
> +		dev->enabled = false;
> +	}
> +}
> +
> +static int pvrdma_enable_dev(struct pvrdma_dev *dev)
> +{
> +	int ret;
> +	pvrdma_enable_intrs(dev);
> +
> +	/* Activate pvrdma device */
> +	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
> +
> +	/* Make sure the write is complete before reading status. */
> +	mb();
> +
> +	/* Check if device was successfully activated */
> +	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
> +	if (ret) {
> +		dev_err(&dev->pdev->dev, "failed to activate device\n");
> +		ret = -EFAULT;
> +		goto err_disable_intrs;
> +	}
> +
> +	/* Register IB device */
> +	ret = pvrdma_register_device(dev);
> +	if (ret) {
> +		dev_err(&dev->pdev->dev, "failed to register IB device\n");
> +		goto err_disable_intrs;
> +	}
> +
> +	dev->enabled = true;
> +	return 0;
> +
> +err_disable_intrs:
> +	pvrdma_disable_intrs(dev);
> +	return ret;
> +}
> +
>  static int pvrdma_pci_probe(struct pci_dev *pdev,
>  			    const struct pci_device_id *id)
>  {
> @@ -867,14 +915,14 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	/* Enable 64-Bit DMA */
>  	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
>  		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
> -		if (ret != 0) {
> +		if (ret) {
>  			dev_err(&pdev->dev,
>  				"pci_set_consistent_dma_mask failed\n");
>  			goto err_free_resource;
>  		}
>  	} else {
>  		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
> -		if (ret != 0) {
> +		if (ret) {
>  			dev_err(&pdev->dev,
>  				"pci_set_dma_mask failed\n");
>  			goto err_free_resource;
> @@ -1029,7 +1077,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	if (ret) {
>  		dev_err(&pdev->dev, "failed to allocate interrupts\n");
>  		ret = -ENOMEM;
> -		goto err_netdevice;
> +		goto err_free_cq_ring;
>  	}
>
>  	/* Allocate UAR table. */
> @@ -1049,51 +1097,35 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
>  	}
>  	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
>
> -	pvrdma_enable_intrs(dev);
> -
> -	/* Activate pvrdma device */
> -	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
> -
> -	/* Make sure the write is complete before reading status. */
> -	mb();
> -
> -	/* Check if device was successfully activated */
> -	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
> -	if (ret != 0) {
> -		dev_err(&pdev->dev, "failed to activate device\n");
> -		ret = -EFAULT;
> -		goto err_disable_intr;
> -	}
> -
> -	/* Register IB device */
> -	ret = pvrdma_register_device(dev);
> -	if (ret) {
> -		dev_err(&pdev->dev, "failed to register IB device\n");
> -		goto err_disable_intr;
> +	if (netif_running(dev->netdev) && netif_carrier_ok(dev->netdev)) {
> +		ret = pvrdma_enable_dev(dev);
> +		if (ret) {
> +			dev_err(&pdev->dev, "failed to enable device\n");
> +			goto err_free_sgid_tbl;
> +		}
> +	} else {
> +		dev_info(&pdev->dev, "pvrdma netdev link is down\n");
>  	}
>
>  	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
>  	ret = register_netdevice_notifier(&dev->nb_netdev);
>  	if (ret) {
>  		dev_err(&pdev->dev, "failed to register netdevice events\n");
> -		goto err_unreg_ibdev;
> +		goto err_disable_dev;
>  	}
>
>  	dev_info(&pdev->dev, "attached to device\n");
>  	return 0;
>
> -err_unreg_ibdev:
> -	ib_unregister_device(&dev->ib_dev);
> -err_disable_intr:
> -	pvrdma_disable_intrs(dev);
> +err_disable_dev:
> +	pvrdma_disable_dev(dev);
> +err_free_sgid_tbl:
>  	kfree(dev->sgid_tbl);
>  err_free_uar_table:
>  	pvrdma_uar_table_cleanup(dev);
>  err_free_intrs:
>  	pvrdma_free_irq(dev);
>  	pvrdma_disable_msi_all(dev);
> -err_netdevice:
> -	unregister_netdevice_notifier(&dev->nb_netdev);
>  err_free_cq_ring:
>  	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
>  err_free_async_ring:
> @@ -1132,10 +1164,7 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
>  	unregister_netdevice_notifier(&dev->nb_netdev);
>  	dev->nb_netdev.notifier_call = NULL;
>
> -	flush_workqueue(event_wq);
> -
> -	/* Unregister ib device */
> -	ib_unregister_device(&dev->ib_dev);
> +	pvrdma_disable_dev(dev);
>
>  	mutex_lock(&pvrdma_device_list_lock);
>  	list_del(&dev->device_link);
> --
> 2.7.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 18, 2017, 12:31 a.m. UTC | #4
On Mon, Jan 16, 2017 at 09:38:58AM +0200, Leon Romanovsky wrote:
> On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> > From: Aditya Sarwade <asarwade@vmware.com>
> >
> > Currently on bootup, ethernet drivers seem to load before RDMA ones
> > in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> > the vmxnet3 link state may not necessarily be enabled by the stack
> > before pvrdma is loaded. This is a problem because if the pvrdma
> > module is loaded on bootup (by installing it in /lib/modules/*),
> > the pvrdma device comes up in a port down state.
> >
> > Since this is the most common use case scenario, defer the activation
> > of the device till the paired vmxnet3 link actually comes up. One
> > downside of doing this is, if a user doesn't have the vmxnet3 link
> > up when the pvrdma driver is loaded, they may not see any output
> > for ibv_devinfo until the paired vmxnet3 link is enabled too. The
> > users somehow need to be aware of this.
> >
> 
> I wouldn't call it "fix", the more appropriate name is "hack".
> It doesn't look right that users should be aware of such non-intuitive
> driver behavior.

Well its a partial fix for 4.10 to let the RDMA device come up correctly
Since this is a RoCE device we want to mimic to eth port status for the
RDMA link. Currently, for us there isnt an easy way to do this if the
vmxnet3 device comes up after we have already registered the IB device.
This patch moves the IB device registration to after the ethernet link is
up.
I wanted to get this in so that in the common case of VM power on, the 
PVRDMA device is in the port active state along with the vmxnet3 link.

Doug, any thoughts?

- Adit
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit Jan. 18, 2017, 4:21 a.m. UTC | #5
Hi Adit,

> -----Original Message-----
> From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> owner@vger.kernel.org] On Behalf Of Adit Ranadive
> Sent: Tuesday, January 17, 2017 6:31 PM
> To: Leon Romanovsky <leon@kernel.org>
> Cc: dledford@redhat.com; linux-rdma@vger.kernel.org; pv-
> drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
> Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device until
> vmxnet3 link is up
> 
> On Mon, Jan 16, 2017 at 09:38:58AM +0200, Leon Romanovsky wrote:
> > On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> > > From: Aditya Sarwade <asarwade@vmware.com>
> > >
> > > Currently on bootup, ethernet drivers seem to load before RDMA ones
> > > in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> > > the vmxnet3 link state may not necessarily be enabled by the stack
> > > before pvrdma is loaded. This is a problem because if the pvrdma
> > > module is loaded on bootup (by installing it in /lib/modules/*), the
> > > pvrdma device comes up in a port down state.
> > >
> > > Since this is the most common use case scenario, defer the
> > > activation of the device till the paired vmxnet3 link actually comes
> > > up. One downside of doing this is, if a user doesn't have the
> > > vmxnet3 link up when the pvrdma driver is loaded, they may not see
> > > any output for ibv_devinfo until the paired vmxnet3 link is enabled
> > > too. The users somehow need to be aware of this.
> > >
> >
> > I wouldn't call it "fix", the more appropriate name is "hack".
> > It doesn't look right that users should be aware of such non-intuitive
> > driver behavior.
> 
> Well its a partial fix for 4.10 to let the RDMA device come up correctly Since
> this is a RoCE device we want to mimic to eth port status for the RDMA link.
> Currently, for us there isnt an easy way to do this if the
> vmxnet3 device comes up after we have already registered the IB device.
vmxnet3 device come up and its link come are two different events.
I haven't reviewed the patches or code either.
So asking dumb question.
Can't you load RDMA driver when vmxnet3 device come up?
You should bring up the link of RDMA device when vmxnet3 device's link is up.
(Similar to what other RDMA drivers do which depend on their underlying NIC drivers).

> This patch moves the IB device registration to after the ethernet link is up.
> I wanted to get this in so that in the common case of VM power on, the
> PVRDMA device is in the port active state along with the vmxnet3 link.
>
What happens when link goes down? Does it call ib_unregister()?

 
> Doug, any thoughts?
> 
> - Adit
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the
> body of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 18, 2017, 6:30 p.m. UTC | #6
On Wed, Jan 18, 2017 at 04:21:13AM +0000, Parav Pandit wrote:
> Hi Adit,
> 
> > -----Original Message-----
> > From: linux-rdma-owner@vger.kernel.org [mailto:linux-rdma-
> > owner@vger.kernel.org] On Behalf Of Adit Ranadive
> > Sent: Tuesday, January 17, 2017 6:31 PM
> > To: Leon Romanovsky <leon@kernel.org>
> > Cc: dledford@redhat.com; linux-rdma@vger.kernel.org; pv-
> > drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
> > Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device until
> > vmxnet3 link is up
> > 
> > On Mon, Jan 16, 2017 at 09:38:58AM +0200, Leon Romanovsky wrote:
> > > On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> > > > From: Aditya Sarwade <asarwade@vmware.com>
> > > >
> > > > Currently on bootup, ethernet drivers seem to load before RDMA ones
> > > > in Linux. So while the vmxnet3 module is loaded before vmw_pvrdma,
> > > > the vmxnet3 link state may not necessarily be enabled by the stack
> > > > before pvrdma is loaded. This is a problem because if the pvrdma
> > > > module is loaded on bootup (by installing it in /lib/modules/*), the
> > > > pvrdma device comes up in a port down state.
> > > >
> > > > Since this is the most common use case scenario, defer the
> > > > activation of the device till the paired vmxnet3 link actually comes
> > > > up. One downside of doing this is, if a user doesn't have the
> > > > vmxnet3 link up when the pvrdma driver is loaded, they may not see
> > > > any output for ibv_devinfo until the paired vmxnet3 link is enabled
> > > > too. The users somehow need to be aware of this.
> > > >
> > >
> > > I wouldn't call it "fix", the more appropriate name is "hack".
> > > It doesn't look right that users should be aware of such non-intuitive
> > > driver behavior.
> > 
> > Well its a partial fix for 4.10 to let the RDMA device come up correctly Since
> > this is a RoCE device we want to mimic to eth port status for the RDMA link.
> > Currently, for us there isnt an easy way to do this if the
> > vmxnet3 device comes up after we have already registered the IB device.
> vmxnet3 device come up and its link come are two different events.

You're right that they are two separate events - I was referring to the
vmxnet3 link being up. 

> I haven't reviewed the patches or code either.
> So asking dumb question.
> Can't you load RDMA driver when vmxnet3 device come up?

Are you referring to loading the pvrdma module or just calling ib_register
after the vmxnet3 link is up? The latter is waht we do in this patch anyways.

> You should bring up the link of RDMA device when vmxnet3 device's link is up.
> (Similar to what other RDMA drivers do which depend on their underlying NIC drivers).

We do register for netdev events so we get callbacks for the
link state. The problem happens when we call ib_register before the vmxnet3
link is up. At this point the ib_core stack already has created the QP1, etc.
Once the vmxnet3 link is up we would have to call ib_unregister/ib_register
to recreate QP1. This might fail/stuck if the there is a user app thats created
QPs and we're just waiting to free those resources.

> > This patch moves the IB device registration to after the ethernet link is up.
> > I wanted to get this in so that in the common case of VM power on, the
> > PVRDMA device is in the port active state along with the vmxnet3 link.
> >
> What happens when link goes down? Does it call ib_unregister()?
> 
>  
> > Doug, any thoughts?
> > 
> > - Adit
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 18, 2017, 6:42 p.m. UTC | #7
On Wed, Jan 18, 2017 at 10:30:20AM -0800, Adit Ranadive wrote:

> We do register for netdev events so we get callbacks for the
> link state. The problem happens when we call ib_register before the vmxnet3
> link is up. At this point the ib_core stack already has created the QP1, etc.
> Once the vmxnet3 link is up we would have to call ib_unregister/ib_register
> to recreate QP1. This might fail/stuck if the there is a user app thats created
> QPs and we're just waiting to free those resources.

Having to register/unregister on every link transition sounds like a
horrible design, you should fix that :)

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford Jan. 18, 2017, 8:23 p.m. UTC | #8
On Tue, 2017-01-17 at 16:31 -0800, Adit Ranadive wrote:
> On Mon, Jan 16, 2017 at 09:38:58AM +0200, Leon Romanovsky wrote:
> > 
> > On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
> > > 
> > > From: Aditya Sarwade <asarwade@vmware.com>
> > > 
> > > Currently on bootup, ethernet drivers seem to load before RDMA
> > > ones
> > > in Linux. So while the vmxnet3 module is loaded before
> > > vmw_pvrdma,
> > > the vmxnet3 link state may not necessarily be enabled by the
> > > stack
> > > before pvrdma is loaded. This is a problem because if the pvrdma
> > > module is loaded on bootup (by installing it in /lib/modules/*),
> > > the pvrdma device comes up in a port down state.
> > > 
> > > Since this is the most common use case scenario, defer the
> > > activation
> > > of the device till the paired vmxnet3 link actually comes up. One
> > > downside of doing this is, if a user doesn't have the vmxnet3
> > > link
> > > up when the pvrdma driver is loaded, they may not see any output
> > > for ibv_devinfo until the paired vmxnet3 link is enabled too. The
> > > users somehow need to be aware of this.
> > > 
> > 
> > I wouldn't call it "fix", the more appropriate name is "hack".
> > It doesn't look right that users should be aware of such non-
> > intuitive
> > driver behavior.
> 
> Well its a partial fix for 4.10 to let the RDMA device come up
> correctly
> Since this is a RoCE device we want to mimic to eth port status for
> the
> RDMA link. Currently, for us there isnt an easy way to do this if the
> vmxnet3 device comes up after we have already registered the IB
> device.
> This patch moves the IB device registration to after the ethernet
> link is
> up.
> I wanted to get this in so that in the common case of VM power on,
> the 
> PVRDMA device is in the port active state along with the vmxnet3
> link.
> 
> Doug, any thoughts?

Yes, I'm wondering why the vmw_pvrdma driver doesn't detect link state
changes and act appropriately?
Doug Ledford Jan. 18, 2017, 8:25 p.m. UTC | #9
On Wed, 2017-01-18 at 11:42 -0700, Jason Gunthorpe wrote:
> On Wed, Jan 18, 2017 at 10:30:20AM -0800, Adit Ranadive wrote:
> 
> > 
> > We do register for netdev events so we get callbacks for the
> > link state. The problem happens when we call ib_register before the
> > vmxnet3
> > link is up. At this point the ib_core stack already has created the
> > QP1, etc.
> > Once the vmxnet3 link is up we would have to call
> > ib_unregister/ib_register
> > to recreate QP1. This might fail/stuck if the there is a user app
> > thats created
> > QPs and we're just waiting to free those resources.
> 
> Having to register/unregister on every link transition sounds like a
> horrible design, you should fix that :)

Indeed.
Adit Ranadive Jan. 18, 2017, 8:30 p.m. UTC | #10
On Wed, Jan 18, 2017 at 11:42:36AM -0700, Jason Gunthorpe wrote:
> On Wed, Jan 18, 2017 at 10:30:20AM -0800, Adit Ranadive wrote:
> 
>> We do register for netdev events so we get callbacks for the
>> link state. The problem happens when we call ib_register before the vmxnet3
>> link is up. At this point the ib_core stack already has created the QP1, etc.
>> Once the vmxnet3 link is up we would have to call ib_unregister/ib_register
>> to recreate QP1. This might fail/stuck if the there is a user app thats created
>> QPs and we're just waiting to free those resources.
> 
> Having to register/unregister on every link transition sounds like a
> horrible design, you should fix that :)

We try to avoid the register if the link is down with this patch but the
issue boils down to the handling of QP1. This gets us a step closer.

In order to recover QP1 correctly after a link transition we had to go through a
register/unregister since it is placed in error state when the port is down.
In this case can the MAD layer re-initialize QP1 if link transitions are performed?







--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 18, 2017, 8:41 p.m. UTC | #11
On Wed, Jan 18, 2017 at 3:23:10PM -0500, Doug Ledford wrote:
> On Tue, 2017-01-17 at 16:31 -0800, Adit Ranadive wrote:
>> On Mon, Jan 16, 2017 at 09:38:58AM +0200, Leon Romanovsky wrote:
>> > 
>> > On Tue, Jan 10, 2017 at 11:15:39AM -0800, Adit Ranadive wrote:
>> > > 
>> > > From: Aditya Sarwade <asarwade@vmware.com>
>> > > 
>> > > Currently on bootup, ethernet drivers seem to load before RDMA
>> > > ones
>> > > in Linux. So while the vmxnet3 module is loaded before
>> > > vmw_pvrdma,
>> > > the vmxnet3 link state may not necessarily be enabled by the
>> > > stack
>> > > before pvrdma is loaded. This is a problem because if the pvrdma
>> > > module is loaded on bootup (by installing it in /lib/modules/*),
>> > > the pvrdma device comes up in a port down state.
>> > > 
>> > > Since this is the most common use case scenario, defer the
>> > > activation
>> > > of the device till the paired vmxnet3 link actually comes up. One
>> > > downside of doing this is, if a user doesn't have the vmxnet3
>> > > link
>> > > up when the pvrdma driver is loaded, they may not see any output
>> > > for ibv_devinfo until the paired vmxnet3 link is enabled too. The
>> > > users somehow need to be aware of this.
>> > > 
>> > 
>> > I wouldn't call it "fix", the more appropriate name is "hack".
>> > It doesn't look right that users should be aware of such non-
>> > intuitive
>> > driver behavior.
>> 
>> Well its a partial fix for 4.10 to let the RDMA device come up
>> correctly
>> Since this is a RoCE device we want to mimic to eth port status for
>> the
>> RDMA link. Currently, for us there isnt an easy way to do this if the
>> vmxnet3 device comes up after we have already registered the IB
>> device.
>> This patch moves the IB device registration to after the ethernet
>> link is
>> up.
>> I wanted to get this in so that in the common case of VM power on,
>> the 
>> PVRDMA device is in the port active state along with the vmxnet3
>> link.
>> 
>> Doug, any thoughts?
> 
> Yes, I'm wondering why the vmw_pvrdma driver doesn't detect link state
> changes and act appropriately?

We do through netdev events. Unfortunately, currently we are limited to
registering/unregistering to recover everything correctly.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe Jan. 18, 2017, 8:41 p.m. UTC | #12
On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:

> In order to recover QP1 correctly after a link transition we had to go through a
> register/unregister since it is placed in error state when the port is down.
> In this case can the MAD layer re-initialize QP1 if link transitions are performed?

Your driver needs to deal with this. Do whatever it takes to recover
your QP1 without re-registring and do not bother the rest of the
system with it.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 18, 2017, 8:52 p.m. UTC | #13
On Wed, Jan 18, 2017 at 1:41:30PM -0700, Jason Gunthorpe wrote:
> On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:
> 
>> In order to recover QP1 correctly after a link transition we had to go through a
>> register/unregister since it is placed in error state when the port is down.
>> In this case can the MAD layer re-initialize QP1 if link transitions are performed?
> 
> Your driver needs to deal with this. Do whatever it takes to recover
> your QP1 without re-registring and do not bother the rest of the
> system with it.

Ok. We'll investigate internally on for a better recovery process for QP1.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Parav Pandit Jan. 18, 2017, 10:50 p.m. UTC | #14
Hi Adit,

> -----Original Message-----
> From: Adit Ranadive [mailto:aditr@vmware.com]
> Sent: Wednesday, January 18, 2017 2:53 PM
> To: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> Cc: Parav Pandit <parav@mellanox.com>; Leon Romanovsky
> <leon@kernel.org>; dledford@redhat.com; linux-rdma@vger.kernel.org; pv-
> drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
> Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device until
> vmxnet3 link is up
> 
> On Wed, Jan 18, 2017 at 1:41:30PM -0700, Jason Gunthorpe wrote:
> > On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:
> >
> >> In order to recover QP1 correctly after a link transition we had to
> >> go through a register/unregister since it is placed in error state when the
> port is down.
> >> In this case can the MAD layer re-initialize QP1 if link transitions are
> performed?
> >
> > Your driver needs to deal with this. Do whatever it takes to recover
> > your QP1 without re-registring and do not bother the rest of the
> > system with it.
> 
> Ok. We'll investigate internally on for a better recovery process for QP1.

By the time I reply, most others requested same.
Thanks for addressing it.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adit Ranadive Jan. 19, 2017, 12:31 a.m. UTC | #15
On Wed, Jan 18, 2017 at 10:50:54PM +0000, Parav Pandit wrote:
> Hi Adit,
> 
>> -----Original Message-----
>> From: Adit Ranadive [mailto:aditr@vmware.com]
>> Sent: Wednesday, January 18, 2017 2:53 PM
>> To: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
>> Cc: Parav Pandit <parav@mellanox.com>; Leon Romanovsky
>> <leon@kernel.org>; dledford@redhat.com; linux-rdma@vger.kernel.org; pv-
>> drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
>> Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device until
>> vmxnet3 link is up
>> 
>> On Wed, Jan 18, 2017 at 1:41:30PM -0700, Jason Gunthorpe wrote:
>> > On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:
>> >
>> >> In order to recover QP1 correctly after a link transition we had to
>> >> go through a register/unregister since it is placed in error state when the
>> port is down.
>> >> In this case can the MAD layer re-initialize QP1 if link transitions are
>> performed?
>> >
>> > Your driver needs to deal with this. Do whatever it takes to recover
>> > your QP1 without re-registring and do not bother the rest of the
>> > system with it.
>> 
>> Ok. We'll investigate internally on for a better recovery process for QP1.
> 
> By the time I reply, most others requested same.
> Thanks for addressing it.

Hi Doug,

Please consider taking the other two patches in this series if possible
for your next set of rc fixes.

Thanks,
Adit

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford Jan. 19, 2017, 6:45 a.m. UTC | #16
On Wed, 2017-01-18 at 16:31 -0800, Adit Ranadive wrote:
> On Wed, Jan 18, 2017 at 10:50:54PM +0000, Parav Pandit wrote:
> > 
> > Hi Adit,
> > 
> > > 
> > > -----Original Message-----
> > > From: Adit Ranadive [mailto:aditr@vmware.com]
> > > Sent: Wednesday, January 18, 2017 2:53 PM
> > > To: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> > > Cc: Parav Pandit <parav@mellanox.com>; Leon Romanovsky
> > > <leon@kernel.org>; dledford@redhat.com; linux-rdma@vger.kernel.or
> > > g; pv-
> > > drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
> > > Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device
> > > until
> > > vmxnet3 link is up
> > > 
> > > On Wed, Jan 18, 2017 at 1:41:30PM -0700, Jason Gunthorpe wrote:
> > > > 
> > > > On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:
> > > > 
> > > > > 
> > > > > In order to recover QP1 correctly after a link transition we
> > > > > had to
> > > > > go through a register/unregister since it is placed in error
> > > > > state when the
> > > port is down.
> > > > 
> > > > > 
> > > > > In this case can the MAD layer re-initialize QP1 if link
> > > > > transitions are
> > > performed?
> > > > 
> > > > 
> > > > Your driver needs to deal with this. Do whatever it takes to
> > > > recover
> > > > your QP1 without re-registring and do not bother the rest of
> > > > the
> > > > system with it.
> > > 
> > > Ok. We'll investigate internally on for a better recovery process
> > > for QP1.
> > 
> > By the time I reply, most others requested same.
> > Thanks for addressing it.
> 
> Hi Doug,
> 
> Please consider taking the other two patches in this series if
> possible
> for your next set of rc fixes.

If you want me to take these for the -rc cycle, then they need to be
presented appropriately.  This patch (1/3) is gone just because it was
the wrong solution (but it at least sounded like a fix).  Patch 2/3
starts its subject with "Cleanup unused...", that doesn't sound
remotely like a fix.  Patch 3/3 starts with "Don't hardcode...", which
also doesn't sound remotely like a fix.  If you want these in the -rc
cycle, then please repost the two other patches with suitable subjects
and commit logs that make it clear these are real fixes needed in an
-rc and not simple patches that should get applied in the for-next
area.
Adit Ranadive Jan. 19, 2017, 8:09 p.m. UTC | #17
On Thu, Jan 19, 2017 at 01:45:17AM -0500, Doug Ledford wrote:
> On Wed, 2017-01-18 at 16:31 -0800, Adit Ranadive wrote:
> > On Wed, Jan 18, 2017 at 10:50:54PM +0000, Parav Pandit wrote:
> > > 
> > > Hi Adit,
> > > 
> > > > 
> > > > -----Original Message-----
> > > > From: Adit Ranadive [mailto:aditr@vmware.com]
> > > > Sent: Wednesday, January 18, 2017 2:53 PM
> > > > To: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> > > > Cc: Parav Pandit <parav@mellanox.com>; Leon Romanovsky
> > > > <leon@kernel.org>; dledford@redhat.com; linux-rdma@vger.kernel.or
> > > > g; pv-
> > > > drivers@vmware.com; Aditya Sarwade <asarwade@vmware.com>
> > > > Subject: Re: [PATCH 1/3] IB/vmw_pvrdma: Defer activating device
> > > > until
> > > > vmxnet3 link is up
> > > > 
> > > > On Wed, Jan 18, 2017 at 1:41:30PM -0700, Jason Gunthorpe wrote:
> > > > > 
> > > > > On Wed, Jan 18, 2017 at 12:30:07PM -0800, Adit Ranadive wrote:
> > > > > 
> > > > > > 
> > > > > > In order to recover QP1 correctly after a link transition we
> > > > > > had to
> > > > > > go through a register/unregister since it is placed in error
> > > > > > state when the
> > > > port is down.
> > > > > 
> > > > > > 
> > > > > > In this case can the MAD layer re-initialize QP1 if link
> > > > > > transitions are
> > > > performed?
> > > > > 
> > > > > 
> > > > > Your driver needs to deal with this. Do whatever it takes to
> > > > > recover
> > > > > your QP1 without re-registring and do not bother the rest of
> > > > > the
> > > > > system with it.
> > > > 
> > > > Ok. We'll investigate internally on for a better recovery process
> > > > for QP1.
> > > 
> > > By the time I reply, most others requested same.
> > > Thanks for addressing it.
> > 
> > Hi Doug,
> > 
> > Please consider taking the other two patches in this series if
> > possible
> > for your next set of rc fixes.
> 
> If you want me to take these for the -rc cycle, then they need to be
> presented appropriately.  This patch (1/3) is gone just because it was
> the wrong solution (but it at least sounded like a fix).  Patch 2/3
> starts its subject with "Cleanup unused...", that doesn't sound
> remotely like a fix.  Patch 3/3 starts with "Don't hardcode...", which
> also doesn't sound remotely like a fix.  If you want these in the -rc
> cycle, then please repost the two other patches with suitable subjects
> and commit logs that make it clear these are real fixes needed in an
> -rc and not simple patches that should get applied in the for-next
> area.

Ah ok. Sorry for my misunderstanding. In that case there are two oneline
fixes - info leaked to user and an incorrect cleanup on the pci probe error
path. I'll separate them out from these patches and post separately.
Please, drop this series and I'll repost the patches 2,3 separately meant
for for-next.

Thanks,
Adit
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 71e1d55..540a54b 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -221,6 +221,7 @@  struct pvrdma_dev {
 	u32 port_cap_mask;
 	struct mutex port_mutex; /* Port modification mutex. */
 	bool ib_active;
+	bool enabled;
 	atomic_t num_qps;
 	atomic_t num_cqs;
 	atomic_t num_pds;
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 231a1ce..b57132f 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -74,6 +74,8 @@  static int pvrdma_del_gid(struct ib_device *ibdev,
 			  void **context);
 
 
+static int pvrdma_enable_dev(struct pvrdma_dev *dev);
+
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
@@ -755,6 +757,10 @@  static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
 		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
 		break;
 	case NETDEV_UP:
+		if (!dev->enabled && pvrdma_enable_dev(dev)) {
+			dev_err(&dev->pdev->dev, "failed to enable device\n");
+			break;
+		}
 		pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
 		break;
 	default:
@@ -801,6 +807,48 @@  static int pvrdma_netdevice_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
+static void pvrdma_disable_dev(struct pvrdma_dev *dev)
+{
+	if (dev->enabled) {
+		ib_unregister_device(&dev->ib_dev);
+		dev->enabled = false;
+	}
+}
+
+static int pvrdma_enable_dev(struct pvrdma_dev *dev)
+{
+	int ret;
+	pvrdma_enable_intrs(dev);
+
+	/* Activate pvrdma device */
+	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
+
+	/* Make sure the write is complete before reading status. */
+	mb();
+
+	/* Check if device was successfully activated */
+	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "failed to activate device\n");
+		ret = -EFAULT;
+		goto err_disable_intrs;
+	}
+
+	/* Register IB device */
+	ret = pvrdma_register_device(dev);
+	if (ret) {
+		dev_err(&dev->pdev->dev, "failed to register IB device\n");
+		goto err_disable_intrs;
+	}
+
+	dev->enabled = true;
+	return 0;
+
+err_disable_intrs:
+	pvrdma_disable_intrs(dev);
+	return ret;
+}
+
 static int pvrdma_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *id)
 {
@@ -867,14 +915,14 @@  static int pvrdma_pci_probe(struct pci_dev *pdev,
 	/* Enable 64-Bit DMA */
 	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
 		ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-		if (ret != 0) {
+		if (ret) {
 			dev_err(&pdev->dev,
 				"pci_set_consistent_dma_mask failed\n");
 			goto err_free_resource;
 		}
 	} else {
 		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-		if (ret != 0) {
+		if (ret) {
 			dev_err(&pdev->dev,
 				"pci_set_dma_mask failed\n");
 			goto err_free_resource;
@@ -1029,7 +1077,7 @@  static int pvrdma_pci_probe(struct pci_dev *pdev,
 	if (ret) {
 		dev_err(&pdev->dev, "failed to allocate interrupts\n");
 		ret = -ENOMEM;
-		goto err_netdevice;
+		goto err_free_cq_ring;
 	}
 
 	/* Allocate UAR table. */
@@ -1049,51 +1097,35 @@  static int pvrdma_pci_probe(struct pci_dev *pdev,
 	}
 	dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
 
-	pvrdma_enable_intrs(dev);
-
-	/* Activate pvrdma device */
-	pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
-
-	/* Make sure the write is complete before reading status. */
-	mb();
-
-	/* Check if device was successfully activated */
-	ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
-	if (ret != 0) {
-		dev_err(&pdev->dev, "failed to activate device\n");
-		ret = -EFAULT;
-		goto err_disable_intr;
-	}
-
-	/* Register IB device */
-	ret = pvrdma_register_device(dev);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to register IB device\n");
-		goto err_disable_intr;
+	if (netif_running(dev->netdev) && netif_carrier_ok(dev->netdev)) {
+		ret = pvrdma_enable_dev(dev);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to enable device\n");
+			goto err_free_sgid_tbl;
+		}
+	} else {
+		dev_info(&pdev->dev, "pvrdma netdev link is down\n");
 	}
 
 	dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
 	ret = register_netdevice_notifier(&dev->nb_netdev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register netdevice events\n");
-		goto err_unreg_ibdev;
+		goto err_disable_dev;
 	}
 
 	dev_info(&pdev->dev, "attached to device\n");
 	return 0;
 
-err_unreg_ibdev:
-	ib_unregister_device(&dev->ib_dev);
-err_disable_intr:
-	pvrdma_disable_intrs(dev);
+err_disable_dev:
+	pvrdma_disable_dev(dev);
+err_free_sgid_tbl:
 	kfree(dev->sgid_tbl);
 err_free_uar_table:
 	pvrdma_uar_table_cleanup(dev);
 err_free_intrs:
 	pvrdma_free_irq(dev);
 	pvrdma_disable_msi_all(dev);
-err_netdevice:
-	unregister_netdevice_notifier(&dev->nb_netdev);
 err_free_cq_ring:
 	pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
 err_free_async_ring:
@@ -1132,10 +1164,7 @@  static void pvrdma_pci_remove(struct pci_dev *pdev)
 	unregister_netdevice_notifier(&dev->nb_netdev);
 	dev->nb_netdev.notifier_call = NULL;
 
-	flush_workqueue(event_wq);
-
-	/* Unregister ib device */
-	ib_unregister_device(&dev->ib_dev);
+	pvrdma_disable_dev(dev);
 
 	mutex_lock(&pvrdma_device_list_lock);
 	list_del(&dev->device_link);