diff mbox series

[for-next,8/9] RDMA/hns: Kernel notify usr space to stop ring db

Message ID 1565343666-73193-9-git-send-email-oulijun@huawei.com (mailing list archive)
State Changes Requested
Headers show
Series Bugfixes for 5.3-rc2 | expand

Commit Message

Lijun Ou Aug. 9, 2019, 9:41 a.m. UTC
From: Yangyang Li <liyangyang20@huawei.com>

In the reset scenario, if the kernel receives the reset signal,
it needs to notify the user space to stop ring doorbell.

Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  4 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 52 ++++++++++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  4 +++
 drivers/infiniband/hw/hns/hns_roce_main.c   | 22 ++++++------
 4 files changed, 70 insertions(+), 12 deletions(-)

Comments

Leon Romanovsky Aug. 12, 2019, 5:52 a.m. UTC | #1
On Fri, Aug 09, 2019 at 05:41:05PM +0800, Lijun Ou wrote:
> From: Yangyang Li <liyangyang20@huawei.com>
>
> In the reset scenario, if the kernel receives the reset signal,
> it needs to notify the user space to stop ring doorbell.

I doubt about it, it is racy like hell and relies on assumption that
userspace will honor such request to stop.

>
> Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
> ---
>  drivers/infiniband/hw/hns/hns_roce_device.h |  4 +++
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 52 ++++++++++++++++++++++++++++-
>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  4 +++
>  drivers/infiniband/hw/hns/hns_roce_main.c   | 22 ++++++------
>  4 files changed, 70 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> index 32465f5..be65fce 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_device.h
> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> @@ -268,6 +268,8 @@ enum {
>
>  #define PAGE_ADDR_SHIFT				12
>
> +#define HNS_ROCE_IS_RESETTING			1
> +
>  struct hns_roce_uar {
>  	u64		pfn;
>  	unsigned long	index;
> @@ -1043,6 +1045,8 @@ struct hns_roce_dev {
>  	u32			odb_offset;
>  	dma_addr_t		tptr_dma_addr;	/* only for hw v1 */
>  	u32			tptr_size;	/* only for hw v1 */
> +	struct page		*reset_page; /* store reset state */
> +	void			*reset_kaddr; /* addr of reset page */
>  	const struct hns_roce_hw *hw;
>  	void			*priv;
>  	struct workqueue_struct *irq_workq;
> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> index d33341e..138e5a8 100644
> --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> @@ -1867,17 +1867,49 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
>  			  link_tbl->table.map);
>  }
>
> +static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
> +{
> +	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!hr_dev->reset_page)
> +		return -ENOMEM;
> +
> +	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
> +	if (!hr_dev->reset_kaddr)
> +		goto err_with_vmap;
> +
> +	return 0;
> +
> +err_with_vmap:
> +	put_page(hr_dev->reset_page);

Are you sure that pages allocated with alloc_page() are released with put_page()?

I would say with high confidence that whole this patch is wrong.

Thanks
Jason Gunthorpe Aug. 12, 2019, 1:14 p.m. UTC | #2
On Mon, Aug 12, 2019 at 08:52:20AM +0300, Leon Romanovsky wrote:
> On Fri, Aug 09, 2019 at 05:41:05PM +0800, Lijun Ou wrote:
> > From: Yangyang Li <liyangyang20@huawei.com>
> >
> > In the reset scenario, if the kernel receives the reset signal,
> > it needs to notify the user space to stop ring doorbell.
> 
> I doubt about it, it is racy like hell and relies on assumption that
> userspace will honor such request to stop.

Sounds like this is the device unplug flow we already have support
for, use the APIs to drop the VMA refering to the doorbell

> > Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
> >  drivers/infiniband/hw/hns/hns_roce_device.h |  4 +++
> >  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 52 ++++++++++++++++++++++++++++-
> >  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  4 +++
> >  drivers/infiniband/hw/hns/hns_roce_main.c   | 22 ++++++------
> >  4 files changed, 70 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
> > index 32465f5..be65fce 100644
> > +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
> > @@ -268,6 +268,8 @@ enum {
> >
> >  #define PAGE_ADDR_SHIFT				12
> >
> > +#define HNS_ROCE_IS_RESETTING			1
> > +
> >  struct hns_roce_uar {
> >  	u64		pfn;
> >  	unsigned long	index;
> > @@ -1043,6 +1045,8 @@ struct hns_roce_dev {
> >  	u32			odb_offset;
> >  	dma_addr_t		tptr_dma_addr;	/* only for hw v1 */
> >  	u32			tptr_size;	/* only for hw v1 */
> > +	struct page		*reset_page; /* store reset state */
> > +	void			*reset_kaddr; /* addr of reset page */
> >  	const struct hns_roce_hw *hw;
> >  	void			*priv;
> >  	struct workqueue_struct *irq_workq;
> > diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> > index d33341e..138e5a8 100644
> > +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
> > @@ -1867,17 +1867,49 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
> >  			  link_tbl->table.map);
> >  }
> >
> > +static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
> > +{
> > +	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (!hr_dev->reset_page)
> > +		return -ENOMEM;
> > +
> > +	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
> > +	if (!hr_dev->reset_kaddr)
> > +		goto err_with_vmap;

Yes, this vmap is nonsense too, get_zeroed_page() is the right API

Jason
Yangyang Li Aug. 14, 2019, 5:54 a.m. UTC | #3
Hi, Leon & Jason
Thanks a lot for your reply.

在 2019/8/12 21:14, Jason Gunthorpe 写道:
> On Mon, Aug 12, 2019 at 08:52:20AM +0300, Leon Romanovsky wrote:
>> On Fri, Aug 09, 2019 at 05:41:05PM +0800, Lijun Ou wrote:
>>> From: Yangyang Li <liyangyang20@huawei.com>
>>>
>>> In the reset scenario, if the kernel receives the reset signal,
>>> it needs to notify the user space to stop ring doorbell.
>>
>> I doubt about it, it is racy like hell and relies on assumption that
>> userspace will honor such request to stop.
> 
> Sounds like this is the device unplug flow we already have support
> for, use the APIs to drop the VMA refering to the doorbell

Thanks for the suggestion, I have found this new unplug API,
and  I am trying to use it in the driver..

> 
>>> Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
>>>  drivers/infiniband/hw/hns/hns_roce_device.h |  4 +++
>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 52 ++++++++++++++++++++++++++++-
>>>  drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  4 +++
>>>  drivers/infiniband/hw/hns/hns_roce_main.c   | 22 ++++++------
>>>  4 files changed, 70 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
>>> index 32465f5..be65fce 100644
>>> +++ b/drivers/infiniband/hw/hns/hns_roce_device.h
>>> @@ -268,6 +268,8 @@ enum {
>>>
>>>  #define PAGE_ADDR_SHIFT				12
>>>
>>> +#define HNS_ROCE_IS_RESETTING			1
>>> +
>>>  struct hns_roce_uar {
>>>  	u64		pfn;
>>>  	unsigned long	index;
>>> @@ -1043,6 +1045,8 @@ struct hns_roce_dev {
>>>  	u32			odb_offset;
>>>  	dma_addr_t		tptr_dma_addr;	/* only for hw v1 */
>>>  	u32			tptr_size;	/* only for hw v1 */
>>> +	struct page		*reset_page; /* store reset state */
>>> +	void			*reset_kaddr; /* addr of reset page */
>>>  	const struct hns_roce_hw *hw;
>>>  	void			*priv;
>>>  	struct workqueue_struct *irq_workq;
>>> diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>>> index d33341e..138e5a8 100644
>>> +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
>>> @@ -1867,17 +1867,49 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
>>>  			  link_tbl->table.map);
>>>  }
>>>
>>> +static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
>>> +{
>>> +	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>>> +	if (!hr_dev->reset_page)
>>> +		return -ENOMEM;
>>> +
>>> +	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
>>> +	if (!hr_dev->reset_kaddr)
>>> +		goto err_with_vmap;
> 
> Yes, this vmap is nonsense too, get_zeroed_page() is the right API
> 
> Jason
> 
> 
Thanks for the suggestion, put_page is not the correct usage,
I will use the appropriate API to fix it in the next patch.

Thanks
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 32465f5..be65fce 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -268,6 +268,8 @@  enum {
 
 #define PAGE_ADDR_SHIFT				12
 
+#define HNS_ROCE_IS_RESETTING			1
+
 struct hns_roce_uar {
 	u64		pfn;
 	unsigned long	index;
@@ -1043,6 +1045,8 @@  struct hns_roce_dev {
 	u32			odb_offset;
 	dma_addr_t		tptr_dma_addr;	/* only for hw v1 */
 	u32			tptr_size;	/* only for hw v1 */
+	struct page		*reset_page; /* store reset state */
+	void			*reset_kaddr; /* addr of reset page */
 	const struct hns_roce_hw *hw;
 	void			*priv;
 	struct workqueue_struct *irq_workq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index d33341e..138e5a8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1867,17 +1867,49 @@  static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
 			  link_tbl->table.map);
 }
 
+static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
+{
+	hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!hr_dev->reset_page)
+		return -ENOMEM;
+
+	hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
+	if (!hr_dev->reset_kaddr)
+		goto err_with_vmap;
+
+	return 0;
+
+err_with_vmap:
+	put_page(hr_dev->reset_page);
+	return -ENOMEM;
+}
+
+static void hns_roce_v2_put_reset_page(struct hns_roce_dev *hr_dev)
+{
+	vunmap(hr_dev->reset_kaddr);
+	hr_dev->reset_kaddr = NULL;
+	put_page(hr_dev->reset_page);
+	hr_dev->reset_page = NULL;
+}
+
 static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 	int qpc_count, cqc_count;
 	int ret, i;
 
+	ret = hns_roce_v2_get_reset_page(hr_dev);
+	if (ret) {
+		dev_err(hr_dev->dev,
+			"reset state init failed, ret = %d.\n", ret);
+		return ret;
+	}
+
 	/* TSQ includes SQ doorbell and ack doorbell */
 	ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
 	if (ret) {
 		dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
-		return ret;
+		goto err_tsq_init_failed;
 	}
 
 	ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
@@ -1923,6 +1955,9 @@  static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 err_tpq_init_failed:
 	hns_roce_free_link_table(hr_dev, &priv->tsq);
 
+err_tsq_init_failed:
+	hns_roce_v2_put_reset_page(hr_dev);
+
 	return ret;
 }
 
@@ -1935,6 +1970,7 @@  static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
 
 	hns_roce_free_link_table(hr_dev, &priv->tpq);
 	hns_roce_free_link_table(hr_dev, &priv->tsq);
+	hns_roce_v2_put_reset_page(hr_dev);
 }
 
 static int hns_roce_query_mbox_status(struct hns_roce_dev *hr_dev)
@@ -6434,6 +6470,19 @@  static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle,
 
 	handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
 }
+
+static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_reset_state *state;
+
+	state = (struct hns_roce_v2_reset_state *) hr_dev->reset_kaddr;
+
+	state->reset_state = HNS_ROCE_IS_RESETTING;
+
+	/* Ensure reset state was flushed in memory */
+	wmb();
+}
+
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
 	struct hns_roce_dev *hr_dev;
@@ -6454,6 +6503,7 @@  static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 	hr_dev->is_reset = true;
 	hr_dev->active = false;
 	hr_dev->dis_db = true;
+	hns_roce_v2_reset_notify_user(hr_dev);
 
 	event.event = IB_EVENT_DEVICE_FATAL;
 	event.device = &hr_dev->ib_dev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 58931b5..392bc03 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1622,6 +1622,10 @@  struct hns_roce_link_table_entry {
 #define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20
 #define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
 
+struct hns_roce_v2_reset_state {
+	u32 reset_state; /* stored to use in user space */
+};
+
 struct hns_roce_v2_priv {
 	struct hnae3_handle *handle;
 	struct hns_roce_v2_cmq cmq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 1e4ba48..1bda7a5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -360,18 +360,18 @@  static int hns_roce_mmap(struct ib_ucontext *context,
 					 PAGE_SIZE,
 					 pgprot_noncached(vma->vm_page_prot));
 
-	/* vm_pgoff: 1 -- TPTR */
+	/* vm_pgoff: 1 -- TPTR(hw v1), reset_page(hw v2) */
 	case 1:
-		if (!hr_dev->tptr_dma_addr || !hr_dev->tptr_size)
-			return -EINVAL;
-		/*
-		 * FIXME: using io_remap_pfn_range on the dma address returned
-		 * by dma_alloc_coherent is totally wrong.
-		 */
-		return rdma_user_mmap_io(context, vma,
-					 hr_dev->tptr_dma_addr >> PAGE_SHIFT,
-					 hr_dev->tptr_size,
-					 vma->vm_page_prot);
+		if (hr_dev->tptr_dma_addr && hr_dev->tptr_size)
+			return rdma_user_mmap_io(context, vma,
+					    hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+					    hr_dev->tptr_size,
+					    vma->vm_page_prot);
+
+		if (hr_dev->reset_page)
+			return remap_pfn_range(vma, vma->vm_start,
+					  page_to_pfn(hr_dev->reset_page),
+					  PAGE_SIZE, vma->vm_page_prot);
 
 	default:
 		return -EINVAL;