[V2,1/2] PCI/DOE: Remove the pci_doe_flush_mb() call

Message ID	20221122155324.1878416-2-ira.weiny@intel.com (mailing list archive)
State	Superseded
Headers	show Return-Path: <linux-pci-owner@kernel.org> From: ira.weiny@intel.com To: Dan Williams <dan.j.williams@intel.com>, Bjorn Helgaas <bhelgaas@google.com> Cc: Ira Weiny <ira.weiny@intel.com>, Lukas Wunner <lukas@wunner.de>, Alison Schofield <alison.schofield@intel.com>, Vishal Verma <vishal.l.verma@intel.com>, Jonathan Cameron <Jonathan.Cameron@huawei.com>, Gregory Price <gregory.price@memverge.com>, "Li, Ming" <ming4.li@intel.com>, linux-pci@vger.kernel.org, linux-kernel@vger.kernel.org, linux-cxl@vger.kernel.org Subject: [PATCH V2 1/2] PCI/DOE: Remove the pci_doe_flush_mb() call Date: Tue, 22 Nov 2022 07:53:23 -0800 Message-Id: <20221122155324.1878416-2-ira.weiny@intel.com> In-Reply-To: <20221122155324.1878416-1-ira.weiny@intel.com> References: <20221122155324.1878416-1-ira.weiny@intel.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	PCI/DOE: Remove asynchronous task support \| expand [V2,0/2] PCI/DOE: Remove asynchronous task support [V2,1/2] PCI/DOE: Remove the pci_doe_flush_mb() call [V2,2/2] PCI/DOE: Remove asynchronous task support

Ira Weiny Nov. 22, 2022, 3:53 p.m. UTC

From: Ira Weiny <ira.weiny@intel.com>

Each struct doe_mb is managed as part of the PCI device.  They can't go
away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
flush the workqueue and prevent any further submissions to the mailboxes
when the PCI device goes away.  Unfortunately, this was fundamentally
flawed.  There was no guarantee that a struct doe_mb remained after
pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
invalid when those threads waiting on the workqueue were flushed.

Fortunately the current code is safe because all callers make a
synchronous call to pci_doe_submit_task() and maintain a reference on the
PCI device.

For these reasons, pci_doe_flush_mb() will never be called while tasks
are being processed and there is no use for it.

Remove the dead code around pci_doe_flush_mb().

Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
 drivers/pci/doe.c | 48 ++++-------------------------------------------
 1 file changed, 4 insertions(+), 44 deletions(-)

Jonathan Cameron Nov. 22, 2022, 4:34 p.m. UTC | #1

On Tue, 22 Nov 2022 07:53:23 -0800
ira.weiny@intel.com wrote:

> From: Ira Weiny <ira.weiny@intel.com>
> 
> Each struct doe_mb is managed as part of the PCI device.  They can't go
> away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
> flush the workqueue and prevent any further submissions to the mailboxes
> when the PCI device goes away.  Unfortunately, this was fundamentally
> flawed.  There was no guarantee that a struct doe_mb remained after
> pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
> invalid when those threads waiting on the workqueue were flushed.
> 
> Fortunately the current code is safe because all callers make a
> synchronous call to pci_doe_submit_task() and maintain a reference on the
> PCI device.
> 
> For these reasons, pci_doe_flush_mb() will never be called while tasks
> are being processed and there is no use for it.
> 
> Remove the dead code around pci_doe_flush_mb().
> 
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

Looks fine I think, though one question inline.
 
> ---
>  drivers/pci/doe.c | 48 ++++-------------------------------------------
>  1 file changed, 4 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> index e402f05068a5..260313e9052e 100644
> --- a/drivers/pci/doe.c
> +++ b/drivers/pci/doe.c
> @@ -24,10 +24,9 @@
>  
>  /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
>  #define PCI_DOE_TIMEOUT HZ
> -#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
> +#define PCI_DOE_POLL_INTERVAL	8

Why this change?  

>  
> -#define PCI_DOE_FLAG_CANCEL	0
> -#define PCI_DOE_FLAG_DEAD	1
> +#define PCI_DOE_FLAG_DEAD	0
>  
>  /**
>   * struct pci_doe_mb - State for a single DOE mailbox
> @@ -53,15 +52,6 @@ struct pci_doe_mb {
>  	unsigned long flags;
>  };
>  
> -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> -{
> -	if (wait_event_timeout(doe_mb->wq,
> -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> -			       timeout))
> -		return -EIO;
> -	return 0;
> -}
> -
>  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
>  {
>  	struct pci_dev *pdev = doe_mb->pdev;
> @@ -82,12 +72,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
>  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
>  
>  	do {
> -		int rc;
>  		u32 val;
>  
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc)
> -			return rc;
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
>  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
>  
>  		/* Abort success! */
> @@ -278,11 +265,7 @@ static void doe_statemachine_work(struct work_struct *work)
>  			signal_task_abort(task, -EIO);
>  			return;
>  		}
> -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> -		if (rc) {
> -			signal_task_abort(task, rc);
> -			return;
> -		}
> +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
>  		goto retry_resp;
>  	}
>  
> @@ -383,21 +366,6 @@ static void pci_doe_destroy_workqueue(void *mb)
>  	destroy_workqueue(doe_mb->work_queue);
>  }
>  
> -static void pci_doe_flush_mb(void *mb)
> -{
> -	struct pci_doe_mb *doe_mb = mb;
> -
> -	/* Stop all pending work items from starting */
> -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> -
> -	/* Cancel an in progress work item, if necessary */
> -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> -	wake_up(&doe_mb->wq);
> -
> -	/* Flush all work items */
> -	flush_workqueue(doe_mb->work_queue);
> -}
> -
>  /**
>   * pcim_doe_create_mb() - Create a DOE mailbox object
>   *
> @@ -450,14 +418,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
>  		return ERR_PTR(rc);
>  	}
>  
> -	/*
> -	 * The state machine and the mailbox should be in sync now;
> -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> -	 */
> -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> -	if (rc)
> -		return ERR_PTR(rc);
> -
>  	rc = pci_doe_cache_protocols(doe_mb);
>  	if (rc) {
>  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",

Lukas Wunner Nov. 22, 2022, 7:53 p.m. UTC | #2

On Tue, Nov 22, 2022 at 07:53:23AM -0800, ira.weiny@intel.com wrote:
> Each struct doe_mb is managed as part of the PCI device.  They can't go
> away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
> flush the workqueue and prevent any further submissions to the mailboxes
> when the PCI device goes away.  Unfortunately, this was fundamentally
> flawed.  There was no guarantee that a struct doe_mb remained after
> pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
> invalid when those threads waiting on the workqueue were flushed.
> 
> Fortunately the current code is safe because all callers make a
> synchronous call to pci_doe_submit_task() and maintain a reference on the
> PCI device.
> 
> For these reasons, pci_doe_flush_mb() will never be called while tasks
> are being processed and there is no use for it.

Going forward my plan is to allocate all existing DOE mailboxes
of a device upon enumeration.  That will allow concurrent use
of a mailbox by multiple drivers.

When a pci_dev goes away, say, because it's been hot-removed,
we need a way to abort all ongoing DOE exchanges.

pci_doe_flush_mb() seems to do just that so I'm not sure why
it's being removed?

Thanks,

Lukas

Ira Weiny Nov. 23, 2022, 5:35 p.m. UTC | #3

On Tue, Nov 22, 2022 at 04:34:26PM +0000, Jonathan Cameron wrote:
> On Tue, 22 Nov 2022 07:53:23 -0800
> ira.weiny@intel.com wrote:
> 
> > From: Ira Weiny <ira.weiny@intel.com>
> > 
> > Each struct doe_mb is managed as part of the PCI device.  They can't go
> > away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
> > flush the workqueue and prevent any further submissions to the mailboxes
> > when the PCI device goes away.  Unfortunately, this was fundamentally
> > flawed.  There was no guarantee that a struct doe_mb remained after
> > pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
> > invalid when those threads waiting on the workqueue were flushed.
> > 
> > Fortunately the current code is safe because all callers make a
> > synchronous call to pci_doe_submit_task() and maintain a reference on the
> > PCI device.
> > 
> > For these reasons, pci_doe_flush_mb() will never be called while tasks
> > are being processed and there is no use for it.
> > 
> > Remove the dead code around pci_doe_flush_mb().
> > 
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> Looks fine I think, though one question inline.
>  
> > ---
> >  drivers/pci/doe.c | 48 ++++-------------------------------------------
> >  1 file changed, 4 insertions(+), 44 deletions(-)
> > 
> > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > index e402f05068a5..260313e9052e 100644
> > --- a/drivers/pci/doe.c
> > +++ b/drivers/pci/doe.c
> > @@ -24,10 +24,9 @@
> >  
> >  /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
> >  #define PCI_DOE_TIMEOUT HZ
> > -#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
> > +#define PCI_DOE_POLL_INTERVAL	8
> 
> Why this change?  

msleep_interruptible() takes a millisecond value and wait_event_timeout() takes
jiffies.  1/128 of a second is ~8ms.

For most configs (HZ == 1000) the value does not change, but I don't believe
this would be true for all configs.  Hence the more explicit define, in milliseconds.

I'll add a comment.

Ira

> 
> >  
> > -#define PCI_DOE_FLAG_CANCEL	0
> > -#define PCI_DOE_FLAG_DEAD	1
> > +#define PCI_DOE_FLAG_DEAD	0
> >  
> >  /**
> >   * struct pci_doe_mb - State for a single DOE mailbox
> > @@ -53,15 +52,6 @@ struct pci_doe_mb {
> >  	unsigned long flags;
> >  };
> >  
> > -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> > -{
> > -	if (wait_event_timeout(doe_mb->wq,
> > -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> > -			       timeout))
> > -		return -EIO;
> > -	return 0;
> > -}
> > -
> >  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
> >  {
> >  	struct pci_dev *pdev = doe_mb->pdev;
> > @@ -82,12 +72,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
> >  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
> >  
> >  	do {
> > -		int rc;
> >  		u32 val;
> >  
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc)
> > -			return rc;
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
> >  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
> >  
> >  		/* Abort success! */
> > @@ -278,11 +265,7 @@ static void doe_statemachine_work(struct work_struct *work)
> >  			signal_task_abort(task, -EIO);
> >  			return;
> >  		}
> > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > -		if (rc) {
> > -			signal_task_abort(task, rc);
> > -			return;
> > -		}
> > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
> >  		goto retry_resp;
> >  	}
> >  
> > @@ -383,21 +366,6 @@ static void pci_doe_destroy_workqueue(void *mb)
> >  	destroy_workqueue(doe_mb->work_queue);
> >  }
> >  
> > -static void pci_doe_flush_mb(void *mb)
> > -{
> > -	struct pci_doe_mb *doe_mb = mb;
> > -
> > -	/* Stop all pending work items from starting */
> > -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> > -
> > -	/* Cancel an in progress work item, if necessary */
> > -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> > -	wake_up(&doe_mb->wq);
> > -
> > -	/* Flush all work items */
> > -	flush_workqueue(doe_mb->work_queue);
> > -}
> > -
> >  /**
> >   * pcim_doe_create_mb() - Create a DOE mailbox object
> >   *
> > @@ -450,14 +418,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
> >  		return ERR_PTR(rc);
> >  	}
> >  
> > -	/*
> > -	 * The state machine and the mailbox should be in sync now;
> > -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> > -	 */
> > -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> > -	if (rc)
> > -		return ERR_PTR(rc);
> > -
> >  	rc = pci_doe_cache_protocols(doe_mb);
> >  	if (rc) {
> >  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",
>

Ira Weiny Nov. 23, 2022, 5:39 p.m. UTC | #4

On Tue, Nov 22, 2022 at 08:53:16PM +0100, Lukas Wunner wrote:
> On Tue, Nov 22, 2022 at 07:53:23AM -0800, ira.weiny@intel.com wrote:
> > Each struct doe_mb is managed as part of the PCI device.  They can't go
> > away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
> > flush the workqueue and prevent any further submissions to the mailboxes
> > when the PCI device goes away.  Unfortunately, this was fundamentally
> > flawed.  There was no guarantee that a struct doe_mb remained after
> > pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
> > invalid when those threads waiting on the workqueue were flushed.
> > 
> > Fortunately the current code is safe because all callers make a
> > synchronous call to pci_doe_submit_task() and maintain a reference on the
> > PCI device.
> > 
> > For these reasons, pci_doe_flush_mb() will never be called while tasks
> > are being processed and there is no use for it.
> 
> Going forward my plan is to allocate all existing DOE mailboxes
> of a device upon enumeration.  That will allow concurrent use
> of a mailbox by multiple drivers.
> 
> When a pci_dev goes away, say, because it's been hot-removed,
> we need a way to abort all ongoing DOE exchanges.
> 
> pci_doe_flush_mb() seems to do just that so I'm not sure why
> it's being removed?

I guess I was not explicit enough in the commit message: 1) it is not used
today and, more importantly, 2) it does not work.

Because we are not using it currently I thought it best to remove it rather
than try and fix it.  Once we have a use then we can figure out how to make
sure the doe_mb[*] is valid until all tasks are flushed.

Ira

[*] Probably with a reference on the struct as was discussed before.

	-- https://lore.kernel.org/all/20221122094627.00003f2c@Huawei.com/

> 
> Thanks,
> 
> Lukas

Jonathan Cameron Nov. 24, 2022, 11:19 a.m. UTC | #5

On Wed, 23 Nov 2022 09:35:37 -0800
Ira Weiny <ira.weiny@intel.com> wrote:

> On Tue, Nov 22, 2022 at 04:34:26PM +0000, Jonathan Cameron wrote:
> > On Tue, 22 Nov 2022 07:53:23 -0800
> > ira.weiny@intel.com wrote:
> >   
> > > From: Ira Weiny <ira.weiny@intel.com>
> > > 
> > > Each struct doe_mb is managed as part of the PCI device.  They can't go
> > > away as long as the PCI device exists.  pci_doe_flush_mb() was set up to
> > > flush the workqueue and prevent any further submissions to the mailboxes
> > > when the PCI device goes away.  Unfortunately, this was fundamentally
> > > flawed.  There was no guarantee that a struct doe_mb remained after
> > > pci_doe_flush_mb() returned.  Therefore, the doe_mb state could be
> > > invalid when those threads waiting on the workqueue were flushed.
> > > 
> > > Fortunately the current code is safe because all callers make a
> > > synchronous call to pci_doe_submit_task() and maintain a reference on the
> > > PCI device.
> > > 
> > > For these reasons, pci_doe_flush_mb() will never be called while tasks
> > > are being processed and there is no use for it.
> > > 
> > > Remove the dead code around pci_doe_flush_mb().
> > > 
> > > Signed-off-by: Ira Weiny <ira.weiny@intel.com>  
> > 
> > Looks fine I think, though one question inline.
> >    
> > > ---
> > >  drivers/pci/doe.c | 48 ++++-------------------------------------------
> > >  1 file changed, 4 insertions(+), 44 deletions(-)
> > > 
> > > diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
> > > index e402f05068a5..260313e9052e 100644
> > > --- a/drivers/pci/doe.c
> > > +++ b/drivers/pci/doe.c
> > > @@ -24,10 +24,9 @@
> > >  
> > >  /* Timeout of 1 second from 6.30.2 Operation, PCI Spec r6.0 */
> > >  #define PCI_DOE_TIMEOUT HZ
> > > -#define PCI_DOE_POLL_INTERVAL	(PCI_DOE_TIMEOUT / 128)
> > > +#define PCI_DOE_POLL_INTERVAL	8  
> > 
> > Why this change?    
> 
> msleep_interruptible() takes a millisecond value and wait_event_timeout() takes
> jiffies.  1/128 of a second is ~8ms.
> 
> While for most configs (HZ == 1000) the value does not change.  I don't believe
> this would be true for all configs.  Thus a more explicit define.
> 

Makes sense. Maybe add a suffix as well to make it clear it's not in the same units
as PCI_DOE_TIMEOUT?
PCI_DOE_POLL_INTERVAL_MSECS




> I'll add a comment.
> 
> Ira
> 
> >   
> > >  
> > > -#define PCI_DOE_FLAG_CANCEL	0
> > > -#define PCI_DOE_FLAG_DEAD	1
> > > +#define PCI_DOE_FLAG_DEAD	0
> > >  
> > >  /**
> > >   * struct pci_doe_mb - State for a single DOE mailbox
> > > @@ -53,15 +52,6 @@ struct pci_doe_mb {
> > >  	unsigned long flags;
> > >  };
> > >  
> > > -static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
> > > -{
> > > -	if (wait_event_timeout(doe_mb->wq,
> > > -			       test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
> > > -			       timeout))
> > > -		return -EIO;
> > > -	return 0;
> > > -}
> > > -
> > >  static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
> > >  {
> > >  	struct pci_dev *pdev = doe_mb->pdev;
> > > @@ -82,12 +72,9 @@ static int pci_doe_abort(struct pci_doe_mb *doe_mb)
> > >  	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
> > >  
> > >  	do {
> > > -		int rc;
> > >  		u32 val;
> > >  
> > > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > > -		if (rc)
> > > -			return rc;
> > > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
> > >  		pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
> > >  
> > >  		/* Abort success! */
> > > @@ -278,11 +265,7 @@ static void doe_statemachine_work(struct work_struct *work)
> > >  			signal_task_abort(task, -EIO);
> > >  			return;
> > >  		}
> > > -		rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
> > > -		if (rc) {
> > > -			signal_task_abort(task, rc);
> > > -			return;
> > > -		}
> > > +		msleep_interruptible(PCI_DOE_POLL_INTERVAL);
> > >  		goto retry_resp;
> > >  	}
> > >  
> > > @@ -383,21 +366,6 @@ static void pci_doe_destroy_workqueue(void *mb)
> > >  	destroy_workqueue(doe_mb->work_queue);
> > >  }
> > >  
> > > -static void pci_doe_flush_mb(void *mb)
> > > -{
> > > -	struct pci_doe_mb *doe_mb = mb;
> > > -
> > > -	/* Stop all pending work items from starting */
> > > -	set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
> > > -
> > > -	/* Cancel an in progress work item, if necessary */
> > > -	set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
> > > -	wake_up(&doe_mb->wq);
> > > -
> > > -	/* Flush all work items */
> > > -	flush_workqueue(doe_mb->work_queue);
> > > -}
> > > -
> > >  /**
> > >   * pcim_doe_create_mb() - Create a DOE mailbox object
> > >   *
> > > @@ -450,14 +418,6 @@ struct pci_doe_mb *pcim_doe_create_mb(struct pci_dev *pdev, u16 cap_offset)
> > >  		return ERR_PTR(rc);
> > >  	}
> > >  
> > > -	/*
> > > -	 * The state machine and the mailbox should be in sync now;
> > > -	 * Set up mailbox flush prior to using the mailbox to query protocols.
> > > -	 */
> > > -	rc = devm_add_action_or_reset(dev, pci_doe_flush_mb, doe_mb);
> > > -	if (rc)
> > > -		return ERR_PTR(rc);
> > > -
> > >  	rc = pci_doe_cache_protocols(doe_mb);
> > >  	if (rc) {
> > >  		pci_err(pdev, "[%x] failed to cache protocols : %d\n",  
> >   
>

[V2,1/2] PCI/DOE: Remove the pci_doe_flush_mb() call

Commit Message

Comments

Patch