diff mbox series

[v3,02/40] cxl/pci: Implement Interface Ready Timeout

Message ID 164298412919.3018233.12491722885382120190.stgit@dwillia2-desk3.amr.corp.intel.com
State New, archived
Headers show
Series CXL.mem Topology Discovery and Hotplug Support | expand

Commit Message

Dan Williams Jan. 24, 2022, 12:28 a.m. UTC
From: Ben Widawsky <ben.widawsky@intel.com>

The original driver implementation used the doorbell timeout for the
Mailbox Interface Ready bit to piggy back off of, since the latter does
not have a defined timeout. This functionality, introduced in commit
8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as
the recent "Add Mailbox Ready Time" ECN timeout indicates that the
mailbox ready time can be significantly longer that 2 seconds.

While the specification limits the maximum timeout to 256s, the cxl_pci
driver gives up on the mailbox after 60s. This value corresponds with
important timeout values already present in the kernel. A module
parameter is provided as an emergency override and represents the
default Linux policy for all devices.

Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
[djbw: add modparam, drop check_device_status()]
Co-developed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/pci.c |   35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

Comments

Ben Widawsky Jan. 31, 2022, 10:21 p.m. UTC | #1
On 22-01-23 16:28:49, Dan Williams wrote:
> From: Ben Widawsky <ben.widawsky@intel.com>
> 
> The original driver implementation used the doorbell timeout for the
> Mailbox Interface Ready bit to piggy back off of, since the latter does
> not have a defined timeout. This functionality, introduced in commit
> 8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as
> the recent "Add Mailbox Ready Time" ECN timeout indicates that the
> mailbox ready time can be significantly longer that 2 seconds.
> 
> While the specification limits the maximum timeout to 256s, the cxl_pci
> driver gives up on the mailbox after 60s. This value corresponds with
> important timeout values already present in the kernel. A module
> parameter is provided as an emergency override and represents the
> default Linux policy for all devices.
> 
> Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> [djbw: add modparam, drop check_device_status()]
> Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/pci.c |   35 +++++++++++++++++++++++++++++++++++
>  1 file changed, 35 insertions(+)
> 
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 8dc91fd3396a..ed8de9eac970 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -1,7 +1,9 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
>  #include <linux/io-64-nonatomic-lo-hi.h>
> +#include <linux/moduleparam.h>
>  #include <linux/module.h>
> +#include <linux/delay.h>
>  #include <linux/sizes.h>
>  #include <linux/mutex.h>
>  #include <linux/list.h>
> @@ -35,6 +37,20 @@
>  /* CXL 2.0 - 8.2.8.4 */
>  #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
>  
> +/*
> + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
> + * dictate how long to wait for the mailbox to become ready. The new
> + * field allows the device to tell software the amount of time to wait
> + * before mailbox ready. This field per the spec theoretically allows
> + * for up to 255 seconds. 255 seconds is unreasonably long, its longer
> + * than the maximum SATA port link recovery wait. Default to 60 seconds
> + * until someone builds a CXL device that needs more time in practice.
> + */
> +static unsigned short mbox_ready_timeout = 60;
> +module_param(mbox_ready_timeout, ushort, 0600);

Any reason not to make it 0644?

> +MODULE_PARM_DESC(mbox_ready_timeout,
> +		 "seconds to wait for mailbox ready status");
> +
>  static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
>  {
>  	const unsigned long start = jiffies;
> @@ -281,6 +297,25 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
>  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  {
>  	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> +	unsigned long timeout;
> +	u64 md_status;
> +
> +	timeout = jiffies + mbox_ready_timeout * HZ;
> +	do {
> +		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
> +		if (md_status & CXLMDEV_MBOX_IF_READY)
> +			break;
> +		if (msleep_interruptible(100))
> +			break;
> +	} while (!time_after(jiffies, timeout));

Just pointing out the [probably] obvious. If the user specifies a zero second
timeout, the code will still wait 100ms.

> +
> +	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
> +		dev_err(cxlds->dev,
> +			"timeout awaiting mailbox ready, device state:%s%s\n",
> +			md_status & CXLMDEV_DEV_FATAL ? " fatal" : "",
> +			md_status & CXLMDEV_FW_HALT ? " firmware-halt" : "");
> +		return -EIO;
> +	}
>  
>  	cxlds->mbox_send = cxl_pci_mbox_send;
>  	cxlds->payload_size =
>
Dan Williams Jan. 31, 2022, 11:11 p.m. UTC | #2
On Mon, Jan 31, 2022 at 2:21 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 22-01-23 16:28:49, Dan Williams wrote:
> > From: Ben Widawsky <ben.widawsky@intel.com>
> >
> > The original driver implementation used the doorbell timeout for the
> > Mailbox Interface Ready bit to piggy back off of, since the latter does
> > not have a defined timeout. This functionality, introduced in commit
> > 8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as
> > the recent "Add Mailbox Ready Time" ECN timeout indicates that the
> > mailbox ready time can be significantly longer that 2 seconds.
> >
> > While the specification limits the maximum timeout to 256s, the cxl_pci
> > driver gives up on the mailbox after 60s. This value corresponds with
> > important timeout values already present in the kernel. A module
> > parameter is provided as an emergency override and represents the
> > default Linux policy for all devices.
> >
> > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > [djbw: add modparam, drop check_device_status()]
> > Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > ---
> >  drivers/cxl/pci.c |   35 +++++++++++++++++++++++++++++++++++
> >  1 file changed, 35 insertions(+)
> >
> > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > index 8dc91fd3396a..ed8de9eac970 100644
> > --- a/drivers/cxl/pci.c
> > +++ b/drivers/cxl/pci.c
> > @@ -1,7 +1,9 @@
> >  // SPDX-License-Identifier: GPL-2.0-only
> >  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> >  #include <linux/io-64-nonatomic-lo-hi.h>
> > +#include <linux/moduleparam.h>
> >  #include <linux/module.h>
> > +#include <linux/delay.h>
> >  #include <linux/sizes.h>
> >  #include <linux/mutex.h>
> >  #include <linux/list.h>
> > @@ -35,6 +37,20 @@
> >  /* CXL 2.0 - 8.2.8.4 */
> >  #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
> >
> > +/*
> > + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
> > + * dictate how long to wait for the mailbox to become ready. The new
> > + * field allows the device to tell software the amount of time to wait
> > + * before mailbox ready. This field per the spec theoretically allows
> > + * for up to 255 seconds. 255 seconds is unreasonably long, its longer
> > + * than the maximum SATA port link recovery wait. Default to 60 seconds
> > + * until someone builds a CXL device that needs more time in practice.
> > + */
> > +static unsigned short mbox_ready_timeout = 60;
> > +module_param(mbox_ready_timeout, ushort, 0600);
>
> Any reason not to make it 0644?
>

Are there any tooling scenarios where this information is usable by non-root?

> > +MODULE_PARM_DESC(mbox_ready_timeout,
> > +              "seconds to wait for mailbox ready status");
> > +
> >  static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
> >  {
> >       const unsigned long start = jiffies;
> > @@ -281,6 +297,25 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
> >  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
> >  {
> >       const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> > +     unsigned long timeout;
> > +     u64 md_status;
> > +
> > +     timeout = jiffies + mbox_ready_timeout * HZ;
> > +     do {
> > +             md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
> > +             if (md_status & CXLMDEV_MBOX_IF_READY)
> > +                     break;
> > +             if (msleep_interruptible(100))
> > +                     break;
> > +     } while (!time_after(jiffies, timeout));
>
> Just pointing out the [probably] obvious. If the user specifies a zero second
> timeout, the code will still wait 100ms.

Sure, is that going to be a problem in practice? I expect the
overwhelming common case is that the mailbox is already ready by this
point, so it's a zero-wait.

>
> > +
> > +     if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
> > +             dev_err(cxlds->dev,
> > +                     "timeout awaiting mailbox ready, device state:%s%s\n",
> > +                     md_status & CXLMDEV_DEV_FATAL ? " fatal" : "",
> > +                     md_status & CXLMDEV_FW_HALT ? " firmware-halt" : "");
> > +             return -EIO;
> > +     }
> >
> >       cxlds->mbox_send = cxl_pci_mbox_send;
> >       cxlds->payload_size =
> >
Ben Widawsky Jan. 31, 2022, 11:25 p.m. UTC | #3
On 22-01-31 15:11:05, Dan Williams wrote:
> On Mon, Jan 31, 2022 at 2:21 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> >
> > On 22-01-23 16:28:49, Dan Williams wrote:
> > > From: Ben Widawsky <ben.widawsky@intel.com>
> > >
> > > The original driver implementation used the doorbell timeout for the
> > > Mailbox Interface Ready bit to piggy back off of, since the latter does
> > > not have a defined timeout. This functionality, introduced in commit
> > > 8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as
> > > the recent "Add Mailbox Ready Time" ECN timeout indicates that the
> > > mailbox ready time can be significantly longer that 2 seconds.
> > >
> > > While the specification limits the maximum timeout to 256s, the cxl_pci
> > > driver gives up on the mailbox after 60s. This value corresponds with
> > > important timeout values already present in the kernel. A module
> > > parameter is provided as an emergency override and represents the
> > > default Linux policy for all devices.
> > >
> > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > [djbw: add modparam, drop check_device_status()]
> > > Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > > ---
> > >  drivers/cxl/pci.c |   35 +++++++++++++++++++++++++++++++++++
> > >  1 file changed, 35 insertions(+)
> > >
> > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > > index 8dc91fd3396a..ed8de9eac970 100644
> > > --- a/drivers/cxl/pci.c
> > > +++ b/drivers/cxl/pci.c
> > > @@ -1,7 +1,9 @@
> > >  // SPDX-License-Identifier: GPL-2.0-only
> > >  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> > >  #include <linux/io-64-nonatomic-lo-hi.h>
> > > +#include <linux/moduleparam.h>
> > >  #include <linux/module.h>
> > > +#include <linux/delay.h>
> > >  #include <linux/sizes.h>
> > >  #include <linux/mutex.h>
> > >  #include <linux/list.h>
> > > @@ -35,6 +37,20 @@
> > >  /* CXL 2.0 - 8.2.8.4 */
> > >  #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
> > >
> > > +/*
> > > + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
> > > + * dictate how long to wait for the mailbox to become ready. The new
> > > + * field allows the device to tell software the amount of time to wait
> > > + * before mailbox ready. This field per the spec theoretically allows
> > > + * for up to 255 seconds. 255 seconds is unreasonably long, its longer
> > > + * than the maximum SATA port link recovery wait. Default to 60 seconds
> > > + * until someone builds a CXL device that needs more time in practice.
> > > + */
> > > +static unsigned short mbox_ready_timeout = 60;
> > > +module_param(mbox_ready_timeout, ushort, 0600);
> >
> > Any reason not to make it 0644?
> >
> 
> Are there any tooling scenarios where this information is usable by non-root?

Just for ease of debug. If I get a bug report with this, first thing I'm going
to do is ask for the timeout value. Perhaps it's expected the person who filed
the bug will have root access.

> 
> > > +MODULE_PARM_DESC(mbox_ready_timeout,
> > > +              "seconds to wait for mailbox ready status");
> > > +
> > >  static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
> > >  {
> > >       const unsigned long start = jiffies;
> > > @@ -281,6 +297,25 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
> > >  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
> > >  {
> > >       const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> > > +     unsigned long timeout;
> > > +     u64 md_status;
> > > +
> > > +     timeout = jiffies + mbox_ready_timeout * HZ;
> > > +     do {
> > > +             md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
> > > +             if (md_status & CXLMDEV_MBOX_IF_READY)
> > > +                     break;
> > > +             if (msleep_interruptible(100))
> > > +                     break;
> > > +     } while (!time_after(jiffies, timeout));
> >
> > Just pointing out the [probably] obvious. If the user specifies a zero second
> > timeout, the code will still wait 100ms.
> 
> Sure, is that going to be a problem in practice? I expect the
> overwhelming common case is that the mailbox is already ready by this
> point, so it's a zero-wait.
> 

No problem I can see in practice.

> >
> > > +
> > > +     if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
> > > +             dev_err(cxlds->dev,
> > > +                     "timeout awaiting mailbox ready, device state:%s%s\n",
> > > +                     md_status & CXLMDEV_DEV_FATAL ? " fatal" : "",
> > > +                     md_status & CXLMDEV_FW_HALT ? " firmware-halt" : "");
> > > +             return -EIO;
> > > +     }
> > >
> > >       cxlds->mbox_send = cxl_pci_mbox_send;
> > >       cxlds->payload_size =
> > >
Dan Williams Jan. 31, 2022, 11:47 p.m. UTC | #4
On Mon, Jan 31, 2022 at 3:25 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 22-01-31 15:11:05, Dan Williams wrote:
> > On Mon, Jan 31, 2022 at 2:21 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
> > >
> > > On 22-01-23 16:28:49, Dan Williams wrote:
> > > > From: Ben Widawsky <ben.widawsky@intel.com>
> > > >
> > > > The original driver implementation used the doorbell timeout for the
> > > > Mailbox Interface Ready bit to piggy back off of, since the latter does
> > > > not have a defined timeout. This functionality, introduced in commit
> > > > 8adaf747c9f0 ("cxl/mem: Find device capabilities"), needs improvement as
> > > > the recent "Add Mailbox Ready Time" ECN timeout indicates that the
> > > > mailbox ready time can be significantly longer that 2 seconds.
> > > >
> > > > While the specification limits the maximum timeout to 256s, the cxl_pci
> > > > driver gives up on the mailbox after 60s. This value corresponds with
> > > > important timeout values already present in the kernel. A module
> > > > parameter is provided as an emergency override and represents the
> > > > default Linux policy for all devices.
> > > >
> > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
> > > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > > [djbw: add modparam, drop check_device_status()]
> > > > Co-developed-by: Dan Williams <dan.j.williams@intel.com>
> > > > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > > > ---
> > > >  drivers/cxl/pci.c |   35 +++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 35 insertions(+)
> > > >
> > > > diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> > > > index 8dc91fd3396a..ed8de9eac970 100644
> > > > --- a/drivers/cxl/pci.c
> > > > +++ b/drivers/cxl/pci.c
> > > > @@ -1,7 +1,9 @@
> > > >  // SPDX-License-Identifier: GPL-2.0-only
> > > >  /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
> > > >  #include <linux/io-64-nonatomic-lo-hi.h>
> > > > +#include <linux/moduleparam.h>
> > > >  #include <linux/module.h>
> > > > +#include <linux/delay.h>
> > > >  #include <linux/sizes.h>
> > > >  #include <linux/mutex.h>
> > > >  #include <linux/list.h>
> > > > @@ -35,6 +37,20 @@
> > > >  /* CXL 2.0 - 8.2.8.4 */
> > > >  #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
> > > >
> > > > +/*
> > > > + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
> > > > + * dictate how long to wait for the mailbox to become ready. The new
> > > > + * field allows the device to tell software the amount of time to wait
> > > > + * before mailbox ready. This field per the spec theoretically allows
> > > > + * for up to 255 seconds. 255 seconds is unreasonably long, its longer
> > > > + * than the maximum SATA port link recovery wait. Default to 60 seconds
> > > > + * until someone builds a CXL device that needs more time in practice.
> > > > + */
> > > > +static unsigned short mbox_ready_timeout = 60;
> > > > +module_param(mbox_ready_timeout, ushort, 0600);
> > >
> > > Any reason not to make it 0644?
> > >
> >
> > Are there any tooling scenarios where this information is usable by non-root?
>
> Just for ease of debug. If I get a bug report with this, first thing I'm going
> to do is ask for the timeout value. Perhaps it's expected the person who filed
> the bug will have root access.

They would have already needed to be root to change it from the
default in the first instance, or the kernel command line from the
dmesg would show it being overridden. That said, there's nothing
security sensitive about emitting it, so 0644 it is.
diff mbox series

Patch

diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 8dc91fd3396a..ed8de9eac970 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,7 +1,9 @@ 
 // SPDX-License-Identifier: GPL-2.0-only
 /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/moduleparam.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/sizes.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
@@ -35,6 +37,20 @@ 
 /* CXL 2.0 - 8.2.8.4 */
 #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
 
+/*
+ * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
+ * dictate how long to wait for the mailbox to become ready. The new
+ * field allows the device to tell software the amount of time to wait
+ * before mailbox ready. This field per the spec theoretically allows
+ * for up to 255 seconds. 255 seconds is unreasonably long, its longer
+ * than the maximum SATA port link recovery wait. Default to 60 seconds
+ * until someone builds a CXL device that needs more time in practice.
+ */
+static unsigned short mbox_ready_timeout = 60;
+module_param(mbox_ready_timeout, ushort, 0600);
+MODULE_PARM_DESC(mbox_ready_timeout,
+		 "seconds to wait for mailbox ready status");
+
 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 {
 	const unsigned long start = jiffies;
@@ -281,6 +297,25 @@  static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
 static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 {
 	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+	unsigned long timeout;
+	u64 md_status;
+
+	timeout = jiffies + mbox_ready_timeout * HZ;
+	do {
+		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+		if (md_status & CXLMDEV_MBOX_IF_READY)
+			break;
+		if (msleep_interruptible(100))
+			break;
+	} while (!time_after(jiffies, timeout));
+
+	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
+		dev_err(cxlds->dev,
+			"timeout awaiting mailbox ready, device state:%s%s\n",
+			md_status & CXLMDEV_DEV_FATAL ? " fatal" : "",
+			md_status & CXLMDEV_FW_HALT ? " firmware-halt" : "");
+		return -EIO;
+	}
 
 	cxlds->mbox_send = cxl_pci_mbox_send;
 	cxlds->payload_size =