diff mbox series

[1/7] cxl/mbox: Add background cmd handling machinery

Message ID 20230224194652.1990604-2-dave@stgolabs.net
State Superseded
Headers show
Series cxl: Background cmds and device sanitation | expand

Commit Message

Davidlohr Bueso Feb. 24, 2023, 7:46 p.m. UTC
This adds support for handling background operations, as defined in
the CXL 3.0 spec. Commands that can take too long (over ~2 seconds)
can run in the background asynchronously (to the hardware). Currently
these are limited to Maintenance, transfer/activate Firmware, Scan
Media, Sanitize (aka overwrite), and VPPB bind/unbind.

The driver will deal with such commands synchronously, blocking
all other incoming commands for a specified period of time, allowing
time-slicing of the command such that the caller can send incremental
requests to avoid monopolizing the driver/device. This approach
makes the code simpler, where any loss of sync (timeout) between the
driver and hardware is just disregarded as an invalid state until
the next successful submission.

On devices where mbox interrupts are supported, this will still use
a poller that will wake up at the specified wait intervals. The irq
handler will simply awaken a blocked cmd, which is also safe versus a
task that is either waking (timing out) or already awoken.

Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 drivers/cxl/cxl.h    |   7 +++
 drivers/cxl/cxlmem.h |   6 +++
 drivers/cxl/pci.c    | 100 +++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 109 insertions(+), 4 deletions(-)

Comments

Dave Jiang Feb. 28, 2023, 4:27 p.m. UTC | #1
On 2/24/23 12:46 PM, Davidlohr Bueso wrote:
> This adds support for handling background operations, as defined in
> the CXL 3.0 spec. Commands that can take too long (over ~2 seconds)
> can run in the background asynchronously (to the hardware). Currently
> these are limited to Maintenance, transfer/activate Firmware, Scan
> Media, Sanitize (aka overwrite), and VPPB bind/unbind.
> 
> The driver will deal with such commands synchronously, blocking
> all other incoming commands for a specified period of time, allowing
> time-slicing the command such that the caller can send incremental
> requests to avoid monopolizing the driver/device. This approach
> makes the code simpler, where any out of sync (timeout) between the
> driver and hardware is just disregarded as an invalid state until
> the next successful submission.
> 
> On devices where mbox interrupts are supported, this will still use
> a poller that will wakeup in the specified wait intervals. The irq
> handler will simply awake a blocked cmd, which is also safe vs a
> task that is either waking (timing out) or already awoken.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>

Just a minor comment inline. Otherwise
Reviewed-by: Dave Jiang <dave.jiang@intel.com>

> ---
>   drivers/cxl/cxl.h    |   7 +++
>   drivers/cxl/cxlmem.h |   6 +++
>   drivers/cxl/pci.c    | 100 +++++++++++++++++++++++++++++++++++++++++--
>   3 files changed, 109 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d853a0238ad7..b834e55375e3 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -176,14 +176,21 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
>   /* CXL 2.0 8.2.8.4 Mailbox Registers */
>   #define CXLDEV_MBOX_CAPS_OFFSET 0x00
>   #define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
> +#define   CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
> +#define   CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
>   #define CXLDEV_MBOX_CTRL_OFFSET 0x04
>   #define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
> +#define   CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
>   #define CXLDEV_MBOX_CMD_OFFSET 0x08
>   #define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
>   #define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
>   #define CXLDEV_MBOX_STATUS_OFFSET 0x10
> +#define   CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
>   #define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
>   #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
>   #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
>   
>   /*
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index ccbafc05a636..934076254d52 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -108,6 +108,9 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
>    *            variable sized output commands, it tells the exact number of bytes
>    *            written.
>    * @min_out: (input) internal command output payload size validation
> + * @poll_count: (input)  Number of timeouts to attempt.
> + * @poll_interval: (input) Number of ms between mailbox background command
> + *                 polling intervals timeouts.
>    * @return_code: (output) Error code returned from hardware.
>    *
>    * This is the primary mechanism used to send commands to the hardware.
> @@ -123,6 +126,8 @@ struct cxl_mbox_cmd {
>   	size_t size_in;
>   	size_t size_out;
>   	size_t min_out;
> +	int poll_count;
> +	u64 poll_interval;
>   	u16 return_code;
>   };
>   
> @@ -322,6 +327,7 @@ enum cxl_opcode {
>   	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
>   	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
>   	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
> +	CXL_MBOX_OP_SANITIZE		= 0x4400,
>   	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
>   	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
>   	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 60b23624d167..26b6105e2797 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -52,6 +52,8 @@ static unsigned short mbox_ready_timeout = 60;
>   module_param(mbox_ready_timeout, ushort, 0644);
>   MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
>   
> +static DECLARE_WAIT_QUEUE_HEAD(mbox_wait);
> +
>   static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
>   {
>   	const unsigned long start = jiffies;
> @@ -85,6 +87,25 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
>   			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
>   			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
>   
> +static irqreturn_t cxl_mbox_irq(int irq, void *id)
> +{
> +	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> +	wake_up(&mbox_wait);
> +	return IRQ_HANDLED;
> +}
> +
> +static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
> +{
> +	u64 bgcmd_status_reg;
> +	u32 pct;
> +
> +	bgcmd_status_reg = readq(cxlds->regs.mbox +
> +				 CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +	pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, bgcmd_status_reg);
> +
> +	return pct == 100;
> +}
> +
>   /**
>    * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
>    * @cxlds: The device state to communicate with.
> @@ -178,6 +199,56 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>   	mbox_cmd->return_code =
>   		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
>   
> +	/*
> +	 * Handle the background command in a synchronous manner.
> +	 *
> +	 * All other mailbox commands will serialize/queue on the mbox_mutex,
> +	 * which we currently hold. Furthermore this also guarantees that
> +	 * cxl_mbox_background_complete() checks are safe amongst each other,
> +	 * in that no new bg operation can occur in between.
> +	 *
> +	 * With the exception of special cases that merit monopolizing the
> +	 * driver/device, bg operations are timesliced in accordance with
> +	 * the nature of the command being sent.
> +	 *
> +	 * In the event of timeout, the mailbox state is indeterminate
> +	 * until the next successful command submission and the driver
> +	 * can get back in sync with the hardware state.
> +	 */
> +	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
> +		u64 bg_status_reg;
> +		const bool timeslice = mbox_cmd->opcode != CXL_MBOX_OP_SANITIZE;
> +
> +		dev_dbg(dev, "Mailbox background operation started\n");
> +
> +		while (1) {
> +			if (wait_event_interruptible_timeout(
> +				mbox_wait, cxl_mbox_background_complete(cxlds),
> +				msecs_to_jiffies(mbox_cmd->poll_interval)) > 0)
> +				break;
> +
> +			if (timeslice && !--mbox_cmd->poll_count)
> +				break;
> +		}
> +
> +		if (!cxl_mbox_background_complete(cxlds)) {
> +			u64 md_status =
> +				readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
> +
> +			cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
> +				    "background timeout");
> +			return -ETIMEDOUT;
> +		}
> +
> +		bg_status_reg = readq(cxlds->regs.mbox +
> +				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +		mbox_cmd->return_code =
> +			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
> +				  bg_status_reg);
> +
> +		dev_dbg(dev, "Mailbox background operation completed\n");

May be helpful to also emit the return_code in case of errors.

DJ

> +	}
> +
>   	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
>   		dev_dbg(dev, "Mailbox operation had an error: %s\n",
>   			cxl_mbox_cmd_rc2str(mbox_cmd));
> @@ -222,8 +293,11 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
>   static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>   {
>   	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> +	struct device *dev = cxlds->dev;
> +	struct pci_dev *pdev = to_pci_dev(dev);
>   	unsigned long timeout;
>   	u64 md_status;
> +	int rc, irq;
>   
>   	timeout = jiffies + mbox_ready_timeout * HZ;
>   	do {
> @@ -272,6 +346,24 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>   	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
>   		cxlds->payload_size);
>   
> +	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) {
> +		dev_dbg(dev, "Only Mailbox polling is supported");
> +		return 0;
> +	}
> +
> +	irq = pci_irq_vector(pdev,
> +			     FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap));
> +	if (irq < 0)
> +		return irq;
> +
> +	rc = devm_request_irq(dev, irq, cxl_mbox_irq,
> +			      IRQF_SHARED, "mailbox", cxlds);
> +	if (rc)
> +		return rc;
> +
> +	writel(CXLDEV_MBOX_CTRL_BG_CMD_IRQ,
> +	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
> +
>   	return 0;
>   }
>   
> @@ -757,6 +849,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   	if (rc)
>   		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>   
> +	rc = cxl_alloc_irq_vectors(pdev);
> +	if (rc)
> +		return rc;
> +
>   	rc = cxl_pci_setup_mailbox(cxlds);
>   	if (rc)
>   		return rc;
> @@ -777,10 +873,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>   	if (rc)
>   		return rc;
>   
> -	rc = cxl_alloc_irq_vectors(pdev);
> -	if (rc)
> -		return rc;
> -
>   	cxlmd = devm_cxl_add_memdev(cxlds);
>   	if (IS_ERR(cxlmd))
>   		return PTR_ERR(cxlmd);
Davidlohr Bueso Feb. 28, 2023, 8:18 p.m. UTC | #2
On Tue, 28 Feb 2023, Dave Jiang wrote:
>>+
>>+		dev_dbg(dev, "Mailbox background operation completed\n");
>
>May be helpful to also emit the return_code in case of errors.

Agreed, but that's given next:

>
>>+	}
>>+
>>	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
>>		dev_dbg(dev, "Mailbox operation had an error: %s\n",
>>			cxl_mbox_cmd_rc2str(mbox_cmd));

here.

Thanks,
Davidlohr
Dave Jiang Feb. 28, 2023, 11:35 p.m. UTC | #3
On 2/28/23 1:18 PM, Davidlohr Bueso wrote:
> On Tue, 28 Feb 2023, Dave Jiang wrote:
>>> +
>>> +        dev_dbg(dev, "Mailbox background operation completed\n");
>>
>> May be helpful to also emit the return_code in case of errors.
> 
> Agreed, but that's given next:
> 
>>
>>> +    }
>>> +
>>>     if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
>>>         dev_dbg(dev, "Mailbox operation had an error: %s\n",
>>>             cxl_mbox_cmd_rc2str(mbox_cmd));
> 
> here.

Ah ok.

> 
> Thanks,
> Davidlohr
Dan Williams March 27, 2023, 9:57 p.m. UTC | #4
Davidlohr Bueso wrote:

Hi Davidlohr, I am finally circling back to this, some comments below,
but I think this is looking good.

> This adds support for handling background operations, as defined in
> the CXL 3.0 spec. Commands that can take too long (over ~2 seconds)
> can run in the background asynchronously (to the hardware). Currently
> these are limited to Maintenance, transfer/activate Firmware, Scan
> Media, Sanitize (aka overwrite), and VPPB bind/unbind.
> 
> The driver will deal with such commands synchronously, blocking
> all other incoming commands for a specified period of time, allowing
> time-slicing the command such that the caller can send incremental
> requests to avoid monopolizing the driver/device. This approach
> makes the code simpler, where any out of sync (timeout) between the
> driver and hardware is just disregarded as an invalid state until
> the next successful submission.
> 
> On devices where mbox interrupts are supported, this will still use
> a poller that will wakeup in the specified wait intervals. The irq
> handler will simply awake a blocked cmd, which is also safe vs a
> task that is either waking (timing out) or already awoken.
> 
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  drivers/cxl/cxl.h    |   7 +++
>  drivers/cxl/cxlmem.h |   6 +++
>  drivers/cxl/pci.c    | 100 +++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 109 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index d853a0238ad7..b834e55375e3 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -176,14 +176,21 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
>  /* CXL 2.0 8.2.8.4 Mailbox Registers */
>  #define CXLDEV_MBOX_CAPS_OFFSET 0x00
>  #define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
> +#define   CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
> +#define   CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
>  #define CXLDEV_MBOX_CTRL_OFFSET 0x04
>  #define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
> +#define   CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
>  #define CXLDEV_MBOX_CMD_OFFSET 0x08
>  #define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
>  #define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
>  #define CXLDEV_MBOX_STATUS_OFFSET 0x10
> +#define   CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
>  #define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
>  #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
> +#define   CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
>  #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
>  
>  /*
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index ccbafc05a636..934076254d52 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -108,6 +108,9 @@ static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
>   *            variable sized output commands, it tells the exact number of bytes
>   *            written.
>   * @min_out: (input) internal command output payload size validation
> + * @poll_count: (input)  Number of timeouts to attempt.
> + * @poll_interval: (input) Number of ms between mailbox background command
> + *                 polling intervals timeouts.
>   * @return_code: (output) Error code returned from hardware.
>   *
>   * This is the primary mechanism used to send commands to the hardware.
> @@ -123,6 +126,8 @@ struct cxl_mbox_cmd {
>  	size_t size_in;
>  	size_t size_out;
>  	size_t min_out;
> +	int poll_count;
> +	u64 poll_interval;

An int will do, right? This value should likely never be above 1000.

>  	u16 return_code;
>  };
>  
> @@ -322,6 +327,7 @@ enum cxl_opcode {
>  	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
>  	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
>  	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
> +	CXL_MBOX_OP_SANITIZE		= 0x4400,
>  	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
>  	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
>  	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 60b23624d167..26b6105e2797 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -52,6 +52,8 @@ static unsigned short mbox_ready_timeout = 60;
>  module_param(mbox_ready_timeout, ushort, 0644);
>  MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
>  
> +static DECLARE_WAIT_QUEUE_HEAD(mbox_wait);
> +
>  static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
>  {
>  	const unsigned long start = jiffies;
> @@ -85,6 +87,25 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
>  			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
>  			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
>  
> +static irqreturn_t cxl_mbox_irq(int irq, void *id)
> +{
> +	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> +	wake_up(&mbox_wait);
> +	return IRQ_HANDLED;
> +}
> +
> +static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
> +{
> +	u64 bgcmd_status_reg;
> +	u32 pct;
> +
> +	bgcmd_status_reg = readq(cxlds->regs.mbox +
> +				 CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +	pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, bgcmd_status_reg);
> +
> +	return pct == 100;
> +}
> +
>  /**
>   * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
>   * @cxlds: The device state to communicate with.
> @@ -178,6 +199,56 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  	mbox_cmd->return_code =
>  		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
>  
> +	/*
> +	 * Handle the background command in a synchronous manner.
> +	 *
> +	 * All other mailbox commands will serialize/queue on the mbox_mutex,
> +	 * which we currently hold. Furthermore this also guarantees that
> +	 * cxl_mbox_background_complete() checks are safe amongst each other,
> +	 * in that no new bg operation can occur in between.
> +	 *
> +	 * With the exception of special cases that merit monopolizing the
> +	 * driver/device, bg operations are timesliced in accordance with
> +	 * the nature of the command being sent.
> +	 *
> +	 * In the event of timeout, the mailbox state is indeterminate
> +	 * until the next successful command submission and the driver
> +	 * can get back in sync with the hardware state.
> +	 */
> +	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
> +		u64 bg_status_reg;
> +		const bool timeslice = mbox_cmd->opcode != CXL_MBOX_OP_SANITIZE;

I understand that sanitize is special, but it feels out of place for
this to never give up polling when sanitize is in progress.

Yes, sanitize is a special case because the device is media disabled and
not much else can happen with it. However because of that I expect the
admin thread that submitted it may be different than the thread that
wants to check the status and re-enable the device.

cxl disable-memdev memX
echo 1 > /sys/bus/cxl/devices/memX/security/sanitize
<select()/poll() on /sys/bus/cxl/devices/memX/security/sanitize>
cxl enable-memdev memX

Per the spec other foreground commands can be allowed through while
sanitize is in flight and the device is expected to fail them. With
background commands I think the need to be careful and queue them up to
avoid clobbering the background status goes away with santize. If the
device has sanitize in progress the driver can just reject/fail new
commands with the "Background Operation" effect until it has had a
chance to reap the sanitize result.

> +
> +		dev_dbg(dev, "Mailbox background operation started\n");
> +
> +		while (1) {
> +			if (wait_event_interruptible_timeout(
> +				mbox_wait, cxl_mbox_background_complete(cxlds),
> +				msecs_to_jiffies(mbox_cmd->poll_interval)) > 0)
> +				break;

Given that this does not check for -EINTR what is the rationale for
making the wait interruptible?

> +
> +			if (timeslice && !--mbox_cmd->poll_count)
> +				break;

Per-above all commands that the kernel would synchronously wait for are
always timesliced, and sanitize has special behavior.

> +		}
> +
> +		if (!cxl_mbox_background_complete(cxlds)) {
> +			u64 md_status =
> +				readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
> +
> +			cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
> +				    "background timeout");
> +			return -ETIMEDOUT;
> +		}
> +
> +		bg_status_reg = readq(cxlds->regs.mbox +
> +				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +		mbox_cmd->return_code =
> +			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
> +				  bg_status_reg);
> +
> +		dev_dbg(dev, "Mailbox background operation completed\n");

Might be nice to include the opcode here and the other dev_dbg() above.

> +	}
> +
>  	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
>  		dev_dbg(dev, "Mailbox operation had an error: %s\n",
>  			cxl_mbox_cmd_rc2str(mbox_cmd));
> @@ -222,8 +293,11 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
>  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  {
>  	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
> +	struct device *dev = cxlds->dev;
> +	struct pci_dev *pdev = to_pci_dev(dev);
>  	unsigned long timeout;
>  	u64 md_status;
> +	int rc, irq;
>  
>  	timeout = jiffies + mbox_ready_timeout * HZ;
>  	do {
> @@ -272,6 +346,24 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
>  		cxlds->payload_size);
>  
> +	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) {
> +		dev_dbg(dev, "Only Mailbox polling is supported");
> +		return 0;
> +	}
> +
> +	irq = pci_irq_vector(pdev,
> +			     FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap));
> +	if (irq < 0)
> +		return irq;
> +
> +	rc = devm_request_irq(dev, irq, cxl_mbox_irq,
> +			      IRQF_SHARED, "mailbox", cxlds);
> +	if (rc)
> +		return rc;
> +
> +	writel(CXLDEV_MBOX_CTRL_BG_CMD_IRQ,
> +	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
> +
>  	return 0;
>  }
>  
> @@ -757,6 +849,10 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>  
> +	rc = cxl_alloc_irq_vectors(pdev);
> +	if (rc)
> +		return rc;
> +
>  	rc = cxl_pci_setup_mailbox(cxlds);
>  	if (rc)
>  		return rc;
> @@ -777,10 +873,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		return rc;
>  
> -	rc = cxl_alloc_irq_vectors(pdev);
> -	if (rc)
> -		return rc;
> -

This hunk wants to be its own lead-in patch so it can be bisected
independently of introducing background command support.
diff mbox series

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d853a0238ad7..b834e55375e3 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -176,14 +176,21 @@  static inline int ways_to_eiw(unsigned int ways, u8 *eiw)
 /* CXL 2.0 8.2.8.4 Mailbox Registers */
 #define CXLDEV_MBOX_CAPS_OFFSET 0x00
 #define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
+#define   CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK GENMASK(10, 7)
+#define   CXLDEV_MBOX_CAP_BG_CMD_IRQ BIT(6)
 #define CXLDEV_MBOX_CTRL_OFFSET 0x04
 #define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
+#define   CXLDEV_MBOX_CTRL_BG_CMD_IRQ BIT(2)
 #define CXLDEV_MBOX_CMD_OFFSET 0x08
 #define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
 #define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
 #define CXLDEV_MBOX_STATUS_OFFSET 0x10
+#define   CXLDEV_MBOX_STATUS_BG_CMD BIT(0)
 #define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
 #define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK GENMASK_ULL(22, 16)
+#define   CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK GENMASK_ULL(47, 32)
 #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
 
 /*
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index ccbafc05a636..934076254d52 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -108,6 +108,9 @@  static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
  *            variable sized output commands, it tells the exact number of bytes
  *            written.
  * @min_out: (input) internal command output payload size validation
+ * @poll_count: (input)  Number of timeouts to attempt.
+ * @poll_interval: (input) Number of ms between mailbox background command
+ *                 polling intervals timeouts.
  * @return_code: (output) Error code returned from hardware.
  *
  * This is the primary mechanism used to send commands to the hardware.
@@ -123,6 +126,8 @@  struct cxl_mbox_cmd {
 	size_t size_in;
 	size_t size_out;
 	size_t min_out;
+	int poll_count;
+	u64 poll_interval;
 	u16 return_code;
 };
 
@@ -322,6 +327,7 @@  enum cxl_opcode {
 	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
 	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
 	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
+	CXL_MBOX_OP_SANITIZE		= 0x4400,
 	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
 	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
 	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 60b23624d167..26b6105e2797 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -52,6 +52,8 @@  static unsigned short mbox_ready_timeout = 60;
 module_param(mbox_ready_timeout, ushort, 0644);
 MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
 
+static DECLARE_WAIT_QUEUE_HEAD(mbox_wait);
+
 static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 {
 	const unsigned long start = jiffies;
@@ -85,6 +87,25 @@  static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
 			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",        \
 			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
 
+static irqreturn_t cxl_mbox_irq(int irq, void *id)
+{
+	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
+	wake_up(&mbox_wait);
+	return IRQ_HANDLED;
+}
+
+static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
+{
+	u64 bgcmd_status_reg;
+	u32 pct;
+
+	bgcmd_status_reg = readq(cxlds->regs.mbox +
+				 CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+	pct = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, bgcmd_status_reg);
+
+	return pct == 100;
+}
+
 /**
  * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
  * @cxlds: The device state to communicate with.
@@ -178,6 +199,56 @@  static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 	mbox_cmd->return_code =
 		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
 
+	/*
+	 * Handle the background command in a synchronous manner.
+	 *
+	 * All other mailbox commands will serialize/queue on the mbox_mutex,
+	 * which we currently hold. Furthermore this also guarantees that
+	 * cxl_mbox_background_complete() checks are safe amongst each other,
+	 * in that no new bg operation can occur in between.
+	 *
+	 * With the exception of special cases that merit monopolizing the
+	 * driver/device, bg operations are timesliced in accordance with
+	 * the nature of the command being sent.
+	 *
+	 * In the event of timeout, the mailbox state is indeterminate
+	 * until the next successful command submission and the driver
+	 * can get back in sync with the hardware state.
+	 */
+	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
+		u64 bg_status_reg;
+		const bool timeslice = mbox_cmd->opcode != CXL_MBOX_OP_SANITIZE;
+
+		dev_dbg(dev, "Mailbox background operation started\n");
+
+		while (1) {
+			if (wait_event_interruptible_timeout(
+				mbox_wait, cxl_mbox_background_complete(cxlds),
+				msecs_to_jiffies(mbox_cmd->poll_interval)) > 0)
+				break;
+
+			if (timeslice && !--mbox_cmd->poll_count)
+				break;
+		}
+
+		if (!cxl_mbox_background_complete(cxlds)) {
+			u64 md_status =
+				readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+			cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
+				    "background timeout");
+			return -ETIMEDOUT;
+		}
+
+		bg_status_reg = readq(cxlds->regs.mbox +
+				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+		mbox_cmd->return_code =
+			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
+				  bg_status_reg);
+
+		dev_dbg(dev, "Mailbox background operation completed\n");
+	}
+
 	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
 		dev_dbg(dev, "Mailbox operation had an error: %s\n",
 			cxl_mbox_cmd_rc2str(mbox_cmd));
@@ -222,8 +293,11 @@  static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
 static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 {
 	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+	struct device *dev = cxlds->dev;
+	struct pci_dev *pdev = to_pci_dev(dev);
 	unsigned long timeout;
 	u64 md_status;
+	int rc, irq;
 
 	timeout = jiffies + mbox_ready_timeout * HZ;
 	do {
@@ -272,6 +346,24 @@  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 	dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
 		cxlds->payload_size);
 
+	if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ)) {
+		dev_dbg(dev, "Only Mailbox polling is supported");
+		return 0;
+	}
+
+	irq = pci_irq_vector(pdev,
+			     FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap));
+	if (irq < 0)
+		return irq;
+
+	rc = devm_request_irq(dev, irq, cxl_mbox_irq,
+			      IRQF_SHARED, "mailbox", cxlds);
+	if (rc)
+		return rc;
+
+	writel(CXLDEV_MBOX_CTRL_BG_CMD_IRQ,
+	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+
 	return 0;
 }
 
@@ -757,6 +849,10 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
 
+	rc = cxl_alloc_irq_vectors(pdev);
+	if (rc)
+		return rc;
+
 	rc = cxl_pci_setup_mailbox(cxlds);
 	if (rc)
 		return rc;
@@ -777,10 +873,6 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	rc = cxl_alloc_irq_vectors(pdev);
-	if (rc)
-		return rc;
-
 	cxlmd = devm_cxl_add_memdev(cxlds);
 	if (IS_ERR(cxlmd))
 		return PTR_ERR(cxlmd);