diff mbox series

[3/7] cxl/mbox: Add sanitation handling machinery

Message ID 20230612181038.14421-4-dave@stgolabs.net
State Accepted
Commit 0c36b6ad436a38b167af16e6c690c890b8b2df62
Headers show
Series cxl: Support device sanitation | expand

Commit Message

Davidlohr Bueso June 12, 2023, 6:10 p.m. UTC
Sanitation is by definition a device-monopolizing operation, and thus
the timeslicing rules for other background commands do not apply.
As such handle this special case asynchronously and return immediately.
Subsequent changes will allow completion to be pollable from userspace
via a sysfs file interface.

For devices that don't support interrupts for notifying background
command completion, self-poll with the caveat that the poller can
be out of sync with the ready hardware, and therefore care must be
taken to not allow any new commands to go through until the poller
sees the hw completion. The poller takes the mbox_mutex to stabilize
the flagging, minimizing any runtime overhead in the send path to
check for 'sanitize_tmo' for uncommon poll scenarios.

The irq case is much simpler as hardware will serialize/error
appropriately.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
---
 drivers/cxl/core/memdev.c | 10 +++++
 drivers/cxl/cxlmem.h      |  7 ++++
 drivers/cxl/pci.c         | 77 +++++++++++++++++++++++++++++++++++++--
 3 files changed, 91 insertions(+), 3 deletions(-)

Comments

Jonathan Cameron June 13, 2023, 4:07 p.m. UTC | #1
On Mon, 12 Jun 2023 11:10:34 -0700
Davidlohr Bueso <dave@stgolabs.net> wrote:

> Sanitation is by definition a device-monopolizing operation, and thus
> the timeslicing rules for other background commands do not apply.
> As such handle this special case asynchronously and return immediately.
> Subsequent changes will allow completion to be pollable from userspace
> via a sysfs file interface.
> 
> For devices that don't support interrupts for notifying background
> command completion, self-poll with the caveat that the poller can
> be out of sync with the ready hardware, and therefore care must be
> taken to not allow any new commands to go through until the poller
> sees the hw completion. The poller takes the mbox_mutex to stabilize
> the flagging, minimizing any runtime overhead in the send path to
> check for 'sanitize_tmo' for uncommon poll scenarios.
> 
> The irq case is much simpler as hardware will serialize/error
> appropriately.
> 
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>

Not updated the check against -1 for whether to poll or not.

Which I'm guessing is the bug I'm seeing whilst testing this on qemu

> ---
>  drivers/cxl/core/memdev.c | 10 +++++
>  drivers/cxl/cxlmem.h      |  7 ++++
>  drivers/cxl/pci.c         | 77 +++++++++++++++++++++++++++++++++++++--
>  3 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 1bbb7e39fc93..834f418b6bcb 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -460,11 +460,21 @@ void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cm
>  }
>  EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
>  
> +static void cxl_memdev_security_shutdown(struct device *dev)
> +{
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	if (cxlds->security.poll)
> +		cancel_delayed_work_sync(&cxlds->security.poll_dwork);
> +}
> +
>  static void cxl_memdev_shutdown(struct device *dev)
>  {
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  
>  	down_write(&cxl_memdev_rwsem);
> +	cxl_memdev_security_shutdown(dev);
>  	cxlmd->cxlds = NULL;
>  	up_write(&cxl_memdev_rwsem);
>  }
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 091f1200736b..3a9df1044144 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -264,9 +264,15 @@ struct cxl_poison_state {
>   * struct cxl_security_state - Device security state
>   *
>   * @state: state of last security operation
> + * @poll: polling for sanitation is enabled, device has no mbox irq support
> + * @poll_tmo_secs: polling timeout
> + * @poll_dwork: polling work item
>   */
>  struct cxl_security_state {
>  	unsigned long state;
> +	bool poll;
> +	int poll_tmo_secs;
> +	struct delayed_work poll_dwork;
>  };
>  
>  /**
> @@ -379,6 +385,7 @@ enum cxl_opcode {
>  	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
>  	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
>  	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
> +	CXL_MBOX_OP_SANITIZE		= 0x4400,
>  	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
>  	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
>  	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 4b2575502f49..c92eab55a5a7 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -115,18 +115,52 @@ static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
>  
>  static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>  {
> +	u64 reg;
> +	u16 opcode;
>  	struct cxl_dev_id *dev_id = id;
>  	struct cxl_dev_state *cxlds = dev_id->cxlds;
>  
>  	if (!cxl_mbox_background_complete(cxlds))
>  		return IRQ_NONE;
>  
> -	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> -	rcuwait_wake_up(&cxlds->mbox_wait);
> +	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
> +	if (opcode == CXL_MBOX_OP_SANITIZE) {
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> +		rcuwait_wake_up(&cxlds->mbox_wait);
> +	}
>  
>  	return IRQ_HANDLED;
>  }
>  
> +/*
> + * Sanitation operation polling mode.
> + */
> +static void cxl_mbox_sanitize_work(struct work_struct *work)
> +{
> +	struct cxl_dev_state *cxlds;
> +
> +	cxlds = container_of(work,
> +			     struct cxl_dev_state, security.poll_dwork.work);
> +
> +	mutex_lock(&cxlds->mbox_mutex);
> +	if (cxl_mbox_background_complete(cxlds)) {
> +		cxlds->security.poll_tmo_secs = 0;
> +		put_device(cxlds->dev);
> +
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		int timeout = cxlds->security.poll_tmo_secs + 10;
> +
> +		cxlds->security.poll_tmo_secs = min(15 * 60, timeout);
> +		queue_delayed_work(system_wq, &cxlds->security.poll_dwork,
> +				   timeout * HZ);
> +	}
> +	mutex_unlock(&cxlds->mbox_mutex);
> +}
> +
>  /**
>   * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
>   * @cxlds: The device state to communicate with.
> @@ -187,6 +221,16 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  		return -EBUSY;
>  	}
>  
> +	/*
> +	 * With sanitize polling, hardware might be done and the poller still
> +	 * not be in sync. Ensure no new command comes in until so. Keep the
> +	 * hardware semantics and only allow device health status.
> +	 */
> +	if (unlikely(cxlds->security.poll_tmo_secs > 0)) {
> +		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
> +			return -EBUSY;
> +	}
> +
>  	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
>  			     mbox_cmd->opcode);
>  	if (mbox_cmd->size_in) {
> @@ -235,11 +279,34 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  	 */
>  	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
>  		u64 bg_status_reg;
> -		int i, timeout = mbox_cmd->poll_interval_ms;
> +		int i, timeout;
> +
> +		/*
> ++		 * Sanitation is a special case which monopolizes the device
> +		 * and cannot be timesliced. Handle asynchronously instead,
> +		 * and allow userspace to poll(2) for completion.
> +		 */
> +		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
> +			if (cxlds->security.poll_tmo_secs != -1) {
Should be checking your new poll boolean.

Jonathan
> +				/* hold the device throughout */
Davidlohr Bueso June 13, 2023, 4:28 p.m. UTC | #2
On Tue, 13 Jun 2023, Jonathan Cameron wrote:

>> +		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
>> +			if (cxlds->security.poll_tmo_secs != -1) {
>Should be checking your new poll boolean.

Yes, again sorry for the oversight.

Thanks,
Davidlohr
Jonathan Cameron June 14, 2023, 8:36 a.m. UTC | #3
On Tue, 13 Jun 2023 09:28:54 -0700
Davidlohr Bueso <dave@stgolabs.net> wrote:

> On Tue, 13 Jun 2023, Jonathan Cameron wrote:
> 
> >> +		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
> >> +			if (cxlds->security.poll_tmo_secs != -1) {  
> >Should be checking your new poll boolean.  
> 
> Yes, again sorry for the oversight.

Other than that, LGTM and with the fix

Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

> 
> Thanks,
> Davidlohr
Dan Williams June 25, 2023, 10:13 p.m. UTC | #4
Davidlohr Bueso wrote:
> Sanitation is by definition a device-monopolizing operation, and thus
> the timeslicing rules for other background commands do not apply.
> As such handle this special case asynchronously and return immediately.
> Subsequent changes will allow completion to be pollable from userspace
> via a sysfs file interface.
> 
> For devices that don't support interrupts for notifying background
> command completion, self-poll with the caveat that the poller can
> be out of sync with the ready hardware, and therefore care must be
> taken to not allow any new commands to go through until the poller
> sees the hw completion. The poller takes the mbox_mutex to stabilize
> the flagging, minimizing any runtime overhead in the send path to
> check for 'sanitize_tmo' for uncommon poll scenarios.
> 
> The irq case is much simpler as hardware will serialize/error
> appropriately.

Some minor things to fixup below, if this is all I find I can likely
handle this on applying:

> 
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> Signed-off-by: Davidlohr Bueso <dave@stgolabs.net>
> ---
>  drivers/cxl/core/memdev.c | 10 +++++
>  drivers/cxl/cxlmem.h      |  7 ++++
>  drivers/cxl/pci.c         | 77 +++++++++++++++++++++++++++++++++++++--
>  3 files changed, 91 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
> index 1bbb7e39fc93..834f418b6bcb 100644
> --- a/drivers/cxl/core/memdev.c
> +++ b/drivers/cxl/core/memdev.c
> @@ -460,11 +460,21 @@ void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cm
>  }
>  EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
>  
> +static void cxl_memdev_security_shutdown(struct device *dev)
> +{
> +	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
> +	struct cxl_dev_state *cxlds = cxlmd->cxlds;
> +
> +	if (cxlds->security.poll)
> +		cancel_delayed_work_sync(&cxlds->security.poll_dwork);
> +}
> +
>  static void cxl_memdev_shutdown(struct device *dev)
>  {
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  
>  	down_write(&cxl_memdev_rwsem);
> +	cxl_memdev_security_shutdown(dev);
>  	cxlmd->cxlds = NULL;
>  	up_write(&cxl_memdev_rwsem);
>  }
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 091f1200736b..3a9df1044144 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -264,9 +264,15 @@ struct cxl_poison_state {
>   * struct cxl_security_state - Device security state
>   *
>   * @state: state of last security operation
> + * @poll: polling for sanitation is enabled, device has no mbox irq support
> + * @poll_tmo_secs: polling timeout
> + * @poll_dwork: polling work item
>   */
>  struct cxl_security_state {
>  	unsigned long state;
> +	bool poll;
> +	int poll_tmo_secs;
> +	struct delayed_work poll_dwork;
>  };
>  
>  /**
> @@ -379,6 +385,7 @@ enum cxl_opcode {
>  	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
>  	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
>  	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
> +	CXL_MBOX_OP_SANITIZE		= 0x4400,
>  	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
>  	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
>  	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 4b2575502f49..c92eab55a5a7 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -115,18 +115,52 @@ static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
>  
>  static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>  {
> +	u64 reg;
> +	u16 opcode;
>  	struct cxl_dev_id *dev_id = id;
>  	struct cxl_dev_state *cxlds = dev_id->cxlds;
>  
>  	if (!cxl_mbox_background_complete(cxlds))
>  		return IRQ_NONE;
>  
> -	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> -	rcuwait_wake_up(&cxlds->mbox_wait);
> +	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
> +	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
> +	if (opcode == CXL_MBOX_OP_SANITIZE) {
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
> +		rcuwait_wake_up(&cxlds->mbox_wait);

Just a question, is there any harm in awaking this even though nothing
is waiting? I.e. just wondering if this has functional purpose or is
just for cleanliness?

> +	}
>  
>  	return IRQ_HANDLED;
>  }
>  
> +/*
> + * Sanitation operation polling mode.
> + */
> +static void cxl_mbox_sanitize_work(struct work_struct *work)
> +{
> +	struct cxl_dev_state *cxlds;
> +
> +	cxlds = container_of(work,
> +			     struct cxl_dev_state, security.poll_dwork.work);
> +
> +	mutex_lock(&cxlds->mbox_mutex);
> +	if (cxl_mbox_background_complete(cxlds)) {
> +		cxlds->security.poll_tmo_secs = 0;
> +		put_device(cxlds->dev);
> +
> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
> +	} else {
> +		int timeout = cxlds->security.poll_tmo_secs + 10;
> +
> +		cxlds->security.poll_tmo_secs = min(15 * 60, timeout);
> +		queue_delayed_work(system_wq, &cxlds->security.poll_dwork,
> +				   timeout * HZ);
> +	}
> +	mutex_unlock(&cxlds->mbox_mutex);
> +}
> +
>  /**
>   * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
>   * @cxlds: The device state to communicate with.
> @@ -187,6 +221,16 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  		return -EBUSY;
>  	}
>  
> +	/*
> +	 * With sanitize polling, hardware might be done and the poller still
> +	 * not be in sync. Ensure no new command comes in until so. Keep the
> +	 * hardware semantics and only allow device health status.
> +	 */
> +	if (unlikely(cxlds->security.poll_tmo_secs > 0)) {

CPUs and compilers do a decent job at likely/unlikely branch prediction,
and given mailbox operations are a slow path I can not imagine this
unlikely() annotation makes any measurable difference.

> +		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
> +			return -EBUSY;
> +	}
> +
>  	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
>  			     mbox_cmd->opcode);
>  	if (mbox_cmd->size_in) {
> @@ -235,11 +279,34 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  	 */
>  	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
>  		u64 bg_status_reg;
> -		int i, timeout = mbox_cmd->poll_interval_ms;
> +		int i, timeout;
> +
> +		/*
> ++		 * Sanitation is a special case which monopolizes the device

   ^ extra '+' character?

> +		 * and cannot be timesliced. Handle asynchronously instead,
> +		 * and allow userspace to poll(2) for completion.
> +		 */
> +		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
> +			if (cxlds->security.poll_tmo_secs != -1) {
> +				/* hold the device throughout */
> +				get_device(cxlds->dev);
> +
> +				/* give first timeout a second */
> +				timeout = 1;
> +				cxlds->security.poll_tmo_secs = timeout;
> +				queue_delayed_work(system_wq,
> +						   &cxlds->security.poll_dwork,
> +						   timeout * HZ);
> +			}
> +
> +			dev_dbg(dev, "Sanitation operation started\n");
> +			goto success;
> +		}
>  
>  		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
>  			mbox_cmd->opcode);
>  
> +		timeout = mbox_cmd->poll_interval_ms;
>  		for (i = 0; i < mbox_cmd->poll_count; i++) {
>  			if (rcuwait_wait_event_timeout(&cxlds->mbox_wait,
>  				       cxl_mbox_background_complete(cxlds),
> @@ -270,6 +337,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
>  		return 0; /* completed but caller must check return_code */
>  	}
>  
> +success:
>  	/* #7 */
>  	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
>  	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
> @@ -382,6 +450,9 @@ static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
>  	}
>  
>  mbox_poll:
> +	cxlds->security.poll = true;
> +	INIT_DELAYED_WORK(&cxlds->security.poll_dwork, cxl_mbox_sanitize_work);
> +
>  	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
>  	return 0;
>  }
> -- 
> 2.41.0
>
Dan Williams June 25, 2023, 10:18 p.m. UTC | #5
Davidlohr Bueso wrote:
> Sanitation is by definition a device-monopolizing operation, and thus
> the timeslicing rules for other background commands do not apply.
> As such handle this special case asynchronously and return immediately.
> Subsequent changes will allow completion to be pollable from userspace
> via a sysfs file interface.
> 
> For devices that don't support interrupts for notifying background
> command completion, self-poll with the caveat that the poller can
> be out of sync with the ready hardware, and therefore care must be
> taken to not allow any new commands to go through until the poller
> sees the hw completion. The poller takes the mbox_mutex to stabilize
> the flagging, minimizing any runtime overhead in the send path to
> check for 'sanitize_tmo' for uncommon poll scenarios.
> 
> The irq case is much simpler as hardware will serialize/error
> appropriately.

I noticed that this series bounces back and forth between "sanitation" and
"sanitization". I think everywhere it mention "sanitation" it means
"sanitization", right?
Davidlohr Bueso June 26, 2023, 6:17 p.m. UTC | #6
On Sun, 25 Jun 2023, Dan Williams wrote:

>Davidlohr Bueso wrote:
>>  static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
>>  {
>> +	u64 reg;
>> +	u16 opcode;
>>	struct cxl_dev_id *dev_id = id;
>>	struct cxl_dev_state *cxlds = dev_id->cxlds;
>>
>>	if (!cxl_mbox_background_complete(cxlds))
>>		return IRQ_NONE;
>>
>> -	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
>> -	rcuwait_wake_up(&cxlds->mbox_wait);
>> +	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
>> +	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
>> +	if (opcode == CXL_MBOX_OP_SANITIZE) {
>> +		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
>> +	} else {
>> +		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
>> +		rcuwait_wake_up(&cxlds->mbox_wait);
>
>Just a question, is there any harm in awaking this even though nothing
>is waiting? I.e. just wondering if this has functional purpose or is
>just for cleanliness?

No, there is no harm in calling the wakeup if nothing is blocked.
rcuwait will check for nil task pointer before calling wake_up_process().
In such cases it will be a nop.

...

>>
>> +	/*
>> +	 * With sanitize polling, hardware might be done and the poller still
>> +	 * not be in sync. Ensure no new command comes in until so. Keep the
>> +	 * hardware semantics and only allow device health status.
>> +	 */
>> +	if (unlikely(cxlds->security.poll_tmo_secs > 0)) {
>
>CPUs and compilers do a decent job at likely/unlikely branch prediction,
>and given mailbox operations are a slow path I can not imagine this
>unlikely() annotation makes any measurable difference.

So this was more about documenting the rare case more than an actual performance
optimization. Either way is fine I guess.

Thanks,
Davidlohr
diff mbox series

Patch

diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 1bbb7e39fc93..834f418b6bcb 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -460,11 +460,21 @@  void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cm
 }
 EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
 
+static void cxl_memdev_security_shutdown(struct device *dev)
+{
+	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+	if (cxlds->security.poll)
+		cancel_delayed_work_sync(&cxlds->security.poll_dwork);
+}
+
 static void cxl_memdev_shutdown(struct device *dev)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 
 	down_write(&cxl_memdev_rwsem);
+	cxl_memdev_security_shutdown(dev);
 	cxlmd->cxlds = NULL;
 	up_write(&cxl_memdev_rwsem);
 }
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 091f1200736b..3a9df1044144 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -264,9 +264,15 @@  struct cxl_poison_state {
  * struct cxl_security_state - Device security state
  *
  * @state: state of last security operation
+ * @poll: polling for sanitation is enabled, device has no mbox irq support
+ * @poll_tmo_secs: polling timeout
+ * @poll_dwork: polling work item
  */
 struct cxl_security_state {
 	unsigned long state;
+	bool poll;
+	int poll_tmo_secs;
+	struct delayed_work poll_dwork;
 };
 
 /**
@@ -379,6 +385,7 @@  enum cxl_opcode {
 	CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS	= 0x4303,
 	CXL_MBOX_OP_SCAN_MEDIA		= 0x4304,
 	CXL_MBOX_OP_GET_SCAN_MEDIA	= 0x4305,
+	CXL_MBOX_OP_SANITIZE		= 0x4400,
 	CXL_MBOX_OP_GET_SECURITY_STATE	= 0x4500,
 	CXL_MBOX_OP_SET_PASSPHRASE	= 0x4501,
 	CXL_MBOX_OP_DISABLE_PASSPHRASE	= 0x4502,
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 4b2575502f49..c92eab55a5a7 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -115,18 +115,52 @@  static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
 
 static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
 {
+	u64 reg;
+	u16 opcode;
 	struct cxl_dev_id *dev_id = id;
 	struct cxl_dev_state *cxlds = dev_id->cxlds;
 
 	if (!cxl_mbox_background_complete(cxlds))
 		return IRQ_NONE;
 
-	/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
-	rcuwait_wake_up(&cxlds->mbox_wait);
+	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
+	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
+	if (opcode == CXL_MBOX_OP_SANITIZE) {
+		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
+	} else {
+		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
+		rcuwait_wake_up(&cxlds->mbox_wait);
+	}
 
 	return IRQ_HANDLED;
 }
 
+/*
+ * Sanitation operation polling mode.
+ */
+static void cxl_mbox_sanitize_work(struct work_struct *work)
+{
+	struct cxl_dev_state *cxlds;
+
+	cxlds = container_of(work,
+			     struct cxl_dev_state, security.poll_dwork.work);
+
+	mutex_lock(&cxlds->mbox_mutex);
+	if (cxl_mbox_background_complete(cxlds)) {
+		cxlds->security.poll_tmo_secs = 0;
+		put_device(cxlds->dev);
+
+		dev_dbg(cxlds->dev, "Sanitation operation ended\n");
+	} else {
+		int timeout = cxlds->security.poll_tmo_secs + 10;
+
+		cxlds->security.poll_tmo_secs = min(15 * 60, timeout);
+		queue_delayed_work(system_wq, &cxlds->security.poll_dwork,
+				   timeout * HZ);
+	}
+	mutex_unlock(&cxlds->mbox_mutex);
+}
+
 /**
  * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
  * @cxlds: The device state to communicate with.
@@ -187,6 +221,16 @@  static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 		return -EBUSY;
 	}
 
+	/*
+	 * With sanitize polling, hardware might be done and the poller still
+	 * not be in sync. Ensure no new command comes in until so. Keep the
+	 * hardware semantics and only allow device health status.
+	 */
+	if (unlikely(cxlds->security.poll_tmo_secs > 0)) {
+		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
+			return -EBUSY;
+	}
+
 	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
 			     mbox_cmd->opcode);
 	if (mbox_cmd->size_in) {
@@ -235,11 +279,34 @@  static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 	 */
 	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
 		u64 bg_status_reg;
-		int i, timeout = mbox_cmd->poll_interval_ms;
+		int i, timeout;
+
+		/*
+		 * Sanitation is a special case which monopolizes the device
+		 * and cannot be timesliced. Handle asynchronously instead,
+		 * and allow userspace to poll(2) for completion.
+		 */
+		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
+			if (cxlds->security.poll) {
+				/* hold the device throughout */
+				get_device(cxlds->dev);
+
+				/* give first timeout a second */
+				timeout = 1;
+				cxlds->security.poll_tmo_secs = timeout;
+				queue_delayed_work(system_wq,
+						   &cxlds->security.poll_dwork,
+						   timeout * HZ);
+			}
+
+			dev_dbg(dev, "Sanitation operation started\n");
+			goto success;
+		}
 
 		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
 			mbox_cmd->opcode);
 
+		timeout = mbox_cmd->poll_interval_ms;
 		for (i = 0; i < mbox_cmd->poll_count; i++) {
 			if (rcuwait_wait_event_timeout(&cxlds->mbox_wait,
 				       cxl_mbox_background_complete(cxlds),
@@ -270,6 +337,7 @@  static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
 		return 0; /* completed but caller must check return_code */
 	}
 
+success:
 	/* #7 */
 	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
 	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
@@ -382,6 +450,9 @@  static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
 	}
 
 mbox_poll:
+	cxlds->security.poll = true;
+	INIT_DELAYED_WORK(&cxlds->security.poll_dwork, cxl_mbox_sanitize_work);
+
 	dev_dbg(cxlds->dev, "Mailbox interrupts are unsupported");
 	return 0;
 }