diff mbox series

[v3,03/20] cxl/pci: add check for validating capabilities

Message ID 20240907081836.5801-4-alejandro.lucero-palau@amd.com (mailing list archive)
State Changes Requested
Headers show
Series cxl: add Type2 device support | expand

Checks

Context Check Description
netdev/series_format fail Series longer than 15 patches
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 18 this patch: 18
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 8 maintainers not CCed: alison.schofield@intel.com vishal.l.verma@intel.com jonathan.cameron@huawei.com terry.bowman@amd.com rrichter@amd.com dave.jiang@intel.com ira.weiny@intel.com dave@stgolabs.net
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 18 this patch: 18
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 71 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 4 this patch: 4
netdev/source_inline success Was 0 now: 0

Commit Message

Lucero Palau, Alejandro Sept. 7, 2024, 8:18 a.m. UTC
From: Alejandro Lucero <alucerop@amd.com>

During CXL device initialization supported capabilities by the device
are discovered. Type3 and Type2 devices have different mandatory
capabilities and a Type2 expects a specific set including optional
capabilities.

Add a function for checking expected capabilities against those found
during initialization.

Rely on this function for validating capabilities instead of when CXL
regs are probed.

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
---
 drivers/cxl/core/pci.c  | 17 +++++++++++++++++
 drivers/cxl/core/regs.c |  9 ---------
 drivers/cxl/pci.c       | 12 ++++++++++++
 include/linux/cxl/cxl.h |  2 ++
 4 files changed, 31 insertions(+), 9 deletions(-)

Comments

Li, Ming4 Sept. 10, 2024, 3:26 a.m. UTC | #1
On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
> From: Alejandro Lucero <alucerop@amd.com>
>
> During CXL device initialization supported capabilities by the device
> are discovered. Type3 and Type2 devices have different mandatory
> capabilities and a Type2 expects a specific set including optional
> capabilities.
>
> Add a function for checking expected capabilities against those found
> during initialization.
>
> Rely on this function for validating capabilities instead of when CXL
> regs are probed.
>
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> ---
>  drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>  drivers/cxl/core/regs.c |  9 ---------
>  drivers/cxl/pci.c       | 12 ++++++++++++
>  include/linux/cxl/cxl.h |  2 ++
>  4 files changed, 31 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 3d6564dbda57..57370d9beb32 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -7,6 +7,7 @@
>  #include <linux/pci.h>
>  #include <linux/pci-doe.h>
>  #include <linux/aer.h>
> +#include <linux/cxl/cxl.h>
>  #include <linux/cxl/pci.h>
>  #include <cxlpci.h>
>  #include <cxlmem.h>
> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>  				     __cxl_endpoint_decoder_reset_detected);
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
> +
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps)
> +{
> +	if (current_caps)
> +		*current_caps = cxlds->capabilities;
> +
> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
> +		cxlds->capabilities, expected_caps);
> +
> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
> +		return false;
> +
> +	return true;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);

Why has to use this 'u32 *current_caps' as a parameter? if user wants to know the capabilities of a device, they can get it from cxlds->capabilities directly.


> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> index 8b8abcadcb93..35f6dc97be6e 100644
> --- a/drivers/cxl/core/regs.c
> +++ b/drivers/cxl/core/regs.c
> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>  	case CXL_REGLOC_RBI_MEMDEV:
>  		dev_map = &map->device_map;
>  		cxl_probe_device_regs(host, base, dev_map, caps);
> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
> -		    !dev_map->memdev.valid) {
> -			dev_err(host, "registers not found: %s%s%s\n",
> -				!dev_map->status.valid ? "status " : "",
> -				!dev_map->mbox.valid ? "mbox " : "",
> -				!dev_map->memdev.valid ? "memdev " : "");
> -			return -ENXIO;
> -		}
> -
>  		dev_dbg(host, "Probing device registers...\n");
>  		break;
>  	default:
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 58f325019886..bec660357eec 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	struct cxl_register_map map;
>  	struct cxl_memdev *cxlmd;
>  	int i, rc, pmu_count;
> +	u32 expected, found;
>  	bool irq_avail;
>  	u16 dvsec;
>  
> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>  
> +	/* These are the mandatory capabilities for a Type3 device */
> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
> +
> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
> +		dev_err(&pdev->dev,
> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
> +			expected, found);
> +		return -ENXIO;
> +	}
> +

Same as above, the capabilities already are cached in cxlds->capabilities. seems like that the 'found' can be removed and using cxlds->capabilities directly here.


>  	rc = cxl_await_media_ready(cxlds);
>  	if (rc == 0)
>  		cxlds->media_ready = true;
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index 930b1b9c1d6a..4a57bf60403d 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>  void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>  int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>  		     enum cxl_resource);
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps);
>  #endif
Li, Ming4 Sept. 10, 2024, 6:24 a.m. UTC | #2
On 9/10/2024 11:26 AM, Li, Ming4 wrote:
> On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> During CXL device initialization supported capabilities by the device
>> are discovered. Type3 and Type2 devices have different mandatory
>> capabilities and a Type2 expects a specific set including optional
>> capabilities.
>>
>> Add a function for checking expected capabilities against those found
>> during initialization.
>>
>> Rely on this function for validating capabilities instead of when CXL
>> regs are probed.
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> ---
>>  drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>>  drivers/cxl/core/regs.c |  9 ---------
>>  drivers/cxl/pci.c       | 12 ++++++++++++
>>  include/linux/cxl/cxl.h |  2 ++
>>  4 files changed, 31 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 3d6564dbda57..57370d9beb32 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -7,6 +7,7 @@
>>  #include <linux/pci.h>
>>  #include <linux/pci-doe.h>
>>  #include <linux/aer.h>
>> +#include <linux/cxl/cxl.h>
>>  #include <linux/cxl/pci.h>
>>  #include <cxlpci.h>
>>  #include <cxlmem.h>
>> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>>  				     __cxl_endpoint_decoder_reset_detected);
>>  }
>>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
>> +
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps)
>> +{
>> +	if (current_caps)
>> +		*current_caps = cxlds->capabilities;
>> +
>> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
>> +		cxlds->capabilities, expected_caps);
>> +
>> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
>> +		return false;
>> +
>> +	return true;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
> Why has to use this 'u32 *current_caps' as a parameter? if user wants to know the capabilities of a device, they can get it from cxlds->capabilities directly.
>
Sorry, I missed something implemented in PATCH #1, seems like you can not access struct cxl_dev_state from efx driver side. right?


>> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
>> index 8b8abcadcb93..35f6dc97be6e 100644
>> --- a/drivers/cxl/core/regs.c
>> +++ b/drivers/cxl/core/regs.c
>> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>>  	case CXL_REGLOC_RBI_MEMDEV:
>>  		dev_map = &map->device_map;
>>  		cxl_probe_device_regs(host, base, dev_map, caps);
>> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
>> -		    !dev_map->memdev.valid) {
>> -			dev_err(host, "registers not found: %s%s%s\n",
>> -				!dev_map->status.valid ? "status " : "",
>> -				!dev_map->mbox.valid ? "mbox " : "",
>> -				!dev_map->memdev.valid ? "memdev " : "");
>> -			return -ENXIO;
>> -		}
>> -
>>  		dev_dbg(host, "Probing device registers...\n");
>>  		break;
>>  	default:
>> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
>> index 58f325019886..bec660357eec 100644
>> --- a/drivers/cxl/pci.c
>> +++ b/drivers/cxl/pci.c
>> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>  	struct cxl_register_map map;
>>  	struct cxl_memdev *cxlmd;
>>  	int i, rc, pmu_count;
>> +	u32 expected, found;
>>  	bool irq_avail;
>>  	u16 dvsec;
>>  
>> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>  	if (rc)
>>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>>  
>> +	/* These are the mandatory capabilities for a Type3 device */
>> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
>> +
>> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
>> +		dev_err(&pdev->dev,
>> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
>> +			expected, found);
>> +		return -ENXIO;
>> +	}
>> +
> Same as above, the capabilities already are cached in cxlds->capabilities. seems like that the 'found' can be removed and using cxlds->capabilities directly here.
>
>
>>  	rc = cxl_await_media_ready(cxlds);
>>  	if (rc == 0)
>>  		cxlds->media_ready = true;
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index 930b1b9c1d6a..4a57bf60403d 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>>  void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>>  int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>>  		     enum cxl_resource);
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps);
>>  #endif
>
>
Alejandro Lucero Palau Sept. 10, 2024, 7:31 a.m. UTC | #3
On 9/10/24 07:24, Li, Ming4 wrote:
> On 9/10/2024 11:26 AM, Li, Ming4 wrote:
>> On 9/7/2024 4:18 PM, alejandro.lucero-palau@amd.com wrote:
>>> From: Alejandro Lucero <alucerop@amd.com>
>>>
>>> During CXL device initialization supported capabilities by the device
>>> are discovered. Type3 and Type2 devices have different mandatory
>>> capabilities and a Type2 expects a specific set including optional
>>> capabilities.
>>>
>>> Add a function for checking expected capabilities against those found
>>> during initialization.
>>>
>>> Rely on this function for validating capabilities instead of when CXL
>>> regs are probed.
>>>
>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>> ---
>>>   drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>>>   drivers/cxl/core/regs.c |  9 ---------
>>>   drivers/cxl/pci.c       | 12 ++++++++++++
>>>   include/linux/cxl/cxl.h |  2 ++
>>>   4 files changed, 31 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>>> index 3d6564dbda57..57370d9beb32 100644
>>> --- a/drivers/cxl/core/pci.c
>>> +++ b/drivers/cxl/core/pci.c
>>> @@ -7,6 +7,7 @@
>>>   #include <linux/pci.h>
>>>   #include <linux/pci-doe.h>
>>>   #include <linux/aer.h>
>>> +#include <linux/cxl/cxl.h>
>>>   #include <linux/cxl/pci.h>
>>>   #include <cxlpci.h>
>>>   #include <cxlmem.h>
>>> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>>>   				     __cxl_endpoint_decoder_reset_detected);
>>>   }
>>>   EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
>>> +
>>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>>> +			u32 *current_caps)
>>> +{
>>> +	if (current_caps)
>>> +		*current_caps = cxlds->capabilities;
>>> +
>>> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
>>> +		cxlds->capabilities, expected_caps);
>>> +
>>> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
>>> +		return false;
>>> +
>>> +	return true;
>>> +}
>>> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
>> Why has to use this 'u32 *current_caps' as a parameter? if user wants to know the capabilities of a device, they can get it from cxlds->capabilities directly.
>>
> Sorry, I missed something implemented in PATCH #1, seems like you can not access struct cxl_dev_state from efx driver side. right?


Yes. The idea is to avoid that for facilitating changes to those updated 
structs. Initially, during the RFC review, it was said let's enforce 
because accel drivers should not be trusted, but I think the main 
concern is maintenance.


>
>>> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
>>> index 8b8abcadcb93..35f6dc97be6e 100644
>>> --- a/drivers/cxl/core/regs.c
>>> +++ b/drivers/cxl/core/regs.c
>>> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>>>   	case CXL_REGLOC_RBI_MEMDEV:
>>>   		dev_map = &map->device_map;
>>>   		cxl_probe_device_regs(host, base, dev_map, caps);
>>> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
>>> -		    !dev_map->memdev.valid) {
>>> -			dev_err(host, "registers not found: %s%s%s\n",
>>> -				!dev_map->status.valid ? "status " : "",
>>> -				!dev_map->mbox.valid ? "mbox " : "",
>>> -				!dev_map->memdev.valid ? "memdev " : "");
>>> -			return -ENXIO;
>>> -		}
>>> -
>>>   		dev_dbg(host, "Probing device registers...\n");
>>>   		break;
>>>   	default:
>>> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
>>> index 58f325019886..bec660357eec 100644
>>> --- a/drivers/cxl/pci.c
>>> +++ b/drivers/cxl/pci.c
>>> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>   	struct cxl_register_map map;
>>>   	struct cxl_memdev *cxlmd;
>>>   	int i, rc, pmu_count;
>>> +	u32 expected, found;
>>>   	bool irq_avail;
>>>   	u16 dvsec;
>>>   
>>> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>   	if (rc)
>>>   		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>>>   
>>> +	/* These are the mandatory capabilities for a Type3 device */
>>> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>>> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
>>> +
>>> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
>>> +		dev_err(&pdev->dev,
>>> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
>>> +			expected, found);
>>> +		return -ENXIO;
>>> +	}
>>> +
>> Same as above, the capabilities already are cached in cxlds->capabilities. seems like that the 'found' can be removed and using cxlds->capabilities directly here.
>>
>>
>>>   	rc = cxl_await_media_ready(cxlds);
>>>   	if (rc == 0)
>>>   		cxlds->media_ready = true;
>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>> index 930b1b9c1d6a..4a57bf60403d 100644
>>> --- a/include/linux/cxl/cxl.h
>>> +++ b/include/linux/cxl/cxl.h
>>> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>>>   void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>>>   int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>>>   		     enum cxl_resource);
>>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>>> +			u32 *current_caps);
>>>   #endif
>>
Dave Jiang Sept. 11, 2024, 11:06 p.m. UTC | #4
On 9/7/24 1:18 AM, alejandro.lucero-palau@amd.com wrote:
> From: Alejandro Lucero <alucerop@amd.com>
> 
> During CXL device initialization supported capabilities by the device
> are discovered. Type3 and Type2 devices have different mandatory
> capabilities and a Type2 expects a specific set including optional
> capabilities.
> 
> Add a function for checking expected capabilities against those found
> during initialization.
> 
> Rely on this function for validating capabilities instead of when CXL
> regs are probed.
> 
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> ---
>  drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>  drivers/cxl/core/regs.c |  9 ---------
>  drivers/cxl/pci.c       | 12 ++++++++++++
>  include/linux/cxl/cxl.h |  2 ++
>  4 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 3d6564dbda57..57370d9beb32 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -7,6 +7,7 @@
>  #include <linux/pci.h>
>  #include <linux/pci-doe.h>
>  #include <linux/aer.h>
> +#include <linux/cxl/cxl.h>
>  #include <linux/cxl/pci.h>
>  #include <cxlpci.h>
>  #include <cxlmem.h>
> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>  				     __cxl_endpoint_decoder_reset_detected);
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
> +
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps)
> +{
> +	if (current_caps)
> +		*current_caps = cxlds->capabilities;
> +
> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
> +		cxlds->capabilities, expected_caps);
> +
> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
> +		return false;
> +
> +	return true;


I think you can just do 
return (cxlds->capabilities & expected_caps) == expected_caps;

> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> index 8b8abcadcb93..35f6dc97be6e 100644
> --- a/drivers/cxl/core/regs.c
> +++ b/drivers/cxl/core/regs.c
> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>  	case CXL_REGLOC_RBI_MEMDEV:
>  		dev_map = &map->device_map;
>  		cxl_probe_device_regs(host, base, dev_map, caps);
> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
> -		    !dev_map->memdev.valid) {
> -			dev_err(host, "registers not found: %s%s%s\n",
> -				!dev_map->status.valid ? "status " : "",
> -				!dev_map->mbox.valid ? "mbox " : "",
> -				!dev_map->memdev.valid ? "memdev " : "");
> -			return -ENXIO;
> -		}
> -
>  		dev_dbg(host, "Probing device registers...\n");
>  		break;
>  	default:
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 58f325019886..bec660357eec 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	struct cxl_register_map map;
>  	struct cxl_memdev *cxlmd;
>  	int i, rc, pmu_count;
> +	u32 expected, found;
>  	bool irq_avail;
>  	u16 dvsec;
>  
> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>  
> +	/* These are the mandatory capabilities for a Type3 device */
> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);

Maybe we can create a static mask for the expected mandatory type3 caps.

I also wonder if cxl_pci_check_caps() can key on pci device type or class type and know which expected mask to use and no need to pass in the expected mask. Or since the driver is being attached to a certain device type, it should know what it is already, maybe we can attach static driver data to it that can be retrieved as the expected_caps:

struct cxl_driver_data {
	u32 expected_caps;
};

static struct cxl_driver_data cxl_driver_data = {
	.expected_caps = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
			 BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV),
};

static const struct pci_device_id cxl_mem_pci_tbl[] = {
/* Maybe need a new PCI_DEVICE_CLASS_DATA() macro */
	{
		.class = (PCI_CLASS_MEMORY_CXL << 8  | CXL_MEMORY_PROGIF),
		.class_mask = ~0,
		.vendor = PCI_ANY_ID,
		.device = PCI_ANY_ID,
		.sub_vendor = PCI_ANY_ID,
		.subdevice = PCI_ANY_ID,
		.driver_data = (kernel_ulong_t)&cxl_driver_data,
	},
	{},
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl); 

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_driver_data *data = (struct cxl_driver_data *)id->driver_data;

	rc = cxl_pci_check_caps(cxlds, data->expected_caps, &found);
....
}

> +
> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
> +		dev_err(&pdev->dev,
> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
> +			expected, found);
> +		return -ENXIO;
> +	}
> +
>  	rc = cxl_await_media_ready(cxlds);
>  	if (rc == 0)
>  		cxlds->media_ready = true;
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index 930b1b9c1d6a..4a57bf60403d 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>  void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>  int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>  		     enum cxl_resource);
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps);
>  #endif
Jonathan Cameron Sept. 13, 2024, 5:28 p.m. UTC | #5
On Sat, 7 Sep 2024 09:18:19 +0100
<alejandro.lucero-palau@amd.com> wrote:

> From: Alejandro Lucero <alucerop@amd.com>
> 
> During CXL device initialization supported capabilities by the device
> are discovered. Type3 and Type2 devices have different mandatory
> capabilities and a Type2 expects a specific set including optional
> capabilities.
> 
> Add a function for checking expected capabilities against those found
> during initialization.
> 
> Rely on this function for validating capabilities instead of when CXL
> regs are probed.
> 
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> ---
>  drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>  drivers/cxl/core/regs.c |  9 ---------
>  drivers/cxl/pci.c       | 12 ++++++++++++
>  include/linux/cxl/cxl.h |  2 ++
>  4 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index 3d6564dbda57..57370d9beb32 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -7,6 +7,7 @@
>  #include <linux/pci.h>
>  #include <linux/pci-doe.h>
>  #include <linux/aer.h>
> +#include <linux/cxl/cxl.h>
>  #include <linux/cxl/pci.h>
>  #include <cxlpci.h>
>  #include <cxlmem.h>
> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>  				     __cxl_endpoint_decoder_reset_detected);
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
> +
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps)
> +{
> +	if (current_caps)
> +		*current_caps = cxlds->capabilities;

I'd split this up as setting a value in a 'check_caps' and comparisom with
a list is odd.

Also bitmaps all the way would be better.
Given you know it fits in one unsigned long you can short cut the
assignment of the bits though. Easy to extend that later if the bitmap
gets bigger.


> +
> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
> +		cxlds->capabilities, expected_caps);
> +
> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
> +		return false;
> +
> +	return true;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
> index 8b8abcadcb93..35f6dc97be6e 100644
> --- a/drivers/cxl/core/regs.c
> +++ b/drivers/cxl/core/regs.c
> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>  	case CXL_REGLOC_RBI_MEMDEV:
>  		dev_map = &map->device_map;
>  		cxl_probe_device_regs(host, base, dev_map, caps);
> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
> -		    !dev_map->memdev.valid) {
> -			dev_err(host, "registers not found: %s%s%s\n",
> -				!dev_map->status.valid ? "status " : "",
> -				!dev_map->mbox.valid ? "mbox " : "",
> -				!dev_map->memdev.valid ? "memdev " : "");
> -			return -ENXIO;
> -		}
> -
>  		dev_dbg(host, "Probing device registers...\n");
>  		break;
>  	default:
> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
> index 58f325019886..bec660357eec 100644
> --- a/drivers/cxl/pci.c
> +++ b/drivers/cxl/pci.c
> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	struct cxl_register_map map;
>  	struct cxl_memdev *cxlmd;
>  	int i, rc, pmu_count;
> +	u32 expected, found;
>  	bool irq_avail;
>  	u16 dvsec;
>  
> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  	if (rc)
>  		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>  
> +	/* These are the mandatory capabilities for a Type3 device */
> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
> +
> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
> +		dev_err(&pdev->dev,
> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
> +			expected, found);
> +		return -ENXIO;
> +	}
> +
>  	rc = cxl_await_media_ready(cxlds);
>  	if (rc == 0)
>  		cxlds->media_ready = true;
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index 930b1b9c1d6a..4a57bf60403d 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>  void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>  int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>  		     enum cxl_resource);
> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
> +			u32 *current_caps);
>  #endif
Alejandro Lucero Palau Sept. 16, 2024, 8:56 a.m. UTC | #6
On 9/12/24 00:06, Dave Jiang wrote:
>
> On 9/7/24 1:18 AM, alejandro.lucero-palau@amd.com wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> During CXL device initialization supported capabilities by the device
>> are discovered. Type3 and Type2 devices have different mandatory
>> capabilities and a Type2 expects a specific set including optional
>> capabilities.
>>
>> Add a function for checking expected capabilities against those found
>> during initialization.
>>
>> Rely on this function for validating capabilities instead of when CXL
>> regs are probed.
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> ---
>>   drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>>   drivers/cxl/core/regs.c |  9 ---------
>>   drivers/cxl/pci.c       | 12 ++++++++++++
>>   include/linux/cxl/cxl.h |  2 ++
>>   4 files changed, 31 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 3d6564dbda57..57370d9beb32 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -7,6 +7,7 @@
>>   #include <linux/pci.h>
>>   #include <linux/pci-doe.h>
>>   #include <linux/aer.h>
>> +#include <linux/cxl/cxl.h>
>>   #include <linux/cxl/pci.h>
>>   #include <cxlpci.h>
>>   #include <cxlmem.h>
>> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>>   				     __cxl_endpoint_decoder_reset_detected);
>>   }
>>   EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
>> +
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps)
>> +{
>> +	if (current_caps)
>> +		*current_caps = cxlds->capabilities;
>> +
>> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
>> +		cxlds->capabilities, expected_caps);
>> +
>> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
>> +		return false;
>> +
>> +	return true;
>
> I think you can just do
> return (cxlds->capabilities & expected_caps) == expected_caps;


Yes. I'll do.


>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
>> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
>> index 8b8abcadcb93..35f6dc97be6e 100644
>> --- a/drivers/cxl/core/regs.c
>> +++ b/drivers/cxl/core/regs.c
>> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>>   	case CXL_REGLOC_RBI_MEMDEV:
>>   		dev_map = &map->device_map;
>>   		cxl_probe_device_regs(host, base, dev_map, caps);
>> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
>> -		    !dev_map->memdev.valid) {
>> -			dev_err(host, "registers not found: %s%s%s\n",
>> -				!dev_map->status.valid ? "status " : "",
>> -				!dev_map->mbox.valid ? "mbox " : "",
>> -				!dev_map->memdev.valid ? "memdev " : "");
>> -			return -ENXIO;
>> -		}
>> -
>>   		dev_dbg(host, "Probing device registers...\n");
>>   		break;
>>   	default:
>> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
>> index 58f325019886..bec660357eec 100644
>> --- a/drivers/cxl/pci.c
>> +++ b/drivers/cxl/pci.c
>> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	struct cxl_register_map map;
>>   	struct cxl_memdev *cxlmd;
>>   	int i, rc, pmu_count;
>> +	u32 expected, found;
>>   	bool irq_avail;
>>   	u16 dvsec;
>>   
>> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	if (rc)
>>   		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>>   
>> +	/* These are the mandatory capabilities for a Type3 device */
>> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
> Maybe we can create a static mask for the expected mandatory type3 caps.
>
> I also wonder if cxl_pci_check_caps() can key on pci device type or class type and know which expected mask to use and no need to pass in the expected mask. Or since the driver is being attached to a certain device type, it should know what it is already, maybe we can attach static driver data to it that can be retrieved as the expected_caps:
>
> struct cxl_driver_data {
> 	u32 expected_caps;
> };
>
> static struct cxl_driver_data cxl_driver_data = {
> 	.expected_caps = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
> 			 BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV),
> };
>
> static const struct pci_device_id cxl_mem_pci_tbl[] = {
> /* Maybe need a new PCI_DEVICE_CLASS_DATA() macro */
> 	{
> 		.class = (PCI_CLASS_MEMORY_CXL << 8  | CXL_MEMORY_PROGIF),
> 		.class_mask = ~0,
> 		.vendor = PCI_ANY_ID,
> 		.device = PCI_ANY_ID,
> 		.sub_vendor = PCI_ANY_ID,
> 		.subdevice = PCI_ANY_ID,
> 		.driver_data = (kernel_ulong_t)&cxl_driver_data,
> 	},
> 	{},
> };
> MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
>
> static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> {
> 	struct cxl_driver_data *data = (struct cxl_driver_data *)id->driver_data;
>
> 	rc = cxl_pci_check_caps(cxlds, data->expected_caps, &found);
> ....
> }


I do not think this requires so much plumbing, but maybe using a macro 
for type3 mandatory caps makes sense.

About accel drivers, I'm not sure a macro is suitable because, I think, 
when hopefully CXL designs become mainstream, the caps will likely be 
needed to be extracted from the device somehow, other than what the CXL 
regs tell themselves, just for the sake of double checking. Thinking 
here of same base device with some changes regarding CXL in different 
device models/flavors.


>> +
>> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
>> +		dev_err(&pdev->dev,
>> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
>> +			expected, found);
>> +		return -ENXIO;
>> +	}
>> +
>>   	rc = cxl_await_media_ready(cxlds);
>>   	if (rc == 0)
>>   		cxlds->media_ready = true;
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index 930b1b9c1d6a..4a57bf60403d 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>>   void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>>   int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>>   		     enum cxl_resource);
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps);
>>   #endif
Alejandro Lucero Palau Sept. 16, 2024, 12:17 p.m. UTC | #7
On 9/13/24 18:28, Jonathan Cameron wrote:
> On Sat, 7 Sep 2024 09:18:19 +0100
> <alejandro.lucero-palau@amd.com> wrote:
>
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> During CXL device initialization supported capabilities by the device
>> are discovered. Type3 and Type2 devices have different mandatory
>> capabilities and a Type2 expects a specific set including optional
>> capabilities.
>>
>> Add a function for checking expected capabilities against those found
>> during initialization.
>>
>> Rely on this function for validating capabilities instead of when CXL
>> regs are probed.
>>
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> ---
>>   drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>>   drivers/cxl/core/regs.c |  9 ---------
>>   drivers/cxl/pci.c       | 12 ++++++++++++
>>   include/linux/cxl/cxl.h |  2 ++
>>   4 files changed, 31 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index 3d6564dbda57..57370d9beb32 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -7,6 +7,7 @@
>>   #include <linux/pci.h>
>>   #include <linux/pci-doe.h>
>>   #include <linux/aer.h>
>> +#include <linux/cxl/cxl.h>
>>   #include <linux/cxl/pci.h>
>>   #include <cxlpci.h>
>>   #include <cxlmem.h>
>> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>>   				     __cxl_endpoint_decoder_reset_detected);
>>   }
>>   EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
>> +
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps)
>> +{
>> +	if (current_caps)
>> +		*current_caps = cxlds->capabilities;
> I'd split this up as setting a value in a 'check_caps' and comparisom with
> a list is odd.


The idea is to return the caps discovered for the accel driver to know 
which one is not there.

The other option is to print out here a message which does not seem 
right to me.


> Also bitmaps all the way would be better.
> Given you know it fits in one unsigned long you can short cut the
> assignment of the bits though. Easy to extend that later if the bitmap
> gets bigger.


Yes, I'll adapt it to using a bitmap.

Thanks


>
>> +
>> +	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
>> +		cxlds->capabilities, expected_caps);
>> +
>> +	if ((cxlds->capabilities & expected_caps) != expected_caps)
>> +		return false;
>> +
>> +	return true;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
>> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
>> index 8b8abcadcb93..35f6dc97be6e 100644
>> --- a/drivers/cxl/core/regs.c
>> +++ b/drivers/cxl/core/regs.c
>> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>>   	case CXL_REGLOC_RBI_MEMDEV:
>>   		dev_map = &map->device_map;
>>   		cxl_probe_device_regs(host, base, dev_map, caps);
>> -		if (!dev_map->status.valid || !dev_map->mbox.valid ||
>> -		    !dev_map->memdev.valid) {
>> -			dev_err(host, "registers not found: %s%s%s\n",
>> -				!dev_map->status.valid ? "status " : "",
>> -				!dev_map->mbox.valid ? "mbox " : "",
>> -				!dev_map->memdev.valid ? "memdev " : "");
>> -			return -ENXIO;
>> -		}
>> -
>>   		dev_dbg(host, "Probing device registers...\n");
>>   		break;
>>   	default:
>> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
>> index 58f325019886..bec660357eec 100644
>> --- a/drivers/cxl/pci.c
>> +++ b/drivers/cxl/pci.c
>> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	struct cxl_register_map map;
>>   	struct cxl_memdev *cxlmd;
>>   	int i, rc, pmu_count;
>> +	u32 expected, found;
>>   	bool irq_avail;
>>   	u16 dvsec;
>>   
>> @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>   	if (rc)
>>   		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>>   
>> +	/* These are the mandatory capabilities for a Type3 device */
>> +	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>> +		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
>> +
>> +	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
>> +		dev_err(&pdev->dev,
>> +			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
>> +			expected, found);
>> +		return -ENXIO;
>> +	}
>> +
>>   	rc = cxl_await_media_ready(cxlds);
>>   	if (rc == 0)
>>   		cxlds->media_ready = true;
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index 930b1b9c1d6a..4a57bf60403d 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>>   void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>>   int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>>   		     enum cxl_resource);
>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>> +			u32 *current_caps);
>>   #endif
Dave Jiang Sept. 16, 2024, 4:11 p.m. UTC | #8
On 9/16/24 1:56 AM, Alejandro Lucero Palau wrote:
> 
> On 9/12/24 00:06, Dave Jiang wrote:
>>
>> On 9/7/24 1:18 AM, alejandro.lucero-palau@amd.com wrote:
>>> From: Alejandro Lucero <alucerop@amd.com>
>>>
>>> During CXL device initialization supported capabilities by the device
>>> are discovered. Type3 and Type2 devices have different mandatory
>>> capabilities and a Type2 expects a specific set including optional
>>> capabilities.
>>>
>>> Add a function for checking expected capabilities against those found
>>> during initialization.
>>>
>>> Rely on this function for validating capabilities instead of when CXL
>>> regs are probed.
>>>
>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>> ---
>>>   drivers/cxl/core/pci.c  | 17 +++++++++++++++++
>>>   drivers/cxl/core/regs.c |  9 ---------
>>>   drivers/cxl/pci.c       | 12 ++++++++++++
>>>   include/linux/cxl/cxl.h |  2 ++
>>>   4 files changed, 31 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>>> index 3d6564dbda57..57370d9beb32 100644
>>> --- a/drivers/cxl/core/pci.c
>>> +++ b/drivers/cxl/core/pci.c
>>> @@ -7,6 +7,7 @@
>>>   #include <linux/pci.h>
>>>   #include <linux/pci-doe.h>
>>>   #include <linux/aer.h>
>>> +#include <linux/cxl/cxl.h>
>>>   #include <linux/cxl/pci.h>
>>>   #include <cxlpci.h>
>>>   #include <cxlmem.h>
>>> @@ -1077,3 +1078,19 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
>>>                        __cxl_endpoint_decoder_reset_detected);
>>>   }
>>>   EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
>>> +
>>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>>> +            u32 *current_caps)
>>> +{
>>> +    if (current_caps)
>>> +        *current_caps = cxlds->capabilities;
>>> +
>>> +    dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
>>> +        cxlds->capabilities, expected_caps);
>>> +
>>> +    if ((cxlds->capabilities & expected_caps) != expected_caps)
>>> +        return false;
>>> +
>>> +    return true;
>>
>> I think you can just do
>> return (cxlds->capabilities & expected_caps) == expected_caps;
> 
> 
> Yes. I'll do.
> 
> 
>>> +}
>>> +EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
>>> diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
>>> index 8b8abcadcb93..35f6dc97be6e 100644
>>> --- a/drivers/cxl/core/regs.c
>>> +++ b/drivers/cxl/core/regs.c
>>> @@ -443,15 +443,6 @@ static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
>>>       case CXL_REGLOC_RBI_MEMDEV:
>>>           dev_map = &map->device_map;
>>>           cxl_probe_device_regs(host, base, dev_map, caps);
>>> -        if (!dev_map->status.valid || !dev_map->mbox.valid ||
>>> -            !dev_map->memdev.valid) {
>>> -            dev_err(host, "registers not found: %s%s%s\n",
>>> -                !dev_map->status.valid ? "status " : "",
>>> -                !dev_map->mbox.valid ? "mbox " : "",
>>> -                !dev_map->memdev.valid ? "memdev " : "");
>>> -            return -ENXIO;
>>> -        }
>>> -
>>>           dev_dbg(host, "Probing device registers...\n");
>>>           break;
>>>       default:
>>> diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
>>> index 58f325019886..bec660357eec 100644
>>> --- a/drivers/cxl/pci.c
>>> +++ b/drivers/cxl/pci.c
>>> @@ -796,6 +796,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>       struct cxl_register_map map;
>>>       struct cxl_memdev *cxlmd;
>>>       int i, rc, pmu_count;
>>> +    u32 expected, found;
>>>       bool irq_avail;
>>>       u16 dvsec;
>>>   @@ -852,6 +853,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>>>       if (rc)
>>>           dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
>>>   +    /* These are the mandatory capabilities for a Type3 device */
>>> +    expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>>> +           BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
>> Maybe we can create a static mask for the expected mandatory type3 caps.
>>
>> I also wonder if cxl_pci_check_caps() can key on pci device type or class type and know which expected mask to use and no need to pass in the expected mask. Or since the driver is being attached to a certain device type, it should know what it is already, maybe we can attach static driver data to it that can be retrieved as the expected_caps:
>>
>> struct cxl_driver_data {
>>     u32 expected_caps;
>> };
>>
>> static struct cxl_driver_data cxl_driver_data = {
>>     .expected_caps = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
>>              BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV),
>> };
>>
>> static const struct pci_device_id cxl_mem_pci_tbl[] = {
>> /* Maybe need a new PCI_DEVICE_CLASS_DATA() macro */
>>     {
>>         .class = (PCI_CLASS_MEMORY_CXL << 8  | CXL_MEMORY_PROGIF),
>>         .class_mask = ~0,
>>         .vendor = PCI_ANY_ID,
>>         .device = PCI_ANY_ID,
>>         .sub_vendor = PCI_ANY_ID,
>>         .subdevice = PCI_ANY_ID,
>>         .driver_data = (kernel_ulong_t)&cxl_driver_data,
>>     },
>>     {},
>> };
>> MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
>>
>> static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>> {
>>     struct cxl_driver_data *data = (struct cxl_driver_data *)id->driver_data;
>>
>>     rc = cxl_pci_check_caps(cxlds, data->expected_caps, &found);
>> ....
>> }
> 
> 
> I do not think this requires so much plumbing, but maybe using a macro for type3 mandatory caps makes sense.

ok sure.
> 
> About accel drivers, I'm not sure a macro is suitable because, I think, when hopefully CXL designs become mainstream, the caps will likely be needed to be extracted from the device somehow, other than what the CXL regs tell themselves, just for the sake of double checking. Thinking here of same base device with some changes regarding CXL in different device models/flavors.

I can see type2 have very different mandatory bits depending on the device. Maybe there's a small set of must haves for all.

> 
> 
>>> +
>>> +    if (!cxl_pci_check_caps(cxlds, expected, &found)) {
>>> +        dev_err(&pdev->dev,
>>> +            "Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
>>> +            expected, found);
>>> +        return -ENXIO;
>>> +    }
>>> +
>>>       rc = cxl_await_media_ready(cxlds);
>>>       if (rc == 0)
>>>           cxlds->media_ready = true;
>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>> index 930b1b9c1d6a..4a57bf60403d 100644
>>> --- a/include/linux/cxl/cxl.h
>>> +++ b/include/linux/cxl/cxl.h
>>> @@ -48,4 +48,6 @@ void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
>>>   void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
>>>   int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
>>>                enum cxl_resource);
>>> +bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
>>> +            u32 *current_caps);
>>>   #endif
>
diff mbox series

Patch

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 3d6564dbda57..57370d9beb32 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -7,6 +7,7 @@ 
 #include <linux/pci.h>
 #include <linux/pci-doe.h>
 #include <linux/aer.h>
+#include <linux/cxl/cxl.h>
 #include <linux/cxl/pci.h>
 #include <cxlpci.h>
 #include <cxlmem.h>
@@ -1077,3 +1078,19 @@  bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
 				     __cxl_endpoint_decoder_reset_detected);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, CXL);
+
+bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
+			u32 *current_caps)
+{
+	if (current_caps)
+		*current_caps = cxlds->capabilities;
+
+	dev_dbg(cxlds->dev, "Checking cxlds caps 0x%08x vs expected caps 0x%08x\n",
+		cxlds->capabilities, expected_caps);
+
+	if ((cxlds->capabilities & expected_caps) != expected_caps)
+		return false;
+
+	return true;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_pci_check_caps, CXL);
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 8b8abcadcb93..35f6dc97be6e 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -443,15 +443,6 @@  static int cxl_probe_regs(struct cxl_register_map *map, u32 *caps)
 	case CXL_REGLOC_RBI_MEMDEV:
 		dev_map = &map->device_map;
 		cxl_probe_device_regs(host, base, dev_map, caps);
-		if (!dev_map->status.valid || !dev_map->mbox.valid ||
-		    !dev_map->memdev.valid) {
-			dev_err(host, "registers not found: %s%s%s\n",
-				!dev_map->status.valid ? "status " : "",
-				!dev_map->mbox.valid ? "mbox " : "",
-				!dev_map->memdev.valid ? "memdev " : "");
-			return -ENXIO;
-		}
-
 		dev_dbg(host, "Probing device registers...\n");
 		break;
 	default:
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 58f325019886..bec660357eec 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -796,6 +796,7 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	struct cxl_register_map map;
 	struct cxl_memdev *cxlmd;
 	int i, rc, pmu_count;
+	u32 expected, found;
 	bool irq_avail;
 	u16 dvsec;
 
@@ -852,6 +853,17 @@  static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		dev_dbg(&pdev->dev, "Failed to map RAS capability.\n");
 
+	/* These are the mandatory capabilities for a Type3 device */
+	expected = BIT(CXL_DEV_CAP_HDM) | BIT(CXL_DEV_CAP_DEV_STATUS) |
+		   BIT(CXL_DEV_CAP_MAILBOX_PRIMARY) | BIT(CXL_DEV_CAP_MEMDEV);
+
+	if (!cxl_pci_check_caps(cxlds, expected, &found)) {
+		dev_err(&pdev->dev,
+			"Expected capabilities not matching with found capabilities: (%08x - %08x)\n",
+			expected, found);
+		return -ENXIO;
+	}
+
 	rc = cxl_await_media_ready(cxlds);
 	if (rc == 0)
 		cxlds->media_ready = true;
diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
index 930b1b9c1d6a..4a57bf60403d 100644
--- a/include/linux/cxl/cxl.h
+++ b/include/linux/cxl/cxl.h
@@ -48,4 +48,6 @@  void cxl_set_dvsec(struct cxl_dev_state *cxlds, u16 dvsec);
 void cxl_set_serial(struct cxl_dev_state *cxlds, u64 serial);
 int cxl_set_resource(struct cxl_dev_state *cxlds, struct resource res,
 		     enum cxl_resource);
+bool cxl_pci_check_caps(struct cxl_dev_state *cxlds, u32 expected_caps,
+			u32 *current_caps);
 #endif