diff mbox series

[v4,22/26] cxl: allow region creation by type2 drivers

Message ID 20241017165225.21206-23-alejandro.lucero-palau@amd.com (mailing list archive)
State Not Applicable
Headers show
Series cxl: add Type2 device support | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Lucero Palau, Alejandro Oct. 17, 2024, 4:52 p.m. UTC
From: Alejandro Lucero <alucerop@amd.com>

Creating a CXL region requires userspace intervention through the cxl
sysfs files. Type2 support should allow accelerator drivers to create
such cxl region from kernel code.

Adding that functionality and integrating it with current support for
memory expanders.

Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/

Signed-off-by: Alejandro Lucero <alucerop@amd.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
 drivers/cxl/cxlmem.h      |   2 +
 include/linux/cxl/cxl.h   |   4 ++
 3 files changed, 138 insertions(+), 15 deletions(-)

Comments

Ben Cheatham Oct. 17, 2024, 9:49 p.m. UTC | #1
On 10/17/24 11:52 AM, alejandro.lucero-palau@amd.com wrote:
> From: Alejandro Lucero <alucerop@amd.com>
> 
> Creating a CXL region requires userspace intervention through the cxl
> sysfs files. Type2 support should allow accelerator drivers to create
> such cxl region from kernel code.
> 
> Adding that functionality and integrating it with current support for
> memory expanders.
> 
> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
> 

So I ran into an issue at this point when using v3 as a base for my own testing. The problem is that
you are doing manual region management while not explicitly preventing auto region discovery when
devm_cxl_add_memdev() is called (patch 14/26 in this series). This caused some resource allocation
conflicts which then caused both the auto region and the manual region set up to fail. To make it more
concrete, here's the flow I encountered (I tried something new here, let me know if the ascii
is all mangled):

devm_cxl_add_memdev() is called                                                                         
│                                                                                                       
├───► cxl_mem probes new memdev                                                                         
│     │                                                                                                 
│     ├─► cxl_mem probe adds new endpoint port                                                          
│     │                                                                                                 
│     └─► cxl_mem probe finishes                                                                        
├───────────────────────────────────────────────► Manual region set up starts (finding free space, etc.)
├───► cxl_port probes the new endpoint port            │                                                
│     │                                                │                                                
│     ├─► cxl_port probe sets up new endpoint          ├─► create_new_region() is called                
│     │                                                │                                                
│     ├─► cxl_port calls discover_region()             │                                                
│     │                                                │                                                
│     ├─► discover_region() creates new auto           ├─► create_new_region() creates
│     │   discoveredregion                             │   new manual region                                          
│◄────◄────────────────────────────────────────────────┘                                                
│                                                                                                       
└─► Region creation fails due to resource contention/race (DPA resource, RAM resource, etc.)

The timeline is a little off here I think, but it should be close enough to illustrate the point.
The easy solution here to not allow auto region discovery for CXL type 2 devices, like so:

diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index 22a9ba89cf5a..07b991e2c05b 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -34,6 +34,7 @@ static void schedule_detach(void *cxlmd)
 static int discover_region(struct device *dev, void *root)
 {
        struct cxl_endpoint_decoder *cxled;
+       struct cxl_memdev *cxlmd;
        int rc;

        dev_err(dev, "%s:%d: Enter\n", __func__, __LINE__);
@@ -45,7 +46,9 @@ static int discover_region(struct device *dev, void *root)
        if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
                return 0;

-       if (cxled->state != CXL_DECODER_STATE_AUTO)
+       cxlmd = cxled_to_memdev(cxled);
+       if (cxled->state != CXL_DECODER_STATE_AUTO ||
+           cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
                return 0;

I think there's a better way to go about this, more to say about it in patch 24/26. I've
dropped this here just in case you don't like my ideas there ;).
                                                                    
> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>  drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
>  drivers/cxl/cxlmem.h      |   2 +
>  include/linux/cxl/cxl.h   |   4 ++
>  3 files changed, 138 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index d08a2a848ac9..04c270a29e96 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2253,6 +2253,18 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>  	return rc;
>  }
>  
> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
> +{
> +	int rc;
> +
> +	down_write(&cxl_region_rwsem);
> +	cxled->mode = CXL_DECODER_DEAD;
> +	rc = cxl_region_detach(cxled);
> +	up_write(&cxl_region_rwsem);
> +	return rc;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
> +
>  void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>  {
>  	down_write(&cxl_region_rwsem);
> @@ -2781,6 +2793,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
>  	return to_cxl_region(region_dev);
>  }
>  
> +static void drop_region(struct cxl_region *cxlr)
> +{
> +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> +	struct cxl_port *port = cxlrd_to_port(cxlrd);
> +
> +	devm_release_action(port->uport_dev, unregister_region, cxlr);
> +}
> +
>  static ssize_t delete_region_store(struct device *dev,
>  				   struct device_attribute *attr,
>  				   const char *buf, size_t len)
> @@ -3386,17 +3406,18 @@ static int match_region_by_range(struct device *dev, void *data)
>  	return rc;
>  }
>  
> -/* Establish an empty region covering the given HPA range */
> -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> -					   struct cxl_endpoint_decoder *cxled)
> +static void construct_region_end(void)
> +{
> +	up_write(&cxl_region_rwsem);
> +}
> +
> +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
> +						 struct cxl_endpoint_decoder *cxled)
>  {
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> -	struct cxl_port *port = cxlrd_to_port(cxlrd);
> -	struct range *hpa = &cxled->cxld.hpa_range;
>  	struct cxl_region_params *p;
>  	struct cxl_region *cxlr;
> -	struct resource *res;
> -	int rc;
> +	int err;
>  
>  	do {
>  		cxlr = __create_region(cxlrd, cxled->mode,
> @@ -3405,8 +3426,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>  
>  	if (IS_ERR(cxlr)) {
> -		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s failed assign region: %ld\n",
> +		dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
>  			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>  			__func__, PTR_ERR(cxlr));
>  		return cxlr;
> @@ -3416,13 +3436,33 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  	p = &cxlr->params;
>  	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>  		dev_err(cxlmd->dev.parent,
> -			"%s:%s: %s autodiscovery interrupted\n",
> +			"%s:%s: %s region setup interrupted\n",
>  			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>  			__func__);
> -		rc = -EBUSY;
> -		goto err;
> +		err = -EBUSY;
> +		construct_region_end();
> +		drop_region(cxlr);
> +		return ERR_PTR(err);
>  	}
>  
> +	return cxlr;
> +}
> +
> +/* Establish an empty region covering the given HPA range */
> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
> +					   struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> +	struct range *hpa = &cxled->cxld.hpa_range;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	struct resource *res;
> +	int rc;
> +
> +	cxlr = construct_region_begin(cxlrd, cxled);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
>  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>  
>  	res = kmalloc(sizeof(*res), GFP_KERNEL);
> @@ -3445,6 +3485,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  			 __func__, dev_name(&cxlr->dev));
>  	}
>  
> +	p = &cxlr->params;
>  	p->res = res;
>  	p->interleave_ways = cxled->cxld.interleave_ways;
>  	p->interleave_granularity = cxled->cxld.interleave_granularity;
> @@ -3462,15 +3503,91 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>  	/* ...to match put_device() in cxl_add_to_region() */
>  	get_device(&cxlr->dev);
>  	up_write(&cxl_region_rwsem);
> -
> +	construct_region_end();
>  	return cxlr;
>  
>  err:
> -	up_write(&cxl_region_rwsem);
> -	devm_release_action(port->uport_dev, unregister_region, cxlr);
> +	construct_region_end();
> +	drop_region(cxlr);
> +	return ERR_PTR(rc);
> +}
> +
> +static struct cxl_region *
> +__construct_new_region(struct cxl_root_decoder *cxlrd,
> +		       struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
> +	struct cxl_region_params *p;
> +	struct cxl_region *cxlr;
> +	int rc;
> +
> +	cxlr = construct_region_begin(cxlrd, cxled);
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	rc = set_interleave_ways(cxlr, 1);
> +	if (rc)
> +		goto err;
> +
> +	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
> +	if (rc)
> +		goto err;
> +
> +	rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
> +	if (rc)
> +		goto err;
> +
> +	down_read(&cxl_dpa_rwsem);
> +	rc = cxl_region_attach(cxlr, cxled, 0);
> +	up_read(&cxl_dpa_rwsem);
> +
> +	if (rc)
> +		goto err;
> +
> +	rc = cxl_region_decode_commit(cxlr);
> +	if (rc)
> +		goto err;
> +
> +	p = &cxlr->params;
> +	p->state = CXL_CONFIG_COMMIT;
> +
> +	construct_region_end();
> +	return cxlr;
> +err:
> +	construct_region_end();
> +	drop_region(cxlr);
>  	return ERR_PTR(rc);
>  }
>  
> +/**
> + * cxl_create_region - Establish a region given an endpoint decoder
> + * @cxlrd: root decoder to allocate HPA
> + * @cxled: endpoint decoder with reserved DPA capacity
> + *
> + * Returns a fully formed region in the commit state and attached to the
> + * cxl_region driver.
> + */
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder *cxled)
> +{
> +	struct cxl_region *cxlr;
> +
> +	mutex_lock(&cxlrd->range_lock);
> +	cxlr = __construct_new_region(cxlrd, cxled);
> +	mutex_unlock(&cxlrd->range_lock);
> +
> +	if (IS_ERR(cxlr))
> +		return cxlr;
> +
> +	if (device_attach(&cxlr->dev) <= 0) {
> +		dev_err(&cxlr->dev, "failed to create region\n");
> +		drop_region(cxlr);
> +		return ERR_PTR(-ENODEV);
> +	}
> +	return cxlr;
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
> +
>  int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
>  {
>  	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 68d28eab3696..0f5c71909fd1 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -875,4 +875,6 @@ struct cxl_hdm {
>  struct seq_file;
>  struct dentry *cxl_debugfs_create_dir(const char *dir);
>  void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder *cxled);
>  #endif /* __CXL_MEM_H__ */
> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
> index 45b6badb8048..c544339c2baf 100644
> --- a/include/linux/cxl/cxl.h
> +++ b/include/linux/cxl/cxl.h
> @@ -72,4 +72,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
>  					     resource_size_t min,
>  					     resource_size_t max);
>  int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
> +				     struct cxl_endpoint_decoder *cxled);
> +
> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
>  #endif
Alejandro Lucero Palau Oct. 18, 2024, 8:51 a.m. UTC | #2
On 10/17/24 22:49, Ben Cheatham wrote:
> On 10/17/24 11:52 AM, alejandro.lucero-palau@amd.com wrote:
>> From: Alejandro Lucero <alucerop@amd.com>
>>
>> Creating a CXL region requires userspace intervention through the cxl
>> sysfs files. Type2 support should allow accelerator drivers to create
>> such cxl region from kernel code.
>>
>> Adding that functionality and integrating it with current support for
>> memory expanders.
>>
>> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
>>
> So I ran into an issue at this point when using v3 as a base for my own testing. The problem is that
> you are doing manual region management while not explicitly preventing auto region discovery when
> devm_cxl_add_memdev() is called (patch 14/26 in this series). This caused some resource allocation
> conflicts which then caused both the auto region and the manual region set up to fail. To make it more
> concrete, here's the flow I encountered (I tried something new here, let me know if the ascii
> is all mangled):
>
> devm_cxl_add_memdev() is called
> │
> ├───► cxl_mem probes new memdev
> │     │
> │     ├─► cxl_mem probe adds new endpoint port
> │     │
> │     └─► cxl_mem probe finishes
> ├───────────────────────────────────────────────► Manual region set up starts (finding free space, etc.)
> ├───► cxl_port probes the new endpoint port            │
> │     │                                                │
> │     ├─► cxl_port probe sets up new endpoint          ├─► create_new_region() is called
> │     │                                                │
> │     ├─► cxl_port calls discover_region()             │
> │     │                                                │
> │     ├─► discover_region() creates new auto           ├─► create_new_region() creates
> │     │   discoveredregion                             │   new manual region
> │◄────◄────────────────────────────────────────────────┘
> │
> └─► Region creation fails due to resource contention/race (DPA resource, RAM resource, etc.)
>
> The timeline is a little off here I think, but it should be close enough to illustrate the point.


Interesting.


I'm aware of that code path when endpoint port is probed, but it is not 
a problem with my testing because the decoder is not enabled at the time 
of discover_region.


I've tested this with two different emulated devices, one a dumb qemu 
type2 device with a driver doing nothing but cxl initialization, and 
another being our network device with CXL support and using RTL 
emulation, and in both cases the decoder is not enabled at that point, 
which makes sense since, AFAIK, it is at region creation/attachment when 
the decoder is committed/enabled. So my obvious question is how are you 
testing this functionality? It seems as if you could have been creating 
more than one region somehow, or maybe something I'm just missing about 
this.


> The easy solution here to not allow auto region discovery for CXL type 2 devices, like so:
>
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index 22a9ba89cf5a..07b991e2c05b 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -34,6 +34,7 @@ static void schedule_detach(void *cxlmd)
>   static int discover_region(struct device *dev, void *root)
>   {
>          struct cxl_endpoint_decoder *cxled;
> +       struct cxl_memdev *cxlmd;
>          int rc;
>
>          dev_err(dev, "%s:%d: Enter\n", __func__, __LINE__);
> @@ -45,7 +46,9 @@ static int discover_region(struct device *dev, void *root)
>          if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
>                  return 0;
>
> -       if (cxled->state != CXL_DECODER_STATE_AUTO)
> +       cxlmd = cxled_to_memdev(cxled);
> +       if (cxled->state != CXL_DECODER_STATE_AUTO ||
> +           cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
>                  return 0;
>
> I think there's a better way to go about this, more to say about it in patch 24/26. I've
> dropped this here just in case you don't like my ideas there ;).
>                                                                      
>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>   drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
>>   drivers/cxl/cxlmem.h      |   2 +
>>   include/linux/cxl/cxl.h   |   4 ++
>>   3 files changed, 138 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index d08a2a848ac9..04c270a29e96 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -2253,6 +2253,18 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>>   	return rc;
>>   }
>>   
>> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
>> +{
>> +	int rc;
>> +
>> +	down_write(&cxl_region_rwsem);
>> +	cxled->mode = CXL_DECODER_DEAD;
>> +	rc = cxl_region_detach(cxled);
>> +	up_write(&cxl_region_rwsem);
>> +	return rc;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
>> +
>>   void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>>   {
>>   	down_write(&cxl_region_rwsem);
>> @@ -2781,6 +2793,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
>>   	return to_cxl_region(region_dev);
>>   }
>>   
>> +static void drop_region(struct cxl_region *cxlr)
>> +{
>> +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
>> +	struct cxl_port *port = cxlrd_to_port(cxlrd);
>> +
>> +	devm_release_action(port->uport_dev, unregister_region, cxlr);
>> +}
>> +
>>   static ssize_t delete_region_store(struct device *dev,
>>   				   struct device_attribute *attr,
>>   				   const char *buf, size_t len)
>> @@ -3386,17 +3406,18 @@ static int match_region_by_range(struct device *dev, void *data)
>>   	return rc;
>>   }
>>   
>> -/* Establish an empty region covering the given HPA range */
>> -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> -					   struct cxl_endpoint_decoder *cxled)
>> +static void construct_region_end(void)
>> +{
>> +	up_write(&cxl_region_rwsem);
>> +}
>> +
>> +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
>> +						 struct cxl_endpoint_decoder *cxled)
>>   {
>>   	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> -	struct cxl_port *port = cxlrd_to_port(cxlrd);
>> -	struct range *hpa = &cxled->cxld.hpa_range;
>>   	struct cxl_region_params *p;
>>   	struct cxl_region *cxlr;
>> -	struct resource *res;
>> -	int rc;
>> +	int err;
>>   
>>   	do {
>>   		cxlr = __create_region(cxlrd, cxled->mode,
>> @@ -3405,8 +3426,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>>   
>>   	if (IS_ERR(cxlr)) {
>> -		dev_err(cxlmd->dev.parent,
>> -			"%s:%s: %s failed assign region: %ld\n",
>> +		dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
>>   			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>   			__func__, PTR_ERR(cxlr));
>>   		return cxlr;
>> @@ -3416,13 +3436,33 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   	p = &cxlr->params;
>>   	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>>   		dev_err(cxlmd->dev.parent,
>> -			"%s:%s: %s autodiscovery interrupted\n",
>> +			"%s:%s: %s region setup interrupted\n",
>>   			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>   			__func__);
>> -		rc = -EBUSY;
>> -		goto err;
>> +		err = -EBUSY;
>> +		construct_region_end();
>> +		drop_region(cxlr);
>> +		return ERR_PTR(err);
>>   	}
>>   
>> +	return cxlr;
>> +}
>> +
>> +/* Establish an empty region covering the given HPA range */
>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>> +					   struct cxl_endpoint_decoder *cxled)
>> +{
>> +	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> +	struct range *hpa = &cxled->cxld.hpa_range;
>> +	struct cxl_region_params *p;
>> +	struct cxl_region *cxlr;
>> +	struct resource *res;
>> +	int rc;
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>>   	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>   
>>   	res = kmalloc(sizeof(*res), GFP_KERNEL);
>> @@ -3445,6 +3485,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   			 __func__, dev_name(&cxlr->dev));
>>   	}
>>   
>> +	p = &cxlr->params;
>>   	p->res = res;
>>   	p->interleave_ways = cxled->cxld.interleave_ways;
>>   	p->interleave_granularity = cxled->cxld.interleave_granularity;
>> @@ -3462,15 +3503,91 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>   	/* ...to match put_device() in cxl_add_to_region() */
>>   	get_device(&cxlr->dev);
>>   	up_write(&cxl_region_rwsem);
>> -
>> +	construct_region_end();
>>   	return cxlr;
>>   
>>   err:
>> -	up_write(&cxl_region_rwsem);
>> -	devm_release_action(port->uport_dev, unregister_region, cxlr);
>> +	construct_region_end();
>> +	drop_region(cxlr);
>> +	return ERR_PTR(rc);
>> +}
>> +
>> +static struct cxl_region *
>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>> +		       struct cxl_endpoint_decoder *cxled)
>> +{
>> +	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>> +	struct cxl_region_params *p;
>> +	struct cxl_region *cxlr;
>> +	int rc;
>> +
>> +	cxlr = construct_region_begin(cxlrd, cxled);
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	rc = set_interleave_ways(cxlr, 1);
>> +	if (rc)
>> +		goto err;
>> +
>> +	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>> +	if (rc)
>> +		goto err;
>> +
>> +	rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
>> +	if (rc)
>> +		goto err;
>> +
>> +	down_read(&cxl_dpa_rwsem);
>> +	rc = cxl_region_attach(cxlr, cxled, 0);
>> +	up_read(&cxl_dpa_rwsem);
>> +
>> +	if (rc)
>> +		goto err;
>> +
>> +	rc = cxl_region_decode_commit(cxlr);
>> +	if (rc)
>> +		goto err;
>> +
>> +	p = &cxlr->params;
>> +	p->state = CXL_CONFIG_COMMIT;
>> +
>> +	construct_region_end();
>> +	return cxlr;
>> +err:
>> +	construct_region_end();
>> +	drop_region(cxlr);
>>   	return ERR_PTR(rc);
>>   }
>>   
>> +/**
>> + * cxl_create_region - Establish a region given an endpoint decoder
>> + * @cxlrd: root decoder to allocate HPA
>> + * @cxled: endpoint decoder with reserved DPA capacity
>> + *
>> + * Returns a fully formed region in the commit state and attached to the
>> + * cxl_region driver.
>> + */
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder *cxled)
>> +{
>> +	struct cxl_region *cxlr;
>> +
>> +	mutex_lock(&cxlrd->range_lock);
>> +	cxlr = __construct_new_region(cxlrd, cxled);
>> +	mutex_unlock(&cxlrd->range_lock);
>> +
>> +	if (IS_ERR(cxlr))
>> +		return cxlr;
>> +
>> +	if (device_attach(&cxlr->dev) <= 0) {
>> +		dev_err(&cxlr->dev, "failed to create region\n");
>> +		drop_region(cxlr);
>> +		return ERR_PTR(-ENODEV);
>> +	}
>> +	return cxlr;
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>> +
>>   int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
>>   {
>>   	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>> index 68d28eab3696..0f5c71909fd1 100644
>> --- a/drivers/cxl/cxlmem.h
>> +++ b/drivers/cxl/cxlmem.h
>> @@ -875,4 +875,6 @@ struct cxl_hdm {
>>   struct seq_file;
>>   struct dentry *cxl_debugfs_create_dir(const char *dir);
>>   void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder *cxled);
>>   #endif /* __CXL_MEM_H__ */
>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>> index 45b6badb8048..c544339c2baf 100644
>> --- a/include/linux/cxl/cxl.h
>> +++ b/include/linux/cxl/cxl.h
>> @@ -72,4 +72,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
>>   					     resource_size_t min,
>>   					     resource_size_t max);
>>   int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>> +				     struct cxl_endpoint_decoder *cxled);
>> +
>> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
>>   #endif
Ben Cheatham Oct. 18, 2024, 4:40 p.m. UTC | #3
On 10/18/24 3:51 AM, Alejandro Lucero Palau wrote:
> 
> On 10/17/24 22:49, Ben Cheatham wrote:
>> On 10/17/24 11:52 AM, alejandro.lucero-palau@amd.com wrote:
>>> From: Alejandro Lucero <alucerop@amd.com>
>>>
>>> Creating a CXL region requires userspace intervention through the cxl
>>> sysfs files. Type2 support should allow accelerator drivers to create
>>> such cxl region from kernel code.
>>>
>>> Adding that functionality and integrating it with current support for
>>> memory expanders.
>>>
>>> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
>>>
>> So I ran into an issue at this point when using v3 as a base for my own testing. The problem is that
>> you are doing manual region management while not explicitly preventing auto region discovery when
>> devm_cxl_add_memdev() is called (patch 14/26 in this series). This caused some resource allocation
>> conflicts which then caused both the auto region and the manual region set up to fail. To make it more
>> concrete, here's the flow I encountered (I tried something new here, let me know if the ascii
>> is all mangled):
>>
>> devm_cxl_add_memdev() is called
>> │
>> ├───► cxl_mem probes new memdev
>> │     │
>> │     ├─► cxl_mem probe adds new endpoint port
>> │     │
>> │     └─► cxl_mem probe finishes
>> ├───────────────────────────────────────────────► Manual region set up starts (finding free space, etc.)
>> ├───► cxl_port probes the new endpoint port            │
>> │     │                                                │
>> │     ├─► cxl_port probe sets up new endpoint          ├─► create_new_region() is called
>> │     │                                                │
>> │     ├─► cxl_port calls discover_region()             │
>> │     │                                                │
>> │     ├─► discover_region() creates new auto           ├─► create_new_region() creates
>> │     │   discoveredregion                             │   new manual region
>> │◄────◄────────────────────────────────────────────────┘
>> │
>> └─► Region creation fails due to resource contention/race (DPA resource, RAM resource, etc.)
>>
>> The timeline is a little off here I think, but it should be close enough to illustrate the point.
> 
> 
> Interesting.
> 
> 
> I'm aware of that code path when endpoint port is probed, but it is not a problem with my testing because the decoder is not enabled at the time of discover_region.
> 
> 
> I've tested this with two different emulated devices, one a dumb qemu type2 device with a driver doing nothing but cxl initialization, and another being our network device with CXL support and using RTL emulation, and in both cases the decoder is not enabled at that point, which makes sense since, AFAIK, it is at region creation/attachment when the decoder is committed/enabled. So my obvious question is how are you testing this functionality? It seems as if you could have been creating more than one region somehow, or maybe something I'm just missing about this.
> 

I think the reason you aren't seeing this is that QEMU doesn't have regions programmed by firmware. In my setup
the decoders are coming up pre-programmed and enabled by firmware, so it is hitting the path during endpoint probe.

Thanks,
Ben

> 
>> The easy solution here to not allow auto region discovery for CXL type 2 devices, like so:
>>
>> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
>> index 22a9ba89cf5a..07b991e2c05b 100644
>> --- a/drivers/cxl/port.c
>> +++ b/drivers/cxl/port.c
>> @@ -34,6 +34,7 @@ static void schedule_detach(void *cxlmd)
>>   static int discover_region(struct device *dev, void *root)
>>   {
>>          struct cxl_endpoint_decoder *cxled;
>> +       struct cxl_memdev *cxlmd;
>>          int rc;
>>
>>          dev_err(dev, "%s:%d: Enter\n", __func__, __LINE__);
>> @@ -45,7 +46,9 @@ static int discover_region(struct device *dev, void *root)
>>          if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
>>                  return 0;
>>
>> -       if (cxled->state != CXL_DECODER_STATE_AUTO)
>> +       cxlmd = cxled_to_memdev(cxled);
>> +       if (cxled->state != CXL_DECODER_STATE_AUTO ||
>> +           cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
>>                  return 0;
>>
>> I think there's a better way to go about this, more to say about it in patch 24/26. I've
>> dropped this here just in case you don't like my ideas there ;).
>>                                                                     
>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>>> ---
>>>   drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
>>>   drivers/cxl/cxlmem.h      |   2 +
>>>   include/linux/cxl/cxl.h   |   4 ++
>>>   3 files changed, 138 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>>> index d08a2a848ac9..04c270a29e96 100644
>>> --- a/drivers/cxl/core/region.c
>>> +++ b/drivers/cxl/core/region.c
>>> @@ -2253,6 +2253,18 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>>>       return rc;
>>>   }
>>>   +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
>>> +{
>>> +    int rc;
>>> +
>>> +    down_write(&cxl_region_rwsem);
>>> +    cxled->mode = CXL_DECODER_DEAD;
>>> +    rc = cxl_region_detach(cxled);
>>> +    up_write(&cxl_region_rwsem);
>>> +    return rc;
>>> +}
>>> +EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
>>> +
>>>   void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>>>   {
>>>       down_write(&cxl_region_rwsem);
>>> @@ -2781,6 +2793,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
>>>       return to_cxl_region(region_dev);
>>>   }
>>>   +static void drop_region(struct cxl_region *cxlr)
>>> +{
>>> +    struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
>>> +    struct cxl_port *port = cxlrd_to_port(cxlrd);
>>> +
>>> +    devm_release_action(port->uport_dev, unregister_region, cxlr);
>>> +}
>>> +
>>>   static ssize_t delete_region_store(struct device *dev,
>>>                      struct device_attribute *attr,
>>>                      const char *buf, size_t len)
>>> @@ -3386,17 +3406,18 @@ static int match_region_by_range(struct device *dev, void *data)
>>>       return rc;
>>>   }
>>>   -/* Establish an empty region covering the given HPA range */
>>> -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>> -                       struct cxl_endpoint_decoder *cxled)
>>> +static void construct_region_end(void)
>>> +{
>>> +    up_write(&cxl_region_rwsem);
>>> +}
>>> +
>>> +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
>>> +                         struct cxl_endpoint_decoder *cxled)
>>>   {
>>>       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>> -    struct cxl_port *port = cxlrd_to_port(cxlrd);
>>> -    struct range *hpa = &cxled->cxld.hpa_range;
>>>       struct cxl_region_params *p;
>>>       struct cxl_region *cxlr;
>>> -    struct resource *res;
>>> -    int rc;
>>> +    int err;
>>>         do {
>>>           cxlr = __create_region(cxlrd, cxled->mode,
>>> @@ -3405,8 +3426,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>       } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>>>         if (IS_ERR(cxlr)) {
>>> -        dev_err(cxlmd->dev.parent,
>>> -            "%s:%s: %s failed assign region: %ld\n",
>>> +        dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
>>>               dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>>               __func__, PTR_ERR(cxlr));
>>>           return cxlr;
>>> @@ -3416,13 +3436,33 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>       p = &cxlr->params;
>>>       if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>>>           dev_err(cxlmd->dev.parent,
>>> -            "%s:%s: %s autodiscovery interrupted\n",
>>> +            "%s:%s: %s region setup interrupted\n",
>>>               dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>>               __func__);
>>> -        rc = -EBUSY;
>>> -        goto err;
>>> +        err = -EBUSY;
>>> +        construct_region_end();
>>> +        drop_region(cxlr);
>>> +        return ERR_PTR(err);
>>>       }
>>>   +    return cxlr;
>>> +}
>>> +
>>> +/* Establish an empty region covering the given HPA range */
>>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>> +                       struct cxl_endpoint_decoder *cxled)
>>> +{
>>> +    struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>> +    struct range *hpa = &cxled->cxld.hpa_range;
>>> +    struct cxl_region_params *p;
>>> +    struct cxl_region *cxlr;
>>> +    struct resource *res;
>>> +    int rc;
>>> +
>>> +    cxlr = construct_region_begin(cxlrd, cxled);
>>> +    if (IS_ERR(cxlr))
>>> +        return cxlr;
>>> +
>>>       set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>>         res = kmalloc(sizeof(*res), GFP_KERNEL);
>>> @@ -3445,6 +3485,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>                __func__, dev_name(&cxlr->dev));
>>>       }
>>>   +    p = &cxlr->params;
>>>       p->res = res;
>>>       p->interleave_ways = cxled->cxld.interleave_ways;
>>>       p->interleave_granularity = cxled->cxld.interleave_granularity;
>>> @@ -3462,15 +3503,91 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>       /* ...to match put_device() in cxl_add_to_region() */
>>>       get_device(&cxlr->dev);
>>>       up_write(&cxl_region_rwsem);
>>> -
>>> +    construct_region_end();
>>>       return cxlr;
>>>     err:
>>> -    up_write(&cxl_region_rwsem);
>>> -    devm_release_action(port->uport_dev, unregister_region, cxlr);
>>> +    construct_region_end();
>>> +    drop_region(cxlr);
>>> +    return ERR_PTR(rc);
>>> +}
>>> +
>>> +static struct cxl_region *
>>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>>> +               struct cxl_endpoint_decoder *cxled)
>>> +{
>>> +    struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>>> +    struct cxl_region_params *p;
>>> +    struct cxl_region *cxlr;
>>> +    int rc;
>>> +
>>> +    cxlr = construct_region_begin(cxlrd, cxled);
>>> +    if (IS_ERR(cxlr))
>>> +        return cxlr;
>>> +
>>> +    rc = set_interleave_ways(cxlr, 1);
>>> +    if (rc)
>>> +        goto err;
>>> +
>>> +    rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>>> +    if (rc)
>>> +        goto err;
>>> +
>>> +    rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
>>> +    if (rc)
>>> +        goto err;
>>> +
>>> +    down_read(&cxl_dpa_rwsem);
>>> +    rc = cxl_region_attach(cxlr, cxled, 0);
>>> +    up_read(&cxl_dpa_rwsem);
>>> +
>>> +    if (rc)
>>> +        goto err;
>>> +
>>> +    rc = cxl_region_decode_commit(cxlr);
>>> +    if (rc)
>>> +        goto err;
>>> +
>>> +    p = &cxlr->params;
>>> +    p->state = CXL_CONFIG_COMMIT;
>>> +
>>> +    construct_region_end();
>>> +    return cxlr;
>>> +err:
>>> +    construct_region_end();
>>> +    drop_region(cxlr);
>>>       return ERR_PTR(rc);
>>>   }
>>>   +/**
>>> + * cxl_create_region - Establish a region given an endpoint decoder
>>> + * @cxlrd: root decoder to allocate HPA
>>> + * @cxled: endpoint decoder with reserved DPA capacity
>>> + *
>>> + * Returns a fully formed region in the commit state and attached to the
>>> + * cxl_region driver.
>>> + */
>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>> +                     struct cxl_endpoint_decoder *cxled)
>>> +{
>>> +    struct cxl_region *cxlr;
>>> +
>>> +    mutex_lock(&cxlrd->range_lock);
>>> +    cxlr = __construct_new_region(cxlrd, cxled);
>>> +    mutex_unlock(&cxlrd->range_lock);
>>> +
>>> +    if (IS_ERR(cxlr))
>>> +        return cxlr;
>>> +
>>> +    if (device_attach(&cxlr->dev) <= 0) {
>>> +        dev_err(&cxlr->dev, "failed to create region\n");
>>> +        drop_region(cxlr);
>>> +        return ERR_PTR(-ENODEV);
>>> +    }
>>> +    return cxlr;
>>> +}
>>> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>>> +
>>>   int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
>>>   {
>>>       struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>>> index 68d28eab3696..0f5c71909fd1 100644
>>> --- a/drivers/cxl/cxlmem.h
>>> +++ b/drivers/cxl/cxlmem.h
>>> @@ -875,4 +875,6 @@ struct cxl_hdm {
>>>   struct seq_file;
>>>   struct dentry *cxl_debugfs_create_dir(const char *dir);
>>>   void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>> +                     struct cxl_endpoint_decoder *cxled);
>>>   #endif /* __CXL_MEM_H__ */
>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>> index 45b6badb8048..c544339c2baf 100644
>>> --- a/include/linux/cxl/cxl.h
>>> +++ b/include/linux/cxl/cxl.h
>>> @@ -72,4 +72,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
>>>                            resource_size_t min,
>>>                            resource_size_t max);
>>>   int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>> +                     struct cxl_endpoint_decoder *cxled);
>>> +
>>> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
>>>   #endif
Alejandro Lucero Palau Oct. 21, 2024, 9:54 a.m. UTC | #4
On 10/18/24 17:40, Ben Cheatham wrote:
>
> On 10/18/24 3:51 AM, Alejandro Lucero Palau wrote:
>> On 10/17/24 22:49, Ben Cheatham wrote:
>>> On 10/17/24 11:52 AM, alejandro.lucero-palau@amd.com wrote:
>>>> From: Alejandro Lucero <alucerop@amd.com>
>>>>
>>>> Creating a CXL region requires userspace intervention through the cxl
>>>> sysfs files. Type2 support should allow accelerator drivers to create
>>>> such cxl region from kernel code.
>>>>
>>>> Adding that functionality and integrating it with current support for
>>>> memory expanders.
>>>>
>>>> Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/
>>>>
>>> So I ran into an issue at this point when using v3 as a base for my own testing. The problem is that
>>> you are doing manual region management while not explicitly preventing auto region discovery when
>>> devm_cxl_add_memdev() is called (patch 14/26 in this series). This caused some resource allocation
>>> conflicts which then caused both the auto region and the manual region set up to fail. To make it more
>>> concrete, here's the flow I encountered (I tried something new here, let me know if the ascii
>>> is all mangled):
>>>
>>> devm_cxl_add_memdev() is called
>>> │
>>> ├───► cxl_mem probes new memdev
>>> │     │
>>> │     ├─► cxl_mem probe adds new endpoint port
>>> │     │
>>> │     └─► cxl_mem probe finishes
>>> ├───────────────────────────────────────────────► Manual region set up starts (finding free space, etc.)
>>> ├───► cxl_port probes the new endpoint port            │
>>> │     │                                                │
>>> │     ├─► cxl_port probe sets up new endpoint          ├─► create_new_region() is called
>>> │     │                                                │
>>> │     ├─► cxl_port calls discover_region()             │
>>> │     │                                                │
>>> │     ├─► discover_region() creates new auto           ├─► create_new_region() creates
>>> │     │   discoveredregion                             │   new manual region
>>> │◄────◄────────────────────────────────────────────────┘
>>> │
>>> └─► Region creation fails due to resource contention/race (DPA resource, RAM resource, etc.)
>>>
>>> The timeline is a little off here I think, but it should be close enough to illustrate the point.
>>
>> Interesting.
>>
>>
>> I'm aware of that code path when endpoint port is probed, but it is not a problem with my testing because the decoder is not enabled at the time of discover_region.
>>
>>
>> I've tested this with two different emulated devices, one a dumb qemu type2 device with a driver doing nothing but cxl initialization, and another being our network device with CXL support and using RTL emulation, and in both cases the decoder is not enabled at that point, which makes sense since, AFAIK, it is at region creation/attachment when the decoder is committed/enabled. So my obvious question is how are you testing this functionality? It seems as if you could have been creating more than one region somehow, or maybe something I'm just missing about this.
>>
> I think the reason you aren't seeing this is that QEMU doesn't have regions programmed by firmware. In my setup
> the decoders are coming up pre-programmed and enabled by firmware, so it is hitting the path during endpoint probe.


That explains it, and it also means you do not have the EFI_RESERVED 
flag in use what we expect for our device.

And I think the solution you give below should fix it. I'll add it to v5.

Thanks!


> Thanks,
> Ben
>
>>> The easy solution here to not allow auto region discovery for CXL type 2 devices, like so:
>>>
>>> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
>>> index 22a9ba89cf5a..07b991e2c05b 100644
>>> --- a/drivers/cxl/port.c
>>> +++ b/drivers/cxl/port.c
>>> @@ -34,6 +34,7 @@ static void schedule_detach(void *cxlmd)
>>>    static int discover_region(struct device *dev, void *root)
>>>    {
>>>           struct cxl_endpoint_decoder *cxled;
>>> +       struct cxl_memdev *cxlmd;
>>>           int rc;
>>>
>>>           dev_err(dev, "%s:%d: Enter\n", __func__, __LINE__);
>>> @@ -45,7 +46,9 @@ static int discover_region(struct device *dev, void *root)
>>>           if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
>>>                   return 0;
>>>
>>> -       if (cxled->state != CXL_DECODER_STATE_AUTO)
>>> +       cxlmd = cxled_to_memdev(cxled);
>>> +       if (cxled->state != CXL_DECODER_STATE_AUTO ||
>>> +           cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM)
>>>                   return 0;
>>>
>>> I think there's a better way to go about this, more to say about it in patch 24/26. I've
>>> dropped this here just in case you don't like my ideas there ;).
>>>                                                                      
>>>> Signed-off-by: Alejandro Lucero <alucerop@amd.com>
>>>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>>>> ---
>>>>    drivers/cxl/core/region.c | 147 ++++++++++++++++++++++++++++++++++----
>>>>    drivers/cxl/cxlmem.h      |   2 +
>>>>    include/linux/cxl/cxl.h   |   4 ++
>>>>    3 files changed, 138 insertions(+), 15 deletions(-)
>>>>
>>>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>>>> index d08a2a848ac9..04c270a29e96 100644
>>>> --- a/drivers/cxl/core/region.c
>>>> +++ b/drivers/cxl/core/region.c
>>>> @@ -2253,6 +2253,18 @@ static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
>>>>        return rc;
>>>>    }
>>>>    +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
>>>> +{
>>>> +    int rc;
>>>> +
>>>> +    down_write(&cxl_region_rwsem);
>>>> +    cxled->mode = CXL_DECODER_DEAD;
>>>> +    rc = cxl_region_detach(cxled);
>>>> +    up_write(&cxl_region_rwsem);
>>>> +    return rc;
>>>> +}
>>>> +EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
>>>> +
>>>>    void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
>>>>    {
>>>>        down_write(&cxl_region_rwsem);
>>>> @@ -2781,6 +2793,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
>>>>        return to_cxl_region(region_dev);
>>>>    }
>>>>    +static void drop_region(struct cxl_region *cxlr)
>>>> +{
>>>> +    struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
>>>> +    struct cxl_port *port = cxlrd_to_port(cxlrd);
>>>> +
>>>> +    devm_release_action(port->uport_dev, unregister_region, cxlr);
>>>> +}
>>>> +
>>>>    static ssize_t delete_region_store(struct device *dev,
>>>>                       struct device_attribute *attr,
>>>>                       const char *buf, size_t len)
>>>> @@ -3386,17 +3406,18 @@ static int match_region_by_range(struct device *dev, void *data)
>>>>        return rc;
>>>>    }
>>>>    -/* Establish an empty region covering the given HPA range */
>>>> -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>> -                       struct cxl_endpoint_decoder *cxled)
>>>> +static void construct_region_end(void)
>>>> +{
>>>> +    up_write(&cxl_region_rwsem);
>>>> +}
>>>> +
>>>> +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
>>>> +                         struct cxl_endpoint_decoder *cxled)
>>>>    {
>>>>        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>>> -    struct cxl_port *port = cxlrd_to_port(cxlrd);
>>>> -    struct range *hpa = &cxled->cxld.hpa_range;
>>>>        struct cxl_region_params *p;
>>>>        struct cxl_region *cxlr;
>>>> -    struct resource *res;
>>>> -    int rc;
>>>> +    int err;
>>>>          do {
>>>>            cxlr = __create_region(cxlrd, cxled->mode,
>>>> @@ -3405,8 +3426,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>>        } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>>>>          if (IS_ERR(cxlr)) {
>>>> -        dev_err(cxlmd->dev.parent,
>>>> -            "%s:%s: %s failed assign region: %ld\n",
>>>> +        dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
>>>>                dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>>>                __func__, PTR_ERR(cxlr));
>>>>            return cxlr;
>>>> @@ -3416,13 +3436,33 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>>        p = &cxlr->params;
>>>>        if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
>>>>            dev_err(cxlmd->dev.parent,
>>>> -            "%s:%s: %s autodiscovery interrupted\n",
>>>> +            "%s:%s: %s region setup interrupted\n",
>>>>                dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
>>>>                __func__);
>>>> -        rc = -EBUSY;
>>>> -        goto err;
>>>> +        err = -EBUSY;
>>>> +        construct_region_end();
>>>> +        drop_region(cxlr);
>>>> +        return ERR_PTR(err);
>>>>        }
>>>>    +    return cxlr;
>>>> +}
>>>> +
>>>> +/* Establish an empty region covering the given HPA range */
>>>> +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>> +                       struct cxl_endpoint_decoder *cxled)
>>>> +{
>>>> +    struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>>> +    struct range *hpa = &cxled->cxld.hpa_range;
>>>> +    struct cxl_region_params *p;
>>>> +    struct cxl_region *cxlr;
>>>> +    struct resource *res;
>>>> +    int rc;
>>>> +
>>>> +    cxlr = construct_region_begin(cxlrd, cxled);
>>>> +    if (IS_ERR(cxlr))
>>>> +        return cxlr;
>>>> +
>>>>        set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>>>>          res = kmalloc(sizeof(*res), GFP_KERNEL);
>>>> @@ -3445,6 +3485,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>>                 __func__, dev_name(&cxlr->dev));
>>>>        }
>>>>    +    p = &cxlr->params;
>>>>        p->res = res;
>>>>        p->interleave_ways = cxled->cxld.interleave_ways;
>>>>        p->interleave_granularity = cxled->cxld.interleave_granularity;
>>>> @@ -3462,15 +3503,91 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>>>>        /* ...to match put_device() in cxl_add_to_region() */
>>>>        get_device(&cxlr->dev);
>>>>        up_write(&cxl_region_rwsem);
>>>> -
>>>> +    construct_region_end();
>>>>        return cxlr;
>>>>      err:
>>>> -    up_write(&cxl_region_rwsem);
>>>> -    devm_release_action(port->uport_dev, unregister_region, cxlr);
>>>> +    construct_region_end();
>>>> +    drop_region(cxlr);
>>>> +    return ERR_PTR(rc);
>>>> +}
>>>> +
>>>> +static struct cxl_region *
>>>> +__construct_new_region(struct cxl_root_decoder *cxlrd,
>>>> +               struct cxl_endpoint_decoder *cxled)
>>>> +{
>>>> +    struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
>>>> +    struct cxl_region_params *p;
>>>> +    struct cxl_region *cxlr;
>>>> +    int rc;
>>>> +
>>>> +    cxlr = construct_region_begin(cxlrd, cxled);
>>>> +    if (IS_ERR(cxlr))
>>>> +        return cxlr;
>>>> +
>>>> +    rc = set_interleave_ways(cxlr, 1);
>>>> +    if (rc)
>>>> +        goto err;
>>>> +
>>>> +    rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
>>>> +    if (rc)
>>>> +        goto err;
>>>> +
>>>> +    rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
>>>> +    if (rc)
>>>> +        goto err;
>>>> +
>>>> +    down_read(&cxl_dpa_rwsem);
>>>> +    rc = cxl_region_attach(cxlr, cxled, 0);
>>>> +    up_read(&cxl_dpa_rwsem);
>>>> +
>>>> +    if (rc)
>>>> +        goto err;
>>>> +
>>>> +    rc = cxl_region_decode_commit(cxlr);
>>>> +    if (rc)
>>>> +        goto err;
>>>> +
>>>> +    p = &cxlr->params;
>>>> +    p->state = CXL_CONFIG_COMMIT;
>>>> +
>>>> +    construct_region_end();
>>>> +    return cxlr;
>>>> +err:
>>>> +    construct_region_end();
>>>> +    drop_region(cxlr);
>>>>        return ERR_PTR(rc);
>>>>    }
>>>>    +/**
>>>> + * cxl_create_region - Establish a region given an endpoint decoder
>>>> + * @cxlrd: root decoder to allocate HPA
>>>> + * @cxled: endpoint decoder with reserved DPA capacity
>>>> + *
>>>> + * Returns a fully formed region in the commit state and attached to the
>>>> + * cxl_region driver.
>>>> + */
>>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>>> +                     struct cxl_endpoint_decoder *cxled)
>>>> +{
>>>> +    struct cxl_region *cxlr;
>>>> +
>>>> +    mutex_lock(&cxlrd->range_lock);
>>>> +    cxlr = __construct_new_region(cxlrd, cxled);
>>>> +    mutex_unlock(&cxlrd->range_lock);
>>>> +
>>>> +    if (IS_ERR(cxlr))
>>>> +        return cxlr;
>>>> +
>>>> +    if (device_attach(&cxlr->dev) <= 0) {
>>>> +        dev_err(&cxlr->dev, "failed to create region\n");
>>>> +        drop_region(cxlr);
>>>> +        return ERR_PTR(-ENODEV);
>>>> +    }
>>>> +    return cxlr;
>>>> +}
>>>> +EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
>>>> +
>>>>    int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
>>>>    {
>>>>        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
>>>> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
>>>> index 68d28eab3696..0f5c71909fd1 100644
>>>> --- a/drivers/cxl/cxlmem.h
>>>> +++ b/drivers/cxl/cxlmem.h
>>>> @@ -875,4 +875,6 @@ struct cxl_hdm {
>>>>    struct seq_file;
>>>>    struct dentry *cxl_debugfs_create_dir(const char *dir);
>>>>    void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
>>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>>> +                     struct cxl_endpoint_decoder *cxled);
>>>>    #endif /* __CXL_MEM_H__ */
>>>> diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
>>>> index 45b6badb8048..c544339c2baf 100644
>>>> --- a/include/linux/cxl/cxl.h
>>>> +++ b/include/linux/cxl/cxl.h
>>>> @@ -72,4 +72,8 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
>>>>                             resource_size_t min,
>>>>                             resource_size_t max);
>>>>    int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
>>>> +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
>>>> +                     struct cxl_endpoint_decoder *cxled);
>>>> +
>>>> +int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
>>>>    #endif
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index d08a2a848ac9..04c270a29e96 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2253,6 +2253,18 @@  static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
 	return rc;
 }
 
+int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled)
+{
+	int rc;
+
+	down_write(&cxl_region_rwsem);
+	cxled->mode = CXL_DECODER_DEAD;
+	rc = cxl_region_detach(cxled);
+	up_write(&cxl_region_rwsem);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_accel_region_detach, CXL);
+
 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
 {
 	down_write(&cxl_region_rwsem);
@@ -2781,6 +2793,14 @@  cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
 	return to_cxl_region(region_dev);
 }
 
+static void drop_region(struct cxl_region *cxlr)
+{
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+	struct cxl_port *port = cxlrd_to_port(cxlrd);
+
+	devm_release_action(port->uport_dev, unregister_region, cxlr);
+}
+
 static ssize_t delete_region_store(struct device *dev,
 				   struct device_attribute *attr,
 				   const char *buf, size_t len)
@@ -3386,17 +3406,18 @@  static int match_region_by_range(struct device *dev, void *data)
 	return rc;
 }
 
-/* Establish an empty region covering the given HPA range */
-static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
-					   struct cxl_endpoint_decoder *cxled)
+static void construct_region_end(void)
+{
+	up_write(&cxl_region_rwsem);
+}
+
+static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd,
+						 struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	struct cxl_port *port = cxlrd_to_port(cxlrd);
-	struct range *hpa = &cxled->cxld.hpa_range;
 	struct cxl_region_params *p;
 	struct cxl_region *cxlr;
-	struct resource *res;
-	int rc;
+	int err;
 
 	do {
 		cxlr = __create_region(cxlrd, cxled->mode,
@@ -3405,8 +3426,7 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
 	if (IS_ERR(cxlr)) {
-		dev_err(cxlmd->dev.parent,
-			"%s:%s: %s failed assign region: %ld\n",
+		dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__, PTR_ERR(cxlr));
 		return cxlr;
@@ -3416,13 +3436,33 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	p = &cxlr->params;
 	if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
 		dev_err(cxlmd->dev.parent,
-			"%s:%s: %s autodiscovery interrupted\n",
+			"%s:%s: %s region setup interrupted\n",
 			dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
 			__func__);
-		rc = -EBUSY;
-		goto err;
+		err = -EBUSY;
+		construct_region_end();
+		drop_region(cxlr);
+		return ERR_PTR(err);
 	}
 
+	return cxlr;
+}
+
+/* Establish an empty region covering the given HPA range */
+static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
+					   struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct range *hpa = &cxled->cxld.hpa_range;
+	struct cxl_region_params *p;
+	struct cxl_region *cxlr;
+	struct resource *res;
+	int rc;
+
+	cxlr = construct_region_begin(cxlrd, cxled);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
 
 	res = kmalloc(sizeof(*res), GFP_KERNEL);
@@ -3445,6 +3485,7 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 			 __func__, dev_name(&cxlr->dev));
 	}
 
+	p = &cxlr->params;
 	p->res = res;
 	p->interleave_ways = cxled->cxld.interleave_ways;
 	p->interleave_granularity = cxled->cxld.interleave_granularity;
@@ -3462,15 +3503,91 @@  static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 	/* ...to match put_device() in cxl_add_to_region() */
 	get_device(&cxlr->dev);
 	up_write(&cxl_region_rwsem);
-
+	construct_region_end();
 	return cxlr;
 
 err:
-	up_write(&cxl_region_rwsem);
-	devm_release_action(port->uport_dev, unregister_region, cxlr);
+	construct_region_end();
+	drop_region(cxlr);
+	return ERR_PTR(rc);
+}
+
+static struct cxl_region *
+__construct_new_region(struct cxl_root_decoder *cxlrd,
+		       struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+	struct cxl_region_params *p;
+	struct cxl_region *cxlr;
+	int rc;
+
+	cxlr = construct_region_begin(cxlrd, cxled);
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	rc = set_interleave_ways(cxlr, 1);
+	if (rc)
+		goto err;
+
+	rc = set_interleave_granularity(cxlr, cxld->interleave_granularity);
+	if (rc)
+		goto err;
+
+	rc = alloc_hpa(cxlr, resource_size(cxled->dpa_res));
+	if (rc)
+		goto err;
+
+	down_read(&cxl_dpa_rwsem);
+	rc = cxl_region_attach(cxlr, cxled, 0);
+	up_read(&cxl_dpa_rwsem);
+
+	if (rc)
+		goto err;
+
+	rc = cxl_region_decode_commit(cxlr);
+	if (rc)
+		goto err;
+
+	p = &cxlr->params;
+	p->state = CXL_CONFIG_COMMIT;
+
+	construct_region_end();
+	return cxlr;
+err:
+	construct_region_end();
+	drop_region(cxlr);
 	return ERR_PTR(rc);
 }
 
+/**
+ * cxl_create_region - Establish a region given an endpoint decoder
+ * @cxlrd: root decoder to allocate HPA
+ * @cxled: endpoint decoder with reserved DPA capacity
+ *
+ * Returns a fully formed region in the commit state and attached to the
+ * cxl_region driver.
+ */
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder *cxled)
+{
+	struct cxl_region *cxlr;
+
+	mutex_lock(&cxlrd->range_lock);
+	cxlr = __construct_new_region(cxlrd, cxled);
+	mutex_unlock(&cxlrd->range_lock);
+
+	if (IS_ERR(cxlr))
+		return cxlr;
+
+	if (device_attach(&cxlr->dev) <= 0) {
+		dev_err(&cxlr->dev, "failed to create region\n");
+		drop_region(cxlr);
+		return ERR_PTR(-ENODEV);
+	}
+	return cxlr;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_create_region, CXL);
+
 int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 68d28eab3696..0f5c71909fd1 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -875,4 +875,6 @@  struct cxl_hdm {
 struct seq_file;
 struct dentry *cxl_debugfs_create_dir(const char *dir);
 void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds);
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder *cxled);
 #endif /* __CXL_MEM_H__ */
diff --git a/include/linux/cxl/cxl.h b/include/linux/cxl/cxl.h
index 45b6badb8048..c544339c2baf 100644
--- a/include/linux/cxl/cxl.h
+++ b/include/linux/cxl/cxl.h
@@ -72,4 +72,8 @@  struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd,
 					     resource_size_t min,
 					     resource_size_t max);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
+struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd,
+				     struct cxl_endpoint_decoder *cxled);
+
+int cxl_accel_region_detach(struct cxl_endpoint_decoder *cxled);
 #endif