diff mbox series

PCI: Add a mutex to protect the global list pci_domain_busn_res_list

Message ID 20240419015302.13871-1-decui@microsoft.com (mailing list archive)
State Changes Requested
Delegated to: Bjorn Helgaas
Headers show
Series PCI: Add a mutex to protect the global list pci_domain_busn_res_list | expand

Commit Message

Dexuan Cui April 19, 2024, 1:53 a.m. UTC
There has been an effort to make the pci-hyperv driver support
async-probing to reduce the boot time. With async-probing, multiple
kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
update the global list, causing list corruption.

Add a mutex to protect the list.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
 drivers/pci/probe.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

Comments

Haiyang Zhang April 19, 2024, 3:07 p.m. UTC | #1
> -----Original Message-----
> From: Dexuan Cui <decui@microsoft.com>
> Sent: Thursday, April 18, 2024 9:53 PM
> To: bhelgaas@google.com; wei.liu@kernel.org; KY Srinivasan
> <kys@microsoft.com>; Haiyang Zhang <haiyangz@microsoft.com>;
> lpieralisi@kernel.org; linux-pci@vger.kernel.org
> Cc: linux-hyperv@vger.kernel.org; linux-kernel@vger.kernel.org; Boqun
> Feng <Boqun.Feng@microsoft.com>; Sunil Muthuswamy
> <sunilmut@microsoft.com>; Saurabh Singh Sengar <ssengar@microsoft.com>;
> Dexuan Cui <decui@microsoft.com>
> Subject: [PATCH] PCI: Add a mutex to protect the global list
> pci_domain_busn_res_list
> 
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus()
> ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time
> to
> update the global list, causing list corruption.
> 
> Add a mutex to protect the list.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/pci/probe.c | 25 ++++++++++++++++++-------
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
>  EXPORT_SYMBOL(pci_root_buses);
> 
>  static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
> 
>  struct pci_domain_busn_res {
>  	struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
>  static struct resource *get_pci_domain_busn_res(int domain_nr)
>  {
>  	struct pci_domain_busn_res *r;
> +	struct resource *ret;
> 
> -	list_for_each_entry(r, &pci_domain_busn_res_list, list)
> -		if (r->domain_nr == domain_nr)
> -			return &r->res;
> +	mutex_lock(&pci_domain_busn_res_list_lock);
> +
> +	list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> +		if (r->domain_nr == domain_nr) {
> +			ret = &r->res;
> +			goto out;
> +		}
> +	}
> 
>  	r = kzalloc(sizeof(*r), GFP_KERNEL);
> -	if (!r)
> -		return NULL;
> +	if (!r) {
> +		ret = NULL;
> +		goto out;
> +	}
> 
>  	r->domain_nr = domain_nr;
>  	r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int
> domain_nr)
>  	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
> 
>  	list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> -	return &r->res;
> +	ret = &r->res;
> +out:
> +	mutex_unlock(&pci_domain_busn_res_list_lock);
> +	return ret;
>  }

The patch is for the common PCI code. So has this bug been there for a while?
Do you have a sample stack trace of the crash?

I checked pci-hyperv: it doesn't define .driver.probe_type, so
PROBE_DEFAULT_STRATEGY is in effect. driver_allows_async_probing() returns
false unless a kernel/module parameter requests async probing. So async
probing hasn't been exercised here.

If, in the future, we change pci-hyperv's probe_type to PROBE_PREFER_ASYNCHRONOUS,
how will that affect the probing of the underlying PCI devices within the same
device type?
For example, the MANA driver doesn't set probe_type. Will pci-hyperv's async
probing cause async probing, or potentially nondeterministic naming, for
MANA devices?

Thanks,
- Haiyang
Frank Li April 19, 2024, 3:12 p.m. UTC | #2
On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> update the global list, causing list corruption.
> 
> Add a mutex to protect the list.
> 
> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/pci/probe.c | 25 ++++++++++++++++++-------
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
>  EXPORT_SYMBOL(pci_root_buses);
>  
>  static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
>  
>  struct pci_domain_busn_res {
>  	struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
>  static struct resource *get_pci_domain_busn_res(int domain_nr)
>  {
>  	struct pci_domain_busn_res *r;
> +	struct resource *ret;
>  
> -	list_for_each_entry(r, &pci_domain_busn_res_list, list)
> -		if (r->domain_nr == domain_nr)
> -			return &r->res;
> +	mutex_lock(&pci_domain_busn_res_list_lock);

Use
	guard(mutex)(&pci_domain_busn_res_list_lock);

to simplify the logic, especially since there are gotos.

Then you can drop the "goto out" and return NULL directly.

Frank

> +
> +	list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> +		if (r->domain_nr == domain_nr) {
> +			ret = &r->res;
> +			goto out;
> +		}
> +	}
>  
>  	r = kzalloc(sizeof(*r), GFP_KERNEL);
> -	if (!r)
> -		return NULL;
> +	if (!r) {
> +		ret = NULL;
> +		goto out;
> +	}
>  
>  	r->domain_nr = domain_nr;
>  	r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
>  	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
>  
>  	list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> -	return &r->res;
> +	ret = &r->res;
> +out:
> +	mutex_unlock(&pci_domain_busn_res_list_lock);
> +	return ret;
>  }
>  
>  /*
> -- 
> 2.25.1
>
Bjorn Helgaas April 25, 2024, 10:51 p.m. UTC | #3
On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> There has been an effort to make the pci-hyperv driver support
> async-probing to reduce the boot time. With async-probing, multiple
> kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> update the global list, causing list corruption.
> 
> Add a mutex to protect the list.

I think it's a good idea to support probing multiple PCI root buses in
parallel.

The problem in get_pci_domain_busn_res() is the global
pci_domain_busn_res_list.  I'm not even sure what that list contains,
since it's a lookup by "domain_nr".  In the hv case, you probably have
one host bridge per domain, but in general there may be multiple root
buses in the same domain, e.g.,

  ACPI: PCI Root Bridge [PC00] (domain 0000 [bus 00-16])
  ACPI: PCI Root Bridge [PC01] (domain 0000 [bus 17-39])
  ACPI: PCI Root Bridge [PC02] (domain 0000 [bus 3a-5c])
  ...

We only use get_pci_domain_busn_res() for root buses, and we should
know the bus number range for root buses when we set up the struct
pci_host_bridge, so it seems like we should keep the bus number
resource there instead of allocating it in this sort of random place.

Then we shouldn't need this weird pci_domain_busn_res_list at all.

> Signed-off-by: Dexuan Cui <decui@microsoft.com>
> ---
>  drivers/pci/probe.c | 25 ++++++++++++++++++-------
>  1 file changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> index e19b79821dd6..1327fd820b24 100644
> --- a/drivers/pci/probe.c
> +++ b/drivers/pci/probe.c
> @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
>  EXPORT_SYMBOL(pci_root_buses);
>  
>  static LIST_HEAD(pci_domain_busn_res_list);
> +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
>  
>  struct pci_domain_busn_res {
>  	struct list_head list;
> @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
>  static struct resource *get_pci_domain_busn_res(int domain_nr)
>  {
>  	struct pci_domain_busn_res *r;
> +	struct resource *ret;
>  
> -	list_for_each_entry(r, &pci_domain_busn_res_list, list)
> -		if (r->domain_nr == domain_nr)
> -			return &r->res;
> +	mutex_lock(&pci_domain_busn_res_list_lock);
> +
> +	list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> +		if (r->domain_nr == domain_nr) {
> +			ret = &r->res;
> +			goto out;
> +		}
> +	}
>  
>  	r = kzalloc(sizeof(*r), GFP_KERNEL);
> -	if (!r)
> -		return NULL;
> +	if (!r) {
> +		ret = NULL;
> +		goto out;
> +	}
>  
>  	r->domain_nr = domain_nr;
>  	r->res.start = 0;
> @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
>  	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
>  
>  	list_add_tail(&r->list, &pci_domain_busn_res_list);
> -
> -	return &r->res;
> +	ret = &r->res;
> +out:
> +	mutex_unlock(&pci_domain_busn_res_list_lock);
> +	return ret;
>  }
>  
>  /*
> -- 
> 2.25.1
>
Bjorn Helgaas April 29, 2024, 5:03 p.m. UTC | #4
On Thu, Apr 25, 2024 at 05:51:38PM -0500, Bjorn Helgaas wrote:
> On Thu, Apr 18, 2024 at 06:53:02PM -0700, Dexuan Cui wrote:
> > There has been an effort to make the pci-hyperv driver support
> > async-probing to reduce the boot time. With async-probing, multiple
> > kernel threads can be running hv_pci_probe() -> create_root_hv_pci_bus() ->
> > pci_scan_root_bus_bridge() -> pci_bus_insert_busn_res() at the same time to
> > update the global list, causing list corruption.
> > 
> > Add a mutex to protect the list.
> 
> I think it's a good idea to support probing multiple PCI root buses in
> parallel.
> 
> The problem in get_pci_domain_busn_res() is the global
> pci_domain_busn_res_list.  I'm not even sure what that list contains,
> since it's a lookup by "domain_nr".  In the hv case, you probably have
> one host bridge per domain, but in general there may be multiple root
> buses in the same domain, e.g.,
> 
>   ACPI: PCI Root Bridge [PC00] (domain 0000 [bus 00-16])
>   ACPI: PCI Root Bridge [PC01] (domain 0000 [bus 17-39])
>   ACPI: PCI Root Bridge [PC02] (domain 0000 [bus 3a-5c])
>   ...
> 
> We only use get_pci_domain_busn_res() for root buses, and we should
> know the bus number range for root buses when we set up the struct
> pci_host_bridge, so it seems like we should keep the bus number
> resource there instead of allocating it in this sort of random place.
> 
> Then we shouldn't need this weird pci_domain_busn_res_list at all.

Oops, sorry, I totally missed the point here.  The point is that for
each domain, we get a new 00-ff range of possible bus numbers.  This
is independent of the host bridges for that domain that may exist.
Then each host bridge will allocate a piece of the 00-ff range.

But I do still think get_pci_domain_busn_res() isn't really the best
place for this.  It seems like it should be at a higher level,
connected somehow to domain number allocation, e.g., somewhere related
to bridge->domain_nr like the pci_bus_find_domain_nr() path.

> > Signed-off-by: Dexuan Cui <decui@microsoft.com>
> > ---
> >  drivers/pci/probe.c | 25 ++++++++++++++++++-------
> >  1 file changed, 18 insertions(+), 7 deletions(-)
> > 
> > diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
> > index e19b79821dd6..1327fd820b24 100644
> > --- a/drivers/pci/probe.c
> > +++ b/drivers/pci/probe.c
> > @@ -37,6 +37,7 @@ LIST_HEAD(pci_root_buses);
> >  EXPORT_SYMBOL(pci_root_buses);
> >  
> >  static LIST_HEAD(pci_domain_busn_res_list);
> > +static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
> >  
> >  struct pci_domain_busn_res {
> >  	struct list_head list;
> > @@ -47,14 +48,22 @@ struct pci_domain_busn_res {
> >  static struct resource *get_pci_domain_busn_res(int domain_nr)
> >  {
> >  	struct pci_domain_busn_res *r;
> > +	struct resource *ret;
> >  
> > -	list_for_each_entry(r, &pci_domain_busn_res_list, list)
> > -		if (r->domain_nr == domain_nr)
> > -			return &r->res;
> > +	mutex_lock(&pci_domain_busn_res_list_lock);
> > +
> > +	list_for_each_entry(r, &pci_domain_busn_res_list, list) {
> > +		if (r->domain_nr == domain_nr) {
> > +			ret = &r->res;
> > +			goto out;
> > +		}
> > +	}
> >  
> >  	r = kzalloc(sizeof(*r), GFP_KERNEL);
> > -	if (!r)
> > -		return NULL;
> > +	if (!r) {
> > +		ret = NULL;
> > +		goto out;
> > +	}
> >  
> >  	r->domain_nr = domain_nr;
> >  	r->res.start = 0;
> > @@ -62,8 +71,10 @@ static struct resource *get_pci_domain_busn_res(int domain_nr)
> >  	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
> >  
> >  	list_add_tail(&r->list, &pci_domain_busn_res_list);
> > -
> > -	return &r->res;
> > +	ret = &r->res;
> > +out:
> > +	mutex_unlock(&pci_domain_busn_res_list_lock);
> > +	return ret;
> >  }
> >  
> >  /*
> > -- 
> > 2.25.1
> >
diff mbox series

Patch

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index e19b79821dd6..1327fd820b24 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -37,6 +37,7 @@  LIST_HEAD(pci_root_buses);
 EXPORT_SYMBOL(pci_root_buses);
 
 static LIST_HEAD(pci_domain_busn_res_list);
+static DEFINE_MUTEX(pci_domain_busn_res_list_lock);
 
 struct pci_domain_busn_res {
 	struct list_head list;
@@ -47,14 +48,22 @@  struct pci_domain_busn_res {
 static struct resource *get_pci_domain_busn_res(int domain_nr)
 {
 	struct pci_domain_busn_res *r;
+	struct resource *ret;
 
-	list_for_each_entry(r, &pci_domain_busn_res_list, list)
-		if (r->domain_nr == domain_nr)
-			return &r->res;
+	mutex_lock(&pci_domain_busn_res_list_lock);
+
+	list_for_each_entry(r, &pci_domain_busn_res_list, list) {
+		if (r->domain_nr == domain_nr) {
+			ret = &r->res;
+			goto out;
+		}
+	}
 
 	r = kzalloc(sizeof(*r), GFP_KERNEL);
-	if (!r)
-		return NULL;
+	if (!r) {
+		ret = NULL;
+		goto out;
+	}
 
 	r->domain_nr = domain_nr;
 	r->res.start = 0;
@@ -62,8 +71,10 @@  static struct resource *get_pci_domain_busn_res(int domain_nr)
 	r->res.flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED;
 
 	list_add_tail(&r->list, &pci_domain_busn_res_list);
-
-	return &r->res;
+	ret = &r->res;
+out:
+	mutex_unlock(&pci_domain_busn_res_list_lock);
+	return ret;
 }
 
 /*