diff mbox

[v4,12/16] x86, irq, ACPI: Implement interface to support ACPI based IOAPIC hot-addition

Message ID 1409192561-19744-13-git-send-email-jiang.liu@linux.intel.com (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Jiang Liu Aug. 28, 2014, 2:22 a.m. UTC
Implement acpi_register_ioapic() and enhance mp_register_ioapic()
to support ACPI based IOAPIC hot-addition.

Signed-off-by: Jiang Liu <jiang.liu@linux.intel.com>
---
 arch/x86/kernel/acpi/boot.c    |   31 +++++++++++++++++++++++++++++--
 arch/x86/kernel/apic/io_apic.c |   27 ++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 5 deletions(-)

Comments

Thomas Gleixner Sept. 9, 2014, 12:20 p.m. UTC | #1
On Thu, 28 Aug 2014, Jiang Liu wrote:
>  EXPORT_SYMBOL(acpi_register_ioapic);
> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
> index 6e67af0c5f99..b286461cabf9 100644
> --- a/arch/x86/kernel/apic/io_apic.c
> +++ b/arch/x86/kernel/apic/io_apic.c
> @@ -3851,7 +3851,13 @@ static int bad_ioapic_register(int idx)
>  
>  static int find_free_ioapic_entry(void)
>  {
> -	return nr_ioapics;
> +	int idx;
> +
> +	for (idx = 0; idx < MAX_IO_APICS; idx++)
> +		if (ioapics[idx].nr_registers == 0)
> +			return idx;
> +
> +	return MAX_IO_APICS;
>  }
>  
>  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
> @@ -3867,8 +3873,15 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>  	}
>  	for_each_ioapic(ioapic)
>  		if (ioapics[ioapic].mp_config.apicaddr == address) {
> -			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
> -				address, ioapic);
> +			/*
> +			 * IOAPIC unit may also be visible in PCI scope.
> +			 * When ioapic PCI driver's probe() is called,
> +			 * the IOAPIC unit may have already been initialized
> +			 * at boot time.
> +			 */
> +			if (!ioapic_initialized)
> +				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
> +					address, ioapic);

Hmm. This smells fishy. Why do we allow multiple initializations of
the same IOAPIC in the first place? Either it's done via ACPI or via
PCI, but not both.

>  			return -EEXIST;
>  		}
>  
> @@ -3918,6 +3931,14 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>  	ioapics[idx].irqdomain = NULL;
>  	ioapics[idx].irqdomain_cfg = *cfg;
>  
> +	if (ioapic_initialized) {

I have a hard time understanding this conditional. Why can't we do
that unconditionally?

> +		if (mp_irqdomain_create(idx)) {
> +			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
> +			return -ENOMEM;
> +		}
> +		alloc_ioapic_saved_registers(idx);
> +	}
> +
>  	if (gsi_cfg->gsi_end >= gsi_top)
>  		gsi_top = gsi_cfg->gsi_end + 1;
>  	if (nr_ioapics <= idx)

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu Sept. 10, 2014, 3:13 a.m. UTC | #2
On 2014/9/9 20:20, Thomas Gleixner wrote:
> On Thu, 28 Aug 2014, Jiang Liu wrote:
>>  EXPORT_SYMBOL(acpi_register_ioapic);
>> diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
>> index 6e67af0c5f99..b286461cabf9 100644
>> --- a/arch/x86/kernel/apic/io_apic.c
>> +++ b/arch/x86/kernel/apic/io_apic.c
>> @@ -3851,7 +3851,13 @@ static int bad_ioapic_register(int idx)
>>  
>>  static int find_free_ioapic_entry(void)
>>  {
>> -	return nr_ioapics;
>> +	int idx;
>> +
>> +	for (idx = 0; idx < MAX_IO_APICS; idx++)
>> +		if (ioapics[idx].nr_registers == 0)
>> +			return idx;
>> +
>> +	return MAX_IO_APICS;
>>  }
>>  
>>  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>> @@ -3867,8 +3873,15 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>  	}
>>  	for_each_ioapic(ioapic)
>>  		if (ioapics[ioapic].mp_config.apicaddr == address) {
>> -			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>> -				address, ioapic);
>> +			/*
>> +			 * IOAPIC unit may also be visible in PCI scope.
>> +			 * When ioapic PCI driver's probe() is called,
>> +			 * the IOAPIC unit may have already been initialized
>> +			 * at boot time.
>> +			 */
>> +			if (!ioapic_initialized)
>> +				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>> +					address, ioapic);
> 
> Hmm. This smells fishy. Why do we allow multiple initializations of
> the same IOAPIC in the first place. Either it's done via ACPI or via
> PCI, but not both.
The ACPI subsystem will register and initialize all IOAPICs when walking
the ACPI MADT table during boot, before initializing the PCI subsystem.
Later, when binding the ioapic PCI driver to an IOAPIC PCI device, it will
try to register the IOAPIC device again.

After this patchset is applied, we could remove the !ioapic_initialized
check. We check acpi_ioapic_register() before calling
acpi_register_ioapic(). So the check becomes redundant.
Or we could remove the temporary code from this patch.

> 
>>  			return -EEXIST;
>>  		}
>>  
>> @@ -3918,6 +3931,14 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>  	ioapics[idx].irqdomain = NULL;
>>  	ioapics[idx].irqdomain_cfg = *cfg;
>>  
>> +	if (ioapic_initialized) {
> 
> I have a hard time to understand this conditional. Why can't we do
> that unconditionally?
How about the following comment?
/*
 * If mp_register_ioapic() is called during early boot stage when
 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
 * we are still using bootmem allocator. So delay it to setup_IO_APIC().
 */
Regards!
Gerry
> 
>> +		if (mp_irqdomain_create(idx)) {
>> +			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
>> +			return -ENOMEM;
>> +		}
>> +		alloc_ioapic_saved_registers(idx);
>> +	}
>> +
>>  	if (gsi_cfg->gsi_end >= gsi_top)
>>  		gsi_top = gsi_cfg->gsi_end + 1;
>>  	if (nr_ioapics <= idx)
> 
> Thanks,
> 
> 	tglx
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Sept. 10, 2014, 8:06 p.m. UTC | #3
On Wed, 10 Sep 2014, Jiang Liu wrote:
> >>  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
> >> @@ -3867,8 +3873,15 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
> >>  	}
> >>  	for_each_ioapic(ioapic)
> >>  		if (ioapics[ioapic].mp_config.apicaddr == address) {
> >> -			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
> >> -				address, ioapic);
> >> +			/*
> >> +			 * IOAPIC unit may also be visible in PCI scope.
> >> +			 * When ioapic PCI driver's probe() is called,
> >> +			 * the IOAPIC unit may have already been initialized
> >> +			 * at boot time.
> >> +			 */
> >> +			if (!ioapic_initialized)
> >> +				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
> >> +					address, ioapic);
> > 
> > Hmm. This smells fishy. Why do we allow multiple initializations of
> > the same IOAPIC in the first place. Either it's done via ACPI or via
> > PCI, but not both.
> The ACPI subsystem will register and initialize all IOAPICs when walking
> ACPI MADT table during boot, before initializing PCI subsystem.
> Later when binding ioapic PCI driver to IOAPIC PCI device, it will try
> to register the IOAPIC device again.
> 
> After this patchset is applied, we could remove the !ioapic_initialized
> check. We check acpi_ioapic_register() before calling
> acpi_register_ioapic(). So the check becomes redundant.
> Or we could remove the temporary code from this patch.

How about removing the dysfunctional ioapic PCI driver first and then
implementing the whole thing cleanly?
 
> > 
> >>  			return -EEXIST;
> >>  		}
> >>  
> >> @@ -3918,6 +3931,14 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
> >>  	ioapics[idx].irqdomain = NULL;
> >>  	ioapics[idx].irqdomain_cfg = *cfg;
> >>  
> >> +	if (ioapic_initialized) {
> > 
> > I have a hard time to understand this conditional. Why can't we do
> > that unconditionally?
> How about following comments?
> /*
>  * If mp_register_ioapic() is called during early boot stage when
>  * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
>  * we are still using bootmem allocator. So delay it to setup_IO_APIC().
>  */

Fine, but then the "if (ioapic_initialized)" conditional still does
not make sense. We surely have some global, non-ioapic-specific
indicator that bootmem is done and the proper memory allocator is
available, right?

Aside from that, is there a point in walking those tables before we can
actually make any use of their content?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu Sept. 11, 2014, 6:05 a.m. UTC | #4
On 2014/9/11 4:06, Thomas Gleixner wrote:
> On Wed, 10 Sep 2014, Jiang Liu wrote:
>>>>  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>> @@ -3867,8 +3873,15 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>>  	}
>>>>  	for_each_ioapic(ioapic)
>>>>  		if (ioapics[ioapic].mp_config.apicaddr == address) {
>>>> -			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>>>> -				address, ioapic);
>>>> +			/*
>>>> +			 * IOAPIC unit may also be visible in PCI scope.
>>>> +			 * When ioapic PCI driver's probe() is called,
>>>> +			 * the IOAPIC unit may have already been initialized
>>>> +			 * at boot time.
>>>> +			 */
>>>> +			if (!ioapic_initialized)
>>>> +				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>>>> +					address, ioapic);
>>>
>>> Hmm. This smells fishy. Why do we allow multiple initializations of
>>> the same IOAPIC in the first place. Either it's done via ACPI or via
>>> PCI, but not both.
>> The ACPI subsystem will register and initialize all IOAPICs when walking
>> ACPI MADT table during boot, before initializing PCI subsystem.
>> Later when binding ioapic PCI driver to IOAPIC PCI device, it will try
>> to register the IOAPIC device again.
>>
>> After this patchset is applied, we could remove the !ioapic_initialized
>> check. We check acpi_ioapic_register() before calling
>> acpi_register_ioapic(). So the check becomes redundant.
>> Or we could remove the temporary code from this patch.
> 
> How about removing the disfunctional ioapic PCI driver first and then
> implementing the whole thing cleanly?
Good suggestion:)

>  
>>>
>>>>  			return -EEXIST;
>>>>  		}
>>>>  
>>>> @@ -3918,6 +3931,14 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>>  	ioapics[idx].irqdomain = NULL;
>>>>  	ioapics[idx].irqdomain_cfg = *cfg;
>>>>  
>>>> +	if (ioapic_initialized) {
>>>
>>> I have a hard time to understand this conditional. Why can't we do
>>> that unconditionally?
>> How about following comments?
>> /*
>>  * If mp_register_ioapic() is called during early boot stage when
>>  * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
>>  * we are still using bootmem allocator. So delay it to setup_IO_APIC().
>>  */
> 
> Fine, but then the "if (ioapic_initialized)" conditional still does
> not make sense. We surely have some global non ioapic specific
> indicator that bootmem is done and the proper memory allocator is
> available, right?
The ioapic_initialized flag will be used to check whether we have created
irqdomains for the IOAPICs. Currently arch_dynirq_lower_bound() uses that
flag, and alloc_irq_from_domain() will use it later as well.

> 
> Aside of that is there a point to walk those tables before we actually
> can make any use of their content?
At least we depend on walking those tables to detect whether the system
has IOAPICs available :)

Regards!
Gerry
> 
> Thanks,
> 
> 	tglx
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jiang Liu Sept. 11, 2014, 6:08 a.m. UTC | #5
On 2014/9/11 4:06, Thomas Gleixner wrote:
> On Wed, 10 Sep 2014, Jiang Liu wrote:
>>>>  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>> @@ -3867,8 +3873,15 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>>  	}
>>>>  	for_each_ioapic(ioapic)
>>>>  		if (ioapics[ioapic].mp_config.apicaddr == address) {
>>>> -			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>>>> -				address, ioapic);
>>>> +			/*
>>>> +			 * IOAPIC unit may also be visible in PCI scope.
>>>> +			 * When ioapic PCI driver's probe() is called,
>>>> +			 * the IOAPIC unit may have already been initialized
>>>> +			 * at boot time.
>>>> +			 */
>>>> +			if (!ioapic_initialized)
>>>> +				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
>>>> +					address, ioapic);
>>>
>>> Hmm. This smells fishy. Why do we allow multiple initializations of
>>> the same IOAPIC in the first place. Either it's done via ACPI or via
>>> PCI, but not both.
>> The ACPI subsystem will register and initialize all IOAPICs when walking
>> ACPI MADT table during boot, before initializing PCI subsystem.
>> Later when binding ioapic PCI driver to IOAPIC PCI device, it will try
>> to register the IOAPIC device again.
>>
>> After this patchset is applied, we could remove the !ioapic_initialized
>> check. We check acpi_ioapic_register() before calling
>> acpi_register_ioapic(). So the check becomes redundant.
>> Or we could remove the temporary code from this patch.
> 
> How about removing the disfunctional ioapic PCI driver first and then
> implementing the whole thing cleanly?
>  
>>>
>>>>  			return -EEXIST;
>>>>  		}
>>>>  
>>>> @@ -3918,6 +3931,14 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
>>>>  	ioapics[idx].irqdomain = NULL;
>>>>  	ioapics[idx].irqdomain_cfg = *cfg;
>>>>  
>>>> +	if (ioapic_initialized) {
>>>
>>> I have a hard time to understand this conditional. Why can't we do
>>> that unconditionally?
>> How about following comments?
>> /*
>>  * If mp_register_ioapic() is called during early boot stage when
>>  * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
>>  * we are still using bootmem allocator. So delay it to setup_IO_APIC().
>>  */
> 
> Fine, but then the "if (ioapic_initialized)" conditional still does
> not make sense. We surely have some global non ioapic specific
> indicator that bootmem is done and the proper memory allocator is
> available, right?
Maybe a good name helps here. How about
bool hotplug = !!ioapic_initialized;

if (hotplug)
	mp_irqdomain_create(idx);

Regards!
Gerry	

> 
> Aside of that is there a point to walk those tables before we actually
> can make any use of their content?
> 
> Thanks,
> 
> 	tglx
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e23f7460c3f8..796cd9e31ef3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -764,8 +764,35 @@  EXPORT_SYMBOL(acpi_unmap_lsapic);
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
+	int ret = -ENOSYS;
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	int ioapic_id;
+	u64 addr;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
+
+	ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
+	if (ioapic_id < 0) {
+		unsigned long long uid;
+		acpi_status status;
+
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &uid);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
+			return -EINVAL;
+		}
+		ioapic_id = (int)uid;
+	}
+
+	down_write(&acpi_ioapic_rwsem);
+	ret  = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
+	up_write(&acpi_ioapic_rwsem);
+#endif
+
+	return ret;
 }
 
 EXPORT_SYMBOL(acpi_register_ioapic);
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 6e67af0c5f99..b286461cabf9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -3851,7 +3851,13 @@  static int bad_ioapic_register(int idx)
 
 static int find_free_ioapic_entry(void)
 {
-	return nr_ioapics;
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
 }
 
 int mp_register_ioapic(int id, u32 address, u32 gsi_base,
@@ -3867,8 +3873,15 @@  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 	}
 	for_each_ioapic(ioapic)
 		if (ioapics[ioapic].mp_config.apicaddr == address) {
-			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
-				address, ioapic);
+			/*
+			 * IOAPIC unit may also be visible in PCI scope.
+			 * When ioapic PCI driver's probe() is called,
+			 * the IOAPIC unit may have already been initialized
+			 * at boot time.
+			 */
+			if (!ioapic_initialized)
+				pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+					address, ioapic);
 			return -EEXIST;
 		}
 
@@ -3918,6 +3931,14 @@  int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 	ioapics[idx].irqdomain = NULL;
 	ioapics[idx].irqdomain_cfg = *cfg;
 
+	if (ioapic_initialized) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
+
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
 	if (nr_ioapics <= idx)