diff mbox series

[v2,10/10] xen/x86: add detection of memory interleaves for different nodes

Message ID 20220418090735.3940393-11-wei.chen@arm.com (mailing list archive)
State Superseded
Headers show
Series Device tree based NUMA support for Arm - Part#1 | expand

Commit Message

Wei Chen April 18, 2022, 9:07 a.m. UTC
One NUMA node may contain several memory blocks. In the current Xen
code, Xen maintains a node memory range for each node to cover
all of its memory blocks. But here comes the problem: if the gap
between two of one node's memory blocks contains memory blocks that
don't belong to this node (remote memory blocks), this node's memory
range will be expanded to cover these remote memory blocks.

One node's memory range containing other nodes' memory is
obviously not reasonable. This means the current NUMA code can only
support nodes that have no interleaved memory blocks. However, on a
physical machine, the addresses of multiple nodes can be interleaved.

So in this patch, we add code to detect memory interleaves of
different nodes. NUMA initialization will fail and error
messages will be printed when Xen detects such a hardware configuration.

Signed-off-by: Wei Chen <wei.chen@arm.com>
---
v1 ->v2:
1. Update the description to say that what we're after is no memory
   interleaves of different nodes.
2. Only update node range when it passes the interleave check.
3. Don't use full upper-case for "node".
---
 xen/arch/x86/srat.c | 49 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

Comments

Jan Beulich April 26, 2022, 9:20 a.m. UTC | #1
On 18.04.2022 11:07, Wei Chen wrote:
> --- a/xen/arch/x86/srat.c
> +++ b/xen/arch/x86/srat.c
> @@ -271,6 +271,35 @@ acpi_numa_processor_affinity_init(const struct acpi_srat_cpu_affinity *pa)
>  		       pxm, pa->apic_id, node);
>  }
>  
> +/*
> + * Check to see if there are other nodes within this node's range.
> + * We just need to check full contains situation. Because overlaps
> + * have been checked before by conflicting_memblks.
> + */
> +static bool __init check_node_memory_interleave(nodeid_t nid,
> +                                                paddr_t start, paddr_t end)
> +{
> +	nodeid_t i;
> +	const struct node *nd = &nodes[nid];
> +
> +	for_each_node_mask(i, memory_nodes_parsed)
> +	{
> +		/* Skip itself */
> +		if (i == nid)
> +			continue;
> +
> +		nd = &nodes[i];
> +		if (start < nd->start && nd->end < end) {
> +			printk(KERN_ERR
> +			       "Node %u: (%"PRIpaddr"-%"PRIpaddr") interleaves with node %u (%"PRIpaddr"-%"PRIpaddr")\n",
> +			       nid, start, end, i, nd->start, nd->end);
> +			return true;
> +		}
> +	}
> +
> +	return false;
> +}
> +
>  /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
>  void __init
>  acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> @@ -340,10 +369,22 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)

Just up from here there already is overlap detection (via a call to
conflicting_memblks(), and you even mention that in the earlier
comment). If that doesn't cover all cases, I think it wants fixing
there rather than introducing a 2nd checking function. But afaics
that code covers the "fully contains" case.

Jan

>  			nd->start = start;
>  			nd->end = end;
>  		} else {
> -			if (start < nd->start)
> -				nd->start = start;
> -			if (nd->end < end)
> -				nd->end = end;
> +			paddr_t new_start = nd->start;
> +			paddr_t new_end = nd->end;
> +
> +			if (start < new_start)
> +				new_start = start;
> +			if (new_end < end)
> +				new_end = end;
> +
> +			/* Check whether new range contains memory for other nodes */
> +			if (check_node_memory_interleave(node, new_start, new_end)) {
> +				bad_srat();
> +				return;
> +			}
> +
> +			nd->start = new_start;
> +			nd->end = new_end;
>  		}
>  	}
>  	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
Wei Chen April 26, 2022, 11:07 a.m. UTC | #2
Hi Jan,

On 2022/4/26 17:20, Jan Beulich wrote:
> On 18.04.2022 11:07, Wei Chen wrote:
>> --- a/xen/arch/x86/srat.c
>> +++ b/xen/arch/x86/srat.c
>> @@ -271,6 +271,35 @@ acpi_numa_processor_affinity_init(const struct acpi_srat_cpu_affinity *pa)
>>   		       pxm, pa->apic_id, node);
>>   }
>>   
>> +/*
>> + * Check to see if there are other nodes within this node's range.
>> + * We just need to check full contains situation. Because overlaps
>> + * have been checked before by conflicting_memblks.
>> + */
>> +static bool __init check_node_memory_interleave(nodeid_t nid,
>> +                                                paddr_t start, paddr_t end)
>> +{
>> +	nodeid_t i;
>> +	const struct node *nd = &nodes[nid];
>> +
>> +	for_each_node_mask(i, memory_nodes_parsed)
>> +	{
>> +		/* Skip itself */
>> +		if (i == nid)
>> +			continue;
>> +
>> +		nd = &nodes[i];
>> +		if (start < nd->start && nd->end < end) {
>> +			printk(KERN_ERR
>> +			       "Node %u: (%"PRIpaddr"-%"PRIpaddr") interleaves with node %u (%"PRIpaddr"-%"PRIpaddr")\n",
>> +			       nid, start, end, i, nd->start, nd->end);
>> +			return true;
>> +		}
>> +	}
>> +
>> +	return false;
>> +}
>> +
>>   /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
>>   void __init
>>   acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
>> @@ -340,10 +369,22 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
> 
> Just up from here there already is overlap detection (via a call to
> conflicting_memblks(), and you even mention that in the earlier
> comment). If that doesn't cover all cases, I think it wants fixing
> there rather than introducing a 2nd checking function. But afaics
> that code covers the "fully contains" case.
> 

Yes, that makes sense, I will try to add this case check in 
conflicting_memblks.

Thanks,
Wei Chen

> Jan
> 
>>   			nd->start = start;
>>   			nd->end = end;
>>   		} else {
>> -			if (start < nd->start)
>> -				nd->start = start;
>> -			if (nd->end < end)
>> -				nd->end = end;
>> +			paddr_t new_start = nd->start;
>> +			paddr_t new_end = nd->end;
>> +
>> +			if (start < new_start)
>> +				new_start = start;
>> +			if (new_end < end)
>> +				new_end = end;
>> +
>> +			/* Check whether new range contains memory for other nodes */
>> +			if (check_node_memory_interleave(node, new_start, new_end)) {
>> +				bad_srat();
>> +				return;
>> +			}
>> +
>> +			nd->start = new_start;
>> +			nd->end = new_end;
>>   		}
>>   	}
>>   	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",
>
diff mbox series

Patch

diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
index c3e13059e9..53968e4085 100644
--- a/xen/arch/x86/srat.c
+++ b/xen/arch/x86/srat.c
@@ -271,6 +271,35 @@  acpi_numa_processor_affinity_init(const struct acpi_srat_cpu_affinity *pa)
 		       pxm, pa->apic_id, node);
 }
 
+/*
+ * Check to see if there are other nodes within this node's range.
+ * We just need to check full contains situation. Because overlaps
+ * have been checked before by conflicting_memblks.
+ */
+static bool __init check_node_memory_interleave(nodeid_t nid,
+                                                paddr_t start, paddr_t end)
+{
+	nodeid_t i;
+	const struct node *nd = &nodes[nid];
+
+	for_each_node_mask(i, memory_nodes_parsed)
+	{
+		/* Skip itself */
+		if (i == nid)
+			continue;
+
+		nd = &nodes[i];
+		if (start < nd->start && nd->end < end) {
+			printk(KERN_ERR
+			       "Node %u: (%"PRIpaddr"-%"PRIpaddr") interleaves with node %u (%"PRIpaddr"-%"PRIpaddr")\n",
+			       nid, start, end, i, nd->start, nd->end);
+			return true;
+		}
+	}
+
+	return false;
+}
+
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
 void __init
 acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
@@ -340,10 +369,22 @@  acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
 			nd->start = start;
 			nd->end = end;
 		} else {
-			if (start < nd->start)
-				nd->start = start;
-			if (nd->end < end)
-				nd->end = end;
+			paddr_t new_start = nd->start;
+			paddr_t new_end = nd->end;
+
+			if (start < new_start)
+				new_start = start;
+			if (new_end < end)
+				new_end = end;
+
+			/* Check whether new range contains memory for other nodes */
+			if (check_node_memory_interleave(node, new_start, new_end)) {
+				bad_srat();
+				return;
+			}
+
+			nd->start = new_start;
+			nd->end = new_end;
 		}
 	}
 	printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIpaddr"-%"PRIpaddr"%s\n",