diff mbox series

[v4,1/4] memblock tests: add simulation of physical memory with multiple NUMA nodes

Message ID 49b96ce88dece5b394d5dd4332c1572da917b30a.1662264560.git.remckee0@gmail.com (mailing list archive)
State New
Headers show
Series memblock tests: add NUMA tests for memblock_alloc_try_nid* | expand

Commit Message

Rebecca Mckeever Sept. 4, 2022, 4:21 a.m. UTC
Add function setup_numa_memblock() for setting up a memory layout with
multiple NUMA nodes in a previously allocated dummy physical memory.
This function can be used in place of setup_memblock() in tests that need
to simulate a NUMA system.

setup_numa_memblock():
- allows for setting up a memory layout by specifying the fraction of
  MEM_SIZE in each node

Set CONFIG_NODES_SHIFT to 4 when building with NUMA=1 to allow for up to
16 NUMA nodes.

Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
---
 .../testing/memblock/scripts/Makefile.include |  2 +-
 tools/testing/memblock/tests/common.c         | 29 +++++++++++++++++++
 tools/testing/memblock/tests/common.h         |  4 ++-
 3 files changed, 33 insertions(+), 2 deletions(-)

Comments

David Hildenbrand Sept. 6, 2022, 1:17 p.m. UTC | #1
On 04.09.22 06:21, Rebecca Mckeever wrote:
> Add function setup_numa_memblock() for setting up a memory layout with
> multiple NUMA nodes in a previously allocated dummy physical memory.
> This function can be used in place of setup_memblock() in tests that need
> to simulate a NUMA system.
> 
> setup_numa_memblock():
> - allows for setting up a memory layout by specifying the fraction of
>    MEM_SIZE in each node
> 
> Set CONFIG_NODES_SHIFT to 4 when building with NUMA=1 to allow for up to
> 16 NUMA nodes.
> 
> Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
> ---
>   .../testing/memblock/scripts/Makefile.include |  2 +-
>   tools/testing/memblock/tests/common.c         | 29 +++++++++++++++++++
>   tools/testing/memblock/tests/common.h         |  4 ++-
>   3 files changed, 33 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
> index aa6d82d56a23..998281723590 100644
> --- a/tools/testing/memblock/scripts/Makefile.include
> +++ b/tools/testing/memblock/scripts/Makefile.include
> @@ -3,7 +3,7 @@
>   
>   # Simulate CONFIG_NUMA=y
>   ifeq ($(NUMA), 1)
> -	CFLAGS += -D CONFIG_NUMA
> +	CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
>   endif
>   
>   # Use 32 bit physical addresses.
> diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
> index eec6901081af..b6110df21b2a 100644
> --- a/tools/testing/memblock/tests/common.c
> +++ b/tools/testing/memblock/tests/common.c
> @@ -72,6 +72,35 @@ void setup_memblock(void)
>   	fill_memblock();
>   }
>   
> +/**
> + * setup_numa_memblock:
> + * Set up a memory layout with multiple NUMA nodes in a previously allocated
> + * dummy physical memory.
> + * @nodes: an array containing the denominators of the fractions of MEM_SIZE
> + *         contained in each node (e.g., if nodes[0] = SZ_8, node 0 will
> + *         contain 1/8th of MEM_SIZE)
> + *
> + * The nids will be set to 0 through NUMA_NODES - 1.
> + */
> +void setup_numa_memblock(const phys_addr_t nodes[])
> +{
> +	phys_addr_t base;
> +	int flags;
> +
> +	reset_memblock_regions();
> +	base = (phys_addr_t)memory_block.base;
> +	flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
> +
> +	for (int i = 0; i < NUMA_NODES; i++) {
> +		assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);

I think it would be even easier to understand if this were just a fraction.
E.g., instead of "1/8 * MEM_SIZE" just "1/8". All values have to add up
to 1.

... but then we'd have to mess with floats eventually, so I guess this 
makes it easier to handle these fractions.


We could use "int" and simply specify the fraction in percent, like

nodes[0] = 50;
nodes[1] = 25;
nodes[2] = 25;

and everything has to add up to 100.


> +		phys_addr_t size = MEM_SIZE / nodes[i];


Hmmm, assuming a single node with "MEM_SIZE", we would get size=1.

Shouldn't this be "size = nodes[i]"

?
Rebecca Mckeever Sept. 6, 2022, 11:43 p.m. UTC | #2
On Tue, Sep 06, 2022 at 03:17:46PM +0200, David Hildenbrand wrote:
> On 04.09.22 06:21, Rebecca Mckeever wrote:
> > Add function setup_numa_memblock() for setting up a memory layout with
> > multiple NUMA nodes in a previously allocated dummy physical memory.
> > This function can be used in place of setup_memblock() in tests that need
> > to simulate a NUMA system.
> > 
> > setup_numa_memblock():
> > - allows for setting up a memory layout by specifying the fraction of
> >    MEM_SIZE in each node
> > 
> > Set CONFIG_NODES_SHIFT to 4 when building with NUMA=1 to allow for up to
> > 16 NUMA nodes.
> > 
> > Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
> > ---
> >   .../testing/memblock/scripts/Makefile.include |  2 +-
> >   tools/testing/memblock/tests/common.c         | 29 +++++++++++++++++++
> >   tools/testing/memblock/tests/common.h         |  4 ++-
> >   3 files changed, 33 insertions(+), 2 deletions(-)
> > 
> > diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
> > index aa6d82d56a23..998281723590 100644
> > --- a/tools/testing/memblock/scripts/Makefile.include
> > +++ b/tools/testing/memblock/scripts/Makefile.include
> > @@ -3,7 +3,7 @@
> >   # Simulate CONFIG_NUMA=y
> >   ifeq ($(NUMA), 1)
> > -	CFLAGS += -D CONFIG_NUMA
> > +	CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
> >   endif
> >   # Use 32 bit physical addresses.
> > diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
> > index eec6901081af..b6110df21b2a 100644
> > --- a/tools/testing/memblock/tests/common.c
> > +++ b/tools/testing/memblock/tests/common.c
> > @@ -72,6 +72,35 @@ void setup_memblock(void)
> >   	fill_memblock();
> >   }
> > +/**
> > + * setup_numa_memblock:
> > + * Set up a memory layout with multiple NUMA nodes in a previously allocated
> > + * dummy physical memory.
> > + * @nodes: an array containing the denominators of the fractions of MEM_SIZE
> > + *         contained in each node (e.g., if nodes[0] = SZ_8, node 0 will
> > + *         contain 1/8th of MEM_SIZE)
> > + *
> > + * The nids will be set to 0 through NUMA_NODES - 1.
> > + */
> > +void setup_numa_memblock(const phys_addr_t nodes[])
> > +{
> > +	phys_addr_t base;
> > +	int flags;
> > +
> > +	reset_memblock_regions();
> > +	base = (phys_addr_t)memory_block.base;
> > +	flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
> > +
> > +	for (int i = 0; i < NUMA_NODES; i++) {
> > +		assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);
> 
> I think it would be even easier to get if this would just be a fraction.
> E.g., instead of "1/8 * MEM_SIZE" just "1/8". All values have to add up to
> 1.
> 
> ... but then we'd have to mess with floats eventually, so I guess this makes
> it easier to handle these fractions.
> 
> 
> We could use "int" and simply specify the fraction in percent, like
> 
> nodes[0] = 50;
> nodes[1] = 25;
> nodes[2] = 25;
> 
> and everything has to add up to 100.
> 
This would still be a float for 1/8th (12.5) and 1/16th (6.25). What if
it was the "percent" of 256 (i.e., 0x100)?
> 
> > +		phys_addr_t size = MEM_SIZE / nodes[i];
> 
> 
> Hmmm, assuming a single node with "MEM_SIZE", we would get size=1.
> 
For a single node of MEM_SIZE, nodes[0] would be 1.

> Shouldn't this be "size = nodes[i]"
> 
> ?
No, not with the current implementation. The nodes array stores the
denominator of the fraction that will be multiplied by MEM_SIZE to
determine the size of that node (the numerator is always 1). So if the
size of the node should be 1/8 * MEM_SIZE, the nodes array just stores
the 8. I think the name of the array is misleading. Do you have any
suggestions for a better name?
> 
> -- 
> Thanks,
> 
> David / dhildenb
> 
Thanks,
Rebecca
David Hildenbrand Sept. 7, 2022, 8:44 a.m. UTC | #3
On 07.09.22 01:43, Rebecca Mckeever wrote:
> On Tue, Sep 06, 2022 at 03:17:46PM +0200, David Hildenbrand wrote:
>> On 04.09.22 06:21, Rebecca Mckeever wrote:
>>> Add function setup_numa_memblock() for setting up a memory layout with
>>> multiple NUMA nodes in a previously allocated dummy physical memory.
>>> This function can be used in place of setup_memblock() in tests that need
>>> to simulate a NUMA system.
>>>
>>> setup_numa_memblock():
>>> - allows for setting up a memory layout by specifying the fraction of
>>>     MEM_SIZE in each node
>>>
>>> Set CONFIG_NODES_SHIFT to 4 when building with NUMA=1 to allow for up to
>>> 16 NUMA nodes.
>>>
>>> Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
>>> ---
>>>    .../testing/memblock/scripts/Makefile.include |  2 +-
>>>    tools/testing/memblock/tests/common.c         | 29 +++++++++++++++++++
>>>    tools/testing/memblock/tests/common.h         |  4 ++-
>>>    3 files changed, 33 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
>>> index aa6d82d56a23..998281723590 100644
>>> --- a/tools/testing/memblock/scripts/Makefile.include
>>> +++ b/tools/testing/memblock/scripts/Makefile.include
>>> @@ -3,7 +3,7 @@
>>>    # Simulate CONFIG_NUMA=y
>>>    ifeq ($(NUMA), 1)
>>> -	CFLAGS += -D CONFIG_NUMA
>>> +	CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
>>>    endif
>>>    # Use 32 bit physical addresses.
>>> diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
>>> index eec6901081af..b6110df21b2a 100644
>>> --- a/tools/testing/memblock/tests/common.c
>>> +++ b/tools/testing/memblock/tests/common.c
>>> @@ -72,6 +72,35 @@ void setup_memblock(void)
>>>    	fill_memblock();
>>>    }
>>> +/**
>>> + * setup_numa_memblock:
>>> + * Set up a memory layout with multiple NUMA nodes in a previously allocated
>>> + * dummy physical memory.
>>> + * @nodes: an array containing the denominators of the fractions of MEM_SIZE
>>> + *         contained in each node (e.g., if nodes[0] = SZ_8, node 0 will
>>> + *         contain 1/8th of MEM_SIZE)
>>> + *
>>> + * The nids will be set to 0 through NUMA_NODES - 1.
>>> + */
>>> +void setup_numa_memblock(const phys_addr_t nodes[])
>>> +{
>>> +	phys_addr_t base;
>>> +	int flags;
>>> +
>>> +	reset_memblock_regions();
>>> +	base = (phys_addr_t)memory_block.base;
>>> +	flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
>>> +
>>> +	for (int i = 0; i < NUMA_NODES; i++) {
>>> +		assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);
>>
>> I think it would be even easier to get if this would just be a fraction.
>> E.g., instead of "1/8 * MEM_SIZE" just "1/8". All values have to add up to
>> 1.
>>
>> ... but then we'd have to mess with floats eventually, so I guess this makes
>> it easier to handle these fractions.
>>
>>
>> We could use "int" and simply specify the fraction in percent, like
>>
>> nodes[0] = 50;
>> nodes[1] = 25;
>> nodes[2] = 25;
>>
>> and everything has to add up to 100.
>>
> This would still be a float for 1/8th (12.5) and 1/16th (6.25). What if
> it was the "percent" of 256 (i.e., 0x100)?

Right, or in something "smaller" like 1/32 th. I don't think we go below 
that?

If we don't need more digits, why not in "basis points" (per ten thousand)
-> https://en.wikipedia.org/wiki/Basis_point

nodes[0] = 5000; /* 1/2 */
nodes[1] = 2500; /* 1/4 */
nodes[2] = 1250; /* 1/8 */
nodes[3] = 625;  /* 1/16 */
nodes[4] = 625;  /* 1/16 */


>>
>>> +		phys_addr_t size = MEM_SIZE / nodes[i];
>>
>>
>> Hmmm, assuming a single node with "MEM_SIZE", we would get size=1.
>>
> For a single node of MEM_SIZE, nodes[0] would be 1.
> 
>> Shouldn't this be "size = nodes[i]"
>>
>> ?
> No, not with the current implementation. The nodes array stores the
> denominator of the fraction that will be multiplied by MEM_SIZE to
> determine the size of that node (the numerator is always 1). So if the
> size of the node should be 1/8 * MEM_SIZE, the nodes array just stores
> the 8. I think the name of the array is misleading. Do you have any
> suggestions for a better name?

Then I am confused about the
	assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);

assertion :)

I think it would really be best to just store the actual fraction
somehow. But maybe I am just confused :)
Rebecca Mckeever Sept. 7, 2022, 11:52 p.m. UTC | #4
On Wed, Sep 07, 2022 at 10:44:44AM +0200, David Hildenbrand wrote:
> On 07.09.22 01:43, Rebecca Mckeever wrote:
> > On Tue, Sep 06, 2022 at 03:17:46PM +0200, David Hildenbrand wrote:
> > > On 04.09.22 06:21, Rebecca Mckeever wrote:
> > > > Add function setup_numa_memblock() for setting up a memory layout with
> > > > multiple NUMA nodes in a previously allocated dummy physical memory.
> > > > This function can be used in place of setup_memblock() in tests that need
> > > > to simulate a NUMA system.
> > > > 
> > > > setup_numa_memblock():
> > > > - allows for setting up a memory layout by specifying the fraction of
> > > >     MEM_SIZE in each node
> > > > 
> > > > Set CONFIG_NODES_SHIFT to 4 when building with NUMA=1 to allow for up to
> > > > 16 NUMA nodes.
> > > > 
> > > > Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
> > > > ---
> > > >    .../testing/memblock/scripts/Makefile.include |  2 +-
> > > >    tools/testing/memblock/tests/common.c         | 29 +++++++++++++++++++
> > > >    tools/testing/memblock/tests/common.h         |  4 ++-
> > > >    3 files changed, 33 insertions(+), 2 deletions(-)
> > > > 
> > > > diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
> > > > index aa6d82d56a23..998281723590 100644
> > > > --- a/tools/testing/memblock/scripts/Makefile.include
> > > > +++ b/tools/testing/memblock/scripts/Makefile.include
> > > > @@ -3,7 +3,7 @@
> > > >    # Simulate CONFIG_NUMA=y
> > > >    ifeq ($(NUMA), 1)
> > > > -	CFLAGS += -D CONFIG_NUMA
> > > > +	CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
> > > >    endif
> > > >    # Use 32 bit physical addresses.
> > > > diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
> > > > index eec6901081af..b6110df21b2a 100644
> > > > --- a/tools/testing/memblock/tests/common.c
> > > > +++ b/tools/testing/memblock/tests/common.c
> > > > @@ -72,6 +72,35 @@ void setup_memblock(void)
> > > >    	fill_memblock();
> > > >    }
> > > > +/**
> > > > + * setup_numa_memblock:
> > > > + * Set up a memory layout with multiple NUMA nodes in a previously allocated
> > > > + * dummy physical memory.
> > > > + * @nodes: an array containing the denominators of the fractions of MEM_SIZE
> > > > + *         contained in each node (e.g., if nodes[0] = SZ_8, node 0 will
> > > > + *         contain 1/8th of MEM_SIZE)
> > > > + *
> > > > + * The nids will be set to 0 through NUMA_NODES - 1.
> > > > + */
> > > > +void setup_numa_memblock(const phys_addr_t nodes[])
> > > > +{
> > > > +	phys_addr_t base;
> > > > +	int flags;
> > > > +
> > > > +	reset_memblock_regions();
> > > > +	base = (phys_addr_t)memory_block.base;
> > > > +	flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
> > > > +
> > > > +	for (int i = 0; i < NUMA_NODES; i++) {
> > > > +		assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);
> > > 
> > > I think it would be even easier to get if this would just be a fraction.
> > > E.g., instead of "1/8 * MEM_SIZE" just "1/8". All values have to add up to
> > > 1.
> > > 
> > > ... but then we'd have to mess with floats eventually, so I guess this makes
> > > it easier to handle these fractions.
> > > 
> > > 
> > > We could use "int" and simply specify the fraction in percent, like
> > > 
> > > nodes[0] = 50;
> > > nodes[1] = 25;
> > > nodes[2] = 25;
> > > 
> > > and everything has to add up to 100.
> > > 
> > This would still be a float for 1/8th (12.5) and 1/16th (6.25). What if
> > it was the "percent" of 256 (i.e., 0x100)?
> 
> Right, or in something "smaller" like 1/32 th. I don't think we go below
> that?
> 
> If we don't need more digits, why not in "basis points" (per ten thousand)
> -> https://en.wikipedia.org/wiki/Basis_point
> 
Basis points should work.

> nodes[0] = 5000; /* 1/2 */
> nodes[1] = 2500; /* 1/4 */
> nodes[2] = 1250; /* 1/8 */
> nodes[3] = 625;  /* 1/16 */
> nodes[4] = 625;  /* 1/16 */
> 
> 
> > > 
> > > > +		phys_addr_t size = MEM_SIZE / nodes[i];
> > > 
> > > 
> > > Hmmm, assuming a single node with "MEM_SIZE", we would get size=1.
> > > 
> > For a single node of MEM_SIZE, nodes[0] would be 1.
> > 
> > > Shouldn't this be "size = nodes[i]"
> > > 
> > > ?
> > No, not with the current implementation. The nodes array stores the
> > denominator of the fraction that will be multiplied by MEM_SIZE to
> > determine the size of that node (the numerator is always 1). So if the
> > size of the node should be 1/8 * MEM_SIZE, the nodes array just stores
> > the 8. I think the name of the array is misleading. Do you have any
> > suggestions for a better name?
> 
> Then I am confused about the
> 	assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);
> 
> assertion :)
> 
The first part of the assert ensures that size doesn't become less than
1, and the second part prevents a divide by 0. I see how this is
confusing now.

> I think it would really be best to just store the actual fraction somehow.
> But maybe just I am confused :)
> 
> -- 
> Thanks,
> 
> David / dhildenb
> 
Thanks,
Rebecca
diff mbox series

Patch

diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include
index aa6d82d56a23..998281723590 100644
--- a/tools/testing/memblock/scripts/Makefile.include
+++ b/tools/testing/memblock/scripts/Makefile.include
@@ -3,7 +3,7 @@ 
 
 # Simulate CONFIG_NUMA=y
 ifeq ($(NUMA), 1)
-	CFLAGS += -D CONFIG_NUMA
+	CFLAGS += -D CONFIG_NUMA -D CONFIG_NODES_SHIFT=4
 endif
 
 # Use 32 bit physical addresses.
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index eec6901081af..b6110df21b2a 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -72,6 +72,35 @@  void setup_memblock(void)
 	fill_memblock();
 }
 
+/**
+ * setup_numa_memblock:
+ * Set up a memory layout with multiple NUMA nodes in a previously allocated
+ * dummy physical memory. Nodes are added back to back from memory_block.base.
+ * @nodes: an array of NUMA_NODES denominators; node i is given
+ *         MEM_SIZE / nodes[i] bytes (e.g., if nodes[0] = SZ_8, node 0 will
+ *         contain 1/8th of MEM_SIZE). Each must be in [1, MEM_SIZE] (asserted).
+ *
+ * The nids will be set to 0 through NUMA_NODES - 1.
+ */
+void setup_numa_memblock(const phys_addr_t nodes[])
+{
+	phys_addr_t base;
+	int flags;
+
+	reset_memblock_regions();
+	base = (phys_addr_t)memory_block.base;
+	flags = (movable_node_is_enabled()) ? MEMBLOCK_NONE : MEMBLOCK_HOTPLUG;
+
+	for (int i = 0; i < NUMA_NODES; i++) {
+		assert(nodes[i] <= MEM_SIZE && nodes[i] > 0);
+		phys_addr_t size = MEM_SIZE / nodes[i];
+
+		memblock_add_node(base, size, i, flags);
+		base += size;
+	}
+	fill_memblock();
+}
+
 void dummy_physical_memory_init(void)
 {
 	memory_block.base = malloc(MEM_SIZE);
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index 78128e109a95..caf01543a315 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -10,7 +10,8 @@ 
 #include <linux/printk.h>
 #include <../selftests/kselftest.h>
 
-#define MEM_SIZE SZ_16K
+#define MEM_SIZE		SZ_16K
+#define NUMA_NODES		8
 
 enum test_flags {
 	/* No special request. */
@@ -102,6 +103,7 @@  struct region {
 void reset_memblock_regions(void);
 void reset_memblock_attributes(void);
 void setup_memblock(void);
+void setup_numa_memblock(const phys_addr_t nodes[]);
 void dummy_physical_memory_init(void);
 void dummy_physical_memory_cleanup(void);
 void parse_args(int argc, char **argv);