diff mbox series

[v2,1/2] ramlist: Make dirty bitmap blocks of ramlist resizable

Message ID 20201130131104.10600-2-zhukeqian1@huawei.com (mailing list archive)
State New, archived
Headers show
Series bugfix: Decrease dirty bitmap blocks after we remove ramblock | expand

Commit Message

zhukeqian Nov. 30, 2020, 1:11 p.m. UTC
When we remove a ramblock, we should decrease the dirty bitmap blocks
of ramlist to avoid memory leakage. This patch rebuilds dirty_memory_
extend to support both "extend" and "decrease".

Reported-by: Euler Robot <euler.robot@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
---
 softmmu/physmem.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

Comments

Stefan Hajnoczi Dec. 17, 2020, 10:05 a.m. UTC | #1
On Mon, Nov 30, 2020 at 09:11:03PM +0800, Keqian Zhu wrote:
> @@ -1839,15 +1841,26 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
>          new_blocks = g_malloc(sizeof(*new_blocks) +
>                                sizeof(new_blocks->blocks[0]) * new_num_blocks);
>  
> -        if (old_num_blocks) {
> +        if (cpy_num_blocks) {
>              memcpy(new_blocks->blocks, old_blocks->blocks,
> -                   old_num_blocks * sizeof(old_blocks->blocks[0]));
> +                   cpy_num_blocks * sizeof(old_blocks->blocks[0]));
>          }
>  
> -        for (j = old_num_blocks; j < new_num_blocks; j++) {
> -            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> +        if (extend) {
> +            for (j = cpy_num_blocks; j < new_num_blocks; j++) {
> +                new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> +            }
> +        } else {
> +            for (j = cpy_num_blocks; j < old_num_blocks; j++) {
> +                /* We are safe to free it, for that it is out-of-use */
> +                g_free(old_blocks->blocks[j]);

This looks unsafe because this code uses Read Copy Update (RCU):

  old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);

Other threads may still be accessing old_blocks so we cannot modify it
immediately. Changes need to be deferred until the next RCU period.
g_free_rcu() needs to be used to do this.
zhukeqian Dec. 21, 2020, 7:37 a.m. UTC | #2
On 2020/12/17 18:05, Stefan Hajnoczi wrote:
> On Mon, Nov 30, 2020 at 09:11:03PM +0800, Keqian Zhu wrote:
>> @@ -1839,15 +1841,26 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
>>          new_blocks = g_malloc(sizeof(*new_blocks) +
>>                                sizeof(new_blocks->blocks[0]) * new_num_blocks);
>>  
>> -        if (old_num_blocks) {
>> +        if (cpy_num_blocks) {
>>              memcpy(new_blocks->blocks, old_blocks->blocks,
>> -                   old_num_blocks * sizeof(old_blocks->blocks[0]));
>> +                   cpy_num_blocks * sizeof(old_blocks->blocks[0]));
>>          }
>>  
>> -        for (j = old_num_blocks; j < new_num_blocks; j++) {
>> -            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>> +        if (extend) {
>> +            for (j = cpy_num_blocks; j < new_num_blocks; j++) {
>> +                new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>> +            }
>> +        } else {
>> +            for (j = cpy_num_blocks; j < old_num_blocks; j++) {
>> +                /* We are safe to free it, for that it is out-of-use */
>> +                g_free(old_blocks->blocks[j]);
> 
> This looks unsafe because this code uses Read Copy Update (RCU):
> 
>   old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
> 
> Other threads may still be accessing old_blocks so we cannot modify it
> immediately. Changes need to be deferred until the next RCU period.
> g_free_rcu() needs to be used to do this.
> 
Hi Stefan,

You are right. I was thinking about the VM life cycle before. We shrink the dirty_memory
when we are removing unused ramblock. However we can not rely on this.

I also notice that "Organization into blocks allows dirty memory to grow (but not shrink)
under RCU". Why "but not shrink"? Any thoughts?

[...]
 * Organization into blocks allows dirty memory to grow (but not shrink) under
 * RCU.  When adding new RAMBlocks requires the dirty memory to grow, a new
 * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
 * the same.  Other threads can safely access existing blocks while dirty
 * memory is being grown.  When no threads are using the old DirtyMemoryBlocks
 * anymore it is freed by RCU (but the underlying blocks stay because they are
 * pointed to from the new DirtyMemoryBlocks).
 */
#define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
typedef struct {
    struct rcu_head rcu;
    unsigned long *blocks[];
} DirtyMemoryBlocks;
[...]

Thanks,
Keqian
zhukeqian Dec. 26, 2020, 7:11 a.m. UTC | #3
[...]

>>> -        for (j = old_num_blocks; j < new_num_blocks; j++) {
>>> -            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>>> +        if (extend) {
>>> +            for (j = cpy_num_blocks; j < new_num_blocks; j++) {
>>> +                new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>>> +            }
>>> +        } else {
>>> +            for (j = cpy_num_blocks; j < old_num_blocks; j++) {
>>> +                /* We are safe to free it, for that it is out-of-use */
>>> +                g_free(old_blocks->blocks[j]);
>>
>> This looks unsafe because this code uses Read Copy Update (RCU):
>>
>>   old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
>>
>> Other threads may still be accessing old_blocks so we cannot modify it
>> immediately. Changes need to be deferred until the next RCU period.
>> g_free_rcu() needs to be used to do this.
>>
> Hi Stefan,
> 
> You are right. I was thinking about the VM life cycle before. We shrink the dirty_memory
> when we are removing unused ramblock. However we can not rely on this.
> 
> I also notice that "Organization into blocks allows dirty memory to grow (but not shrink)
> under RCU". Why "but not shrink"? Any thoughts?
Hi,

After my analysis, it's both unsafe to grow or shrink under RCU.

ram_list.blocks and ram_list.dirty_memory[X] are closely related and
both protected by RCU. For the lockless RCU readers, we can't promise they
always see consistent version of the two structures.

For grow, a reader may see un-growed @dirty_memory and growed @blocks, causing out-of-bound access.
For shrink, a reader may see shrinked @dirty_memory and un-shrinked @blocks, causing out-of-bound access too.

I think it's a design problem, RCU can just protect one structure, not two.

Thanks,
Keqian.
> 
> [...]
>  * Organization into blocks allows dirty memory to grow (but not shrink) under
>  * RCU.  When adding new RAMBlocks requires the dirty memory to grow, a new
>  * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
>  * the same.  Other threads can safely access existing blocks while dirty
>  * memory is being grown.  When no threads are using the old DirtyMemoryBlocks
>  * anymore it is freed by RCU (but the underlying blocks stay because they are
>  * pointed to from the new DirtyMemoryBlocks).
>  */
> #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
> typedef struct {
>     struct rcu_head rcu;
>     unsigned long *blocks[];
> } DirtyMemoryBlocks;
> [...]
> 
> Thanks,
> Keqian
> 
> 
> .
>
Stefan Hajnoczi March 8, 2021, 10:52 a.m. UTC | #4
On Sat, Dec 26, 2020 at 03:11:53PM +0800, Keqian Zhu wrote:
> 
> [...]
> 
> >>> -        for (j = old_num_blocks; j < new_num_blocks; j++) {
> >>> -            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> >>> +        if (extend) {
> >>> +            for (j = cpy_num_blocks; j < new_num_blocks; j++) {
> >>> +                new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> >>> +            }
> >>> +        } else {
> >>> +            for (j = cpy_num_blocks; j < old_num_blocks; j++) {
> >>> +                /* We are safe to free it, for that it is out-of-use */
> >>> +                g_free(old_blocks->blocks[j]);
> >>
> >> This looks unsafe because this code uses Read Copy Update (RCU):
> >>
> >>   old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
> >>
> >> Other threads may still be accessing old_blocks so we cannot modify it
> >> immediately. Changes need to be deferred until the next RCU period.
> >> g_free_rcu() needs to be used to do this.
> >>
> > Hi Stefan,
> > 
> > You are right. I was thinking about the VM life cycle before. We shrink the dirty_memory
> > when we are removing unused ramblock. However we can not rely on this.
> > 
> > I also notice that "Organization into blocks allows dirty memory to grow (but not shrink)
> > under RCU". Why "but not shrink"? Any thoughts?
> Hi,
> 
> After my analysis, it's both unsafe to grow or shrink under RCU.
> 
> ram_list.blocks and ram_list.dirty_memory[X] are closely related and
> both protected by RCU. For the lockless RCU readers, we can't promise they
> always see consistent version of the two structures.
> 
> For grow, a reader may see un-growed @dirty_memory and growed @blocks, causing out-of-bound access.

Growth is safe because other threads only access pre-existing ranges
(below the old maximum size). They will only start accessing the newly
added ranges after resize.

Did you find a code path where this constraint is violated?

Stefan
diff mbox series

Patch

diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3027747c03..3e4f29f126 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1816,17 +1816,19 @@  void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
 }
 
 /* Called with ram_list.mutex held */
-static void dirty_memory_extend(ram_addr_t old_ram_size,
+static void dirty_memory_resize(ram_addr_t old_ram_size,
                                 ram_addr_t new_ram_size)
 {
     ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
                                              DIRTY_MEMORY_BLOCK_SIZE);
     ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                              DIRTY_MEMORY_BLOCK_SIZE);
+    ram_addr_t cpy_num_blocks = MIN(old_num_blocks, new_num_blocks);
+    bool extend = new_num_blocks > old_num_blocks;
     int i;
 
-    /* Only need to extend if block count increased */
-    if (new_num_blocks <= old_num_blocks) {
+    /* Only need to resize if block count changed */
+    if (new_num_blocks == old_num_blocks) {
         return;
     }
 
@@ -1839,15 +1841,26 @@  static void dirty_memory_extend(ram_addr_t old_ram_size,
         new_blocks = g_malloc(sizeof(*new_blocks) +
                               sizeof(new_blocks->blocks[0]) * new_num_blocks);
 
-        if (old_num_blocks) {
+        if (cpy_num_blocks) {
             memcpy(new_blocks->blocks, old_blocks->blocks,
-                   old_num_blocks * sizeof(old_blocks->blocks[0]));
+                   cpy_num_blocks * sizeof(old_blocks->blocks[0]));
         }
 
-        for (j = old_num_blocks; j < new_num_blocks; j++) {
-            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
+        if (extend) {
+            for (j = cpy_num_blocks; j < new_num_blocks; j++) {
+                new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
+            }
+        } else {
+            for (j = cpy_num_blocks; j < old_num_blocks; j++) {
+                /* We are safe to free it, for that it is out-of-use */
+                g_free(old_blocks->blocks[j]);
+            }
         }
 
+        if (!new_num_blocks) {
+            g_free(new_blocks);
+            new_blocks = NULL;
+        }
         qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
 
         if (old_blocks) {
@@ -1894,7 +1907,7 @@  static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
     new_ram_size = MAX(old_ram_size,
               (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
     if (new_ram_size > old_ram_size) {
-        dirty_memory_extend(old_ram_size, new_ram_size);
+        dirty_memory_resize(old_ram_size, new_ram_size);
     }
     /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
      * QLIST (which has an RCU-friendly variant) does not have insertion at