Message ID | 20230914202054.3551250-2-william.roche@oracle.com (mailing list archive)
---|---
State | New, archived
Series | Qemu crashes on VM migration after a handled memory error
On 15/09/2023 04:20, William Roche wrote:
> From: William Roche <william.roche@oracle.com>
>
> A memory page poisoned from the hypervisor level is no longer readable.
> Thus, it is now treated as a zero-page for the ram saving migration phase.
>
> The migration of a VM will crash Qemu when it tries to read the
> memory address space and stumbles on the poisoned page, with a stack
> trace similar to:
>
> Program terminated with signal SIGBUS, Bus error.
> #0  _mm256_loadu_si256
> #1  buffer_zero_avx2
> #2  select_accel_fn
> #3  buffer_is_zero
> #4  save_zero_page_to_file
> #5  save_zero_page
> #6  ram_save_target_page_legacy
> #7  ram_save_host_page
> #8  ram_find_and_save_block
> #9  ram_save_iterate
> #10 qemu_savevm_state_iterate
> #11 migration_iteration_run
> #12 migration_thread
> #13 qemu_thread_start
>
> Fix it by considering poisoned pages as if they were zero-pages for
> the migration copy. This fix also works with underlying large pages,
> taking into account the RAMBlock segment "page-size".
>
> Standard migration and compressed transfers are handled by this code.
> RDMA transfer isn't touched.

I'm okay with "RDMA isn't touched".
BTW, could you share your reproducing program/hack to poison the page, so
that I am able to take a look at the RDMA part later when I'm free?

Not sure it's suitable to acknowledge a not-touched part. Anyway,
Acked-by: Li Zhijian <lizhijian@fujitsu.com> # RDMA

> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
>  accel/kvm/kvm-all.c      | 14 ++++++++++++++
>  accel/stubs/kvm-stub.c   |  5 +++++
>  include/sysemu/kvm.h     | 10 ++++++++++
>  migration/ram-compress.c |  3 ++-
>  migration/ram.c          | 23 +++++++++++++++++++++--
>  migration/ram.h          |  2 ++
>  6 files changed, 54 insertions(+), 3 deletions(-)
>
> [...]
On 9/15/23 05:13, Zhijian Li (Fujitsu) wrote:
> I'm okay with "RDMA isn't touched".
> BTW, could you share your reproducing program/hack to poison the page, so
> that I am able to take a look at the RDMA part later when I'm free?
>
> Not sure it's suitable to acknowledge a not-touched part. Anyway,
> Acked-by: Li Zhijian <lizhijian@fujitsu.com> # RDMA

Thanks.
As you asked for a procedure to inject memory errors into a running VM,
I've attached to this email the source code (mce_process_react.c) of a
program that will help to target the error injection in the VM.

(Be careful: error injection is currently not working on AMD platforms --
this is a work in progress in a separate qemu thread.)

The general idea:
We are going to target a memory page of a process running inside a VM to
see what happens when we inject an error on the underlying physical page
at the platform (hypervisor) level.
To have a better view of what's going on, we'll use a process made for
this: its goal is to allocate a memory page and install a SIGBUS handler
to report when it receives this signal. It also waits before touching the
page, so we can see what happens next. (A rough reconstruction of such a
tool is sketched after this message.)

Compiling this tool:
$ gcc -o mce_process_react_x86 mce_process_react.c

Let's try that:
This procedure shows the best-case scenario, where an error injected at
the platform level is reported all the way up to the guest process using
the page.
Note that qemu should be started with root privilege.

1. Choose a process running in the VM (identify a memory page you want to
target and get its physical address -- crash(8) vtop can help with that),
or run the attached mce_process_react example (compiled for your platform:
mce_process_react_[x86|arm]) with the options to be informed early of _AO
errors (-e) and to wait for ENTER before reading the allocated page (-w 0):

[root@VM ]# ./mce_process_react_x86 -e -w 0
Setting Early kill... Ok

Data pages at 0x7fa0f9b25000 physically 0x200f2fa000

Press ENTER to continue with page reading

2. Go into the VM monitor to get the translation from "Guest Physical
Address" to "Host Physical Address" (or "Host Virtual Address"):

(qemu) gpa2hpa 0x200f2fa000
Host physical address for 0x200f2fa000 (ram-node1) is 0x46f12fa000

3. Before we inject the error, we want to keep track of the VM console
output (in a separate window).
If you are using libvirt: # virsh console myvm

4. We now prepare the error injection at the platform level for the
address we found. To do so, we'll use the hwpoison-inject module (x86).
Be careful: hwpoison takes Page Frame Numbers, and this PFN is not the
physical address -- you need to remove the last 12 bits (the last 3 hex
digits, here zeros) of the above address!

[root@hv ]# modprobe hwpoison-inject
[root@hv ]# echo 0x46f12fa > /sys/kernel/debug/hwpoison/corrupt-pfn

If you see an "Operation not permitted" error when writing as root to
corrupt-pfn, you may be facing kernel_lockdown(7), which is enabled on
SecureBoot systems (can be verified with "mokutil --sb-state"). In this
case, turn SecureBoot off (at the UEFI level, for example).

5. Look at the qemu output (either on the terminal where qemu was started,
or if you are using libvirt: tail /var/log/libvirt/qemu/myvm):

2022-08-31T13:52:25.645398Z qemu-system-x86_64: warning: Guest MCE Memory Error at QEMU addr 0x7eeeace00000 and GUEST addr 0x200f200 of type BUS_MCEERR_AO injected

6. On the guest console, we'll see the VM reaction to the injected error:

[  155.805149] Disabling lock debugging due to kernel taint
[  155.806174] mce: [Hardware Error]: Machine check events logged
[  155.807120] Memory failure: 0x200f200: Killing mce_process_rea:3548 due to hardware memory corruption
[  155.808877] Memory failure: 0x200f200: recovery action for dirty LRU page: Recovered

7. The guest process that we started in the first step gives:

Signal 7 received
BUS_MCEERR_AO on vaddr: 0x7fa0f9b25000

At this stage, the VM has a poisoned page, and migration of this VM needs
to be fixed in order to avoid accessing the poisoned page.

8. The process continues to run (as it handled the SIGBUS). Now if you
press ENTER on this process terminal, it will try to read the page, which
will generate a new MCE (a synchronous one) at the VM level, which is
then sent to this process:

Signal 7 received
BUS_MCEERR_AR on vaddr: 0x7fa0f9b25000
Exit from the signal handler on BUS_MCEERR_AR

9. The VM console shows:
[ 2520.895263] MCE: Killing mce_process_rea:3548 due to hardware memory corruption fault at 7f45e5265000

10. The VM continues to run... with a poisoned page in its address space.

HTH,
William.
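The mce_process_react.c attachment itself is not preserved in this archive. As a rough guide only, here is a minimal sketch of a similar tool -- an assumption-based reconstruction, not William's actual code, with the -e/-w option parsing omitted. It opts in to early delivery of action-optional MCEs with prctl(PR_MCE_KILL), installs a SIGBUS handler that distinguishes BUS_MCEERR_AO from BUS_MCEERR_AR, resolves the page's physical address through /proc/self/pagemap (readable by root only), and waits for ENTER before reading the page:

#define _GNU_SOURCE
#include <fcntl.h>
#include <inttypes.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <unistd.h>

static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
    /* printf() is not async-signal-safe; acceptable for a demo only */
    printf("Signal %d received\n", sig);
    if (si->si_code == BUS_MCEERR_AO) {
        printf("BUS_MCEERR_AO on vaddr: %p\n", si->si_addr);
    } else if (si->si_code == BUS_MCEERR_AR) {
        printf("BUS_MCEERR_AR on vaddr: %p\n", si->si_addr);
        printf("Exit from the signal handler on BUS_MCEERR_AR\n");
        _exit(1);                       /* cannot safely resume the load */
    }
    (void)ctx;
}

/* Translate a virtual address to a physical one via /proc/self/pagemap:
 * each 64-bit entry holds the PFN in bits 0-54. */
static uint64_t virt_to_phys(void *vaddr, long pagesize)
{
    uint64_t entry = 0;
    int fd = open("/proc/self/pagemap", O_RDONLY);

    pread(fd, &entry, sizeof(entry),
          ((uintptr_t)vaddr / pagesize) * sizeof(entry));
    close(fd);
    return (entry & ((1ULL << 55) - 1)) * pagesize +
           ((uintptr_t)vaddr & (pagesize - 1));
}

int main(void)
{
    long pagesize = sysconf(_SC_PAGESIZE);
    struct sigaction sa = { .sa_sigaction = sigbus_handler,
                            .sa_flags = SA_SIGINFO };
    void *page;

    sigaction(SIGBUS, &sa, NULL);

    /* "-e": also receive SIGBUS for action-optional (_AO) errors */
    printf("Setting Early kill... %s\n",
           prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0)
               ? "Failed" : "Ok");

    page = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    memset(page, 0x5a, pagesize);       /* make the page resident */
    printf("Data pages at %p physically 0x%" PRIx64 "\n",
           page, virt_to_phys(page, pagesize));

    printf("Press ENTER to continue with page reading\n");
    getchar();
    return *(volatile char *)page;      /* SIGBUS here if poisoned */
}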
On 15/09/2023 19:31, William Roche wrote:
> On 9/15/23 05:13, Zhijian Li (Fujitsu) wrote:
>> I'm okay with "RDMA isn't touched".
>> BTW, could you share your reproducing program/hack to poison the page, so
>> that I am able to take a look at the RDMA part later when I'm free?
>>
>> Not sure it's suitable to acknowledge a not-touched part. Anyway,
>> Acked-by: Li Zhijian <lizhijian@fujitsu.com> # RDMA
>
> Thanks.
> As you asked for a procedure to inject memory errors into a running VM,
> I've attached to this email the source code (mce_process_react.c) of a
> program that will help to target the error injection in the VM.
> [...]

Thank you very much for the details, noted :)

Thanks
Zhijian
On 15/09/2023 19:31, William Roche wrote:
> On 9/15/23 05:13, Zhijian Li (Fujitsu) wrote:
>> [...]
>
> Thanks.
> As you asked for a procedure to inject memory errors into a running VM,
> I've attached to this email the source code (mce_process_react.c) of a
> program that will help to target the error injection in the VM.
> [...]

I just tried your hwpoison program and did an RDMA migration. The migration
failed, but fortunately the source side is still alive :).

(qemu) Failed to register chunk!: Bad address
Chunk details: block: 0 chunk index 671 start 139955096518656 end 139955097567232 host 139955096518656 local 139954392924160 registrations: 636
qemu-system-x86_64: cannot get lkey
qemu-system-x86_64: rdma migration: write error! -22
qemu-system-x86_64: RDMA is in an error state waiting migration to abort!
qemu-system-x86_64: failed to save SaveStateEntry with id(name): 2(ram): -22
qemu-system-x86_64: Early error. Sending error.

Since the current RDMA migration transfers guest memory in a chunk size
(1M) by default, we may need to either:

option 1: reduce the chunk size to 1 page
option 2: handle the hwpoison chunk specially (see the sketch after this
message)

However, because there may be a chance to use another protocol, it's also
possible to temporarily not fix the issue.

Tested-by: Li Zhijian <lizhijian@fujitsu.com>

Thanks
Zhijian
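For illustration only, Zhijian's "option 2" (special-casing a poisoned page inside an RDMA chunk) might take a shape like the following sketch. The rdma_send_* helpers are hypothetical placeholders, not functions from qemu's migration/rdma.c, and the real chunked-registration code is organized differently:

/* Hypothetical sketch of "option 2": walk a chunk page by page and send
 * poisoned pages as zero pages instead of registering and reading them. */
static int rdma_send_chunk(RAMBlock *block, ram_addr_t chunk_start,
                           size_t chunk_len)
{
    ram_addr_t off;

    for (off = chunk_start; off < chunk_start + chunk_len;
         off += block->page_size) {
        if (kvm_enabled() && kvm_hwpoisoned_page(block, (void *)off)) {
            rdma_send_zero_page(block, off);        /* hypothetical helper */
        } else {
            rdma_send_registered_page(block, off);  /* hypothetical helper */
        }
    }
    return 0;
}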
Thank you Zhijian for your feedback. So I'll try to push this change today.

Cheers,
William.

On 9/20/23 12:04, Zhijian Li (Fujitsu) wrote:
> I just tried your hwpoison program and did an RDMA migration. The migration
> failed, but fortunately the source side is still alive :).
> [...]
> Tested-by: Li Zhijian <lizhijian@fujitsu.com>
>
> Thanks
> Zhijian
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index ff1578bb32..7fb13c8a56 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1152,6 +1152,20 @@ static void kvm_unpoison_all(void *param)
     }
 }
 
+bool kvm_hwpoisoned_page(RAMBlock *block, void *offset)
+{
+    HWPoisonPage *pg;
+    ram_addr_t ram_addr = (ram_addr_t) offset;
+
+    QLIST_FOREACH(pg, &hwpoison_page_list, list) {
+        if ((ram_addr >= pg->ram_addr) &&
+            (ram_addr - pg->ram_addr < block->page_size)) {
+            return true;
+        }
+    }
+    return false;
+}
+
 void kvm_hwpoison_page_add(ram_addr_t ram_addr)
 {
     HWPoisonPage *page;
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
index 235dc661bc..c0a31611df 100644
--- a/accel/stubs/kvm-stub.c
+++ b/accel/stubs/kvm-stub.c
@@ -133,3 +133,8 @@ uint32_t kvm_dirty_ring_size(void)
 {
     return 0;
 }
+
+bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr)
+{
+    return false;
+}
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index ee9025f8e9..858688227a 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -570,4 +570,14 @@ bool kvm_arch_cpu_check_are_resettable(void);
 bool kvm_dirty_ring_enabled(void);
 
 uint32_t kvm_dirty_ring_size(void);
+
+/**
+ * kvm_hwpoisoned_page - indicate if the given page is poisoned
+ * @block: memory block of the given page
+ * @ram_addr: offset of the page
+ *
+ * Returns: true: page is poisoned
+ *          false: page not yet poisoned
+ */
+bool kvm_hwpoisoned_page(RAMBlock *block, void *ram_addr);
 #endif
diff --git a/migration/ram-compress.c b/migration/ram-compress.c
index 06254d8c69..1916ce709d 100644
--- a/migration/ram-compress.c
+++ b/migration/ram-compress.c
@@ -34,6 +34,7 @@
 #include "qemu/error-report.h"
 #include "migration.h"
 #include "options.h"
+#include "ram.h"
 #include "io/channel-null.h"
 #include "exec/target_page.h"
 #include "exec/ramblock.h"
@@ -198,7 +199,7 @@ static CompressResult do_compress_ram_page(QEMUFile *f, z_stream *stream,
 
     assert(qemu_file_buffer_empty(f));
 
-    if (buffer_is_zero(p, page_size)) {
+    if (migration_buffer_is_zero(block, offset, page_size)) {
         return RES_ZEROPAGE;
     }
 
diff --git a/migration/ram.c b/migration/ram.c
index 9040d66e61..fd337f7e65 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -1129,6 +1129,26 @@ void ram_release_page(const char *rbname, uint64_t offset)
     ram_discard_range(rbname, offset, TARGET_PAGE_SIZE);
 }
 
+/**
+ * migration_buffer_is_zero: indicate if the page at the given
+ * location is entirely filled with zero, or is a poisoned page.
+ *
+ * @block: block that contains the page
+ * @offset: offset inside the block for the page
+ * @len: size to consider
+ */
+bool migration_buffer_is_zero(RAMBlock *block, ram_addr_t offset,
+                              size_t len)
+{
+    uint8_t *p = block->host + offset;
+
+    if (kvm_enabled() && kvm_hwpoisoned_page(block, (void *)offset)) {
+        return true;
+    }
+
+    return buffer_is_zero(p, len);
+}
+
 /**
  * save_zero_page_to_file: send the zero page to the file
  *
@@ -1142,10 +1162,9 @@ void ram_release_page(const char *rbname, uint64_t offset)
 static int save_zero_page_to_file(PageSearchStatus *pss, QEMUFile *file,
                                   RAMBlock *block, ram_addr_t offset)
 {
-    uint8_t *p = block->host + offset;
     int len = 0;
 
-    if (buffer_is_zero(p, TARGET_PAGE_SIZE)) {
+    if (migration_buffer_is_zero(block, offset, TARGET_PAGE_SIZE)) {
         len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO);
         qemu_put_byte(file, 0);
         len += 1;
diff --git a/migration/ram.h b/migration/ram.h
index 145c915ca7..805ea2a211 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -65,6 +65,8 @@ void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 void ram_transferred_add(uint64_t bytes);
 void ram_release_page(const char *rbname, uint64_t offset);
 
+bool migration_buffer_is_zero(RAMBlock *block, ram_addr_t offset, size_t len);
+
 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr);
 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset);
 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr);
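To exercise the fix end to end, one can poison a guest page as described earlier in the thread and then drive a normal (non-RDMA) migration from the HMP monitor. Without the patch, the source qemu dies with SIGBUS in buffer_is_zero(); with it, the migration should complete. The destination URI below is a placeholder and the info migrate output is abridged:

(qemu) migrate -d tcp:<dest-host>:4444
(qemu) info migrate
Migration status: completed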