| Message ID | 20250201095726.3768796-4-william.roche@oracle.com (mailing list archive) |
| --- | --- |
| State | New, archived |
| Series | Poisoned memory recovery on reboot |
On Sat, Feb 01, 2025 at 09:57:23AM +0000, William Roche wrote:
> From: William Roche <william.roche@oracle.com>
>
> In case of a large page impacted by a memory error, provide
> information about the impacted large page before the memory
> error injection message.
>
> This message would also appear on ras enabled ARM platforms, with
> the introduction of an x86 similar error injection message.
>
> In the case of a large page impacted, we now report:
> Memory Error on large page from <backend>:<address>+<fd_offset> +<page_size>
>
> The +<fd_offset> information is only provided with a file backend.
>
> Signed-off-by: William Roche <william.roche@oracle.com>

This is still a pretty kvm / arch relevant patch that needs some reviews.

I wonder do we really need this - we could fetch ramblock mapping
(e.g. hwaddr -> HVA) via HMP "info ramblock", and also dmesg shows process
ID + VA. IIUC we have all below info already as long as we do some math
based on above. Would that work too?

> ---
>  accel/kvm/kvm-all.c       | 18 ++++++++++++++++++
>  include/exec/cpu-common.h | 10 ++++++++++
>  system/physmem.c          | 22 ++++++++++++++++++++++
>  target/arm/kvm.c          |  3 +++
>  4 files changed, 53 insertions(+)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index f89568bfa3..9a0d970ce1 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -1296,6 +1296,24 @@ static void kvm_unpoison_all(void *param)
>  void kvm_hwpoison_page_add(ram_addr_t ram_addr)
>  {
>      HWPoisonPage *page;
> +    struct RAMBlockInfo rb_info;
> +
> +    if (qemu_ram_block_info_from_addr(ram_addr, &rb_info)) {
> +        size_t ps = rb_info.page_size;
> +
> +        if (ps > TARGET_PAGE_SIZE) {
> +            uint64_t offset = QEMU_ALIGN_DOWN(ram_addr - rb_info.offset, ps);
> +
> +            if (rb_info.fd >= 0) {
> +                error_report("Memory Error on large page from %s:%" PRIx64
> +                             "+%" PRIx64 " +%zx", rb_info.idstr, offset,
> +                             rb_info.fd_offset, ps);
> +            } else {
> +                error_report("Memory Error on large page from %s:%" PRIx64
> +                             " +%zx", rb_info.idstr, offset, ps);
> +            }
> +        }
> +    }
>
>      QLIST_FOREACH(page, &hwpoison_page_list, list) {
>          if (page->ram_addr == ram_addr) {
> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 3771b2130c..190bd4f34a 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -110,6 +110,16 @@ int qemu_ram_get_fd(RAMBlock *rb);
>  size_t qemu_ram_pagesize(RAMBlock *block);
>  size_t qemu_ram_pagesize_largest(void);
>
> +struct RAMBlockInfo {
> +    char idstr[256];
> +    ram_addr_t offset;
> +    int fd;
> +    uint64_t fd_offset;
> +    size_t page_size;
> +};
> +bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr,
> +                                   struct RAMBlockInfo *block);
> +
>  /**
>   * cpu_address_space_init:
>   * @cpu: CPU to add this address space to
> diff --git a/system/physmem.c b/system/physmem.c
> index e8ff930bc9..686f569270 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -1678,6 +1678,28 @@ size_t qemu_ram_pagesize_largest(void)
>      return largest;
>  }
>
> +/* Copy RAMBlock information associated to the given ram_addr location */
> +bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr,
> +                                   struct RAMBlockInfo *b_info)
> +{
> +    RAMBlock *rb;
> +
> +    assert(b_info);
> +
> +    RCU_READ_LOCK_GUARD();
> +    rb = qemu_get_ram_block(ram_addr);
> +    if (!rb) {
> +        return false;
> +    }
> +
> +    pstrcat(b_info->idstr, sizeof(b_info->idstr), rb->idstr);
> +    b_info->offset = rb->offset;
> +    b_info->fd = rb->fd;
> +    b_info->fd_offset = rb->fd_offset;
> +    b_info->page_size = rb->page_size;
> +    return true;
> +}
> +
>  static int memory_try_enable_merging(void *addr, size_t len)
>  {
>      if (!machine_mem_merge(current_machine)) {
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index da30bdbb23..d9dedc6d74 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2389,6 +2389,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>                  kvm_cpu_synchronize_state(c);
>                  if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
>                      kvm_inject_arm_sea(c);
> +                    error_report("Guest Memory Error at QEMU addr %p and "
> +                                 "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
> +                                 addr, paddr, "BUS_MCEERR_AR");
>                  } else {
>                      error_report("failed to record the error");
>                      abort();
> --
> 2.43.5
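(For reference, the math Peter mentions is simple once the ramblock layout is known. A minimal standalone sketch follows; the block_* constants and the faulting address are made-up placeholders standing in for what "info ramblock" and the host dmesg hwpoison line would show, not output of any QEMU API.)

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Illustrative values: block host base and ram_addr offset as reported by
 * "info ramblock", page size of the backend, faulting host VA from dmesg. */
static const uint64_t block_hva_base   = 0x7f1a40000000ULL;
static const uint64_t block_ram_offset = 0x0ULL;
static const uint64_t block_page_size  = 2 * 1024 * 1024;   /* 2M hugepage */

int main(void)
{
    uint64_t fault_hva = 0x7f1a40345000ULL;   /* from the kernel's report */

    /* HVA -> guest ram_addr, then round down to the containing large page. */
    uint64_t ram_addr   = block_ram_offset + (fault_hva - block_hva_base);
    uint64_t page_start = ram_addr & ~(block_page_size - 1);

    printf("ram_addr=0x%" PRIx64 ", impacted large page 0x%" PRIx64
           "+0x%" PRIx64 "\n", ram_addr, page_start, block_page_size);
    return 0;
}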
On 2/4/25 18:01, Peter Xu wrote:
> On Sat, Feb 01, 2025 at 09:57:23AM +0000, William Roche wrote:
>> From: William Roche <william.roche@oracle.com>
>>
>> In case of a large page impacted by a memory error, provide
>> information about the impacted large page before the memory
>> error injection message.
>>
>> This message would also appear on ras enabled ARM platforms, with
>> the introduction of an x86 similar error injection message.
>>
>> In the case of a large page impacted, we now report:
>> Memory Error on large page from <backend>:<address>+<fd_offset> +<page_size>
>>
>> The +<fd_offset> information is only provided with a file backend.
>>
>> Signed-off-by: William Roche <william.roche@oracle.com>
>
> This is still a pretty kvm / arch relevant patch that needs some reviews.
>
> I wonder do we really need this - we could fetch ramblock mapping
> (e.g. hwaddr -> HVA) via HMP "info ramblock", and also dmesg shows process
> ID + VA. IIUC we have all below info already as long as we do some math
> based on above. Would that work too?

The HMP command "info ramblock" is implemented with the ram_block_format()
function, which returns a message buffer built with a string for each
ramblock (protected by the RCU_READ_LOCK_GUARD). Our new function copies a
struct with the necessary information.

Relying on the buffer format to retrieve the information doesn't seem
reasonable, and more importantly, this buffer doesn't provide all the needed
data, like fd and fd_offset.

I would say that ram_block_format() and qemu_ram_block_info_from_addr()
serve 2 different goals.

(a reimplementation of ram_block_format() with an adapted version of
qemu_ram_block_info_from_addr() taking the extra information needed could be
doable for example, but may not be worth doing for now)
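(As a concrete illustration of that difference, a consumer of the struct-based API reduces to something like the sketch below. The struct and prototype come from the patch above; the surrounding function and its reporting are only illustrative, not part of the patch.)

#include "qemu/osdep.h"
#include "qemu/error-report.h"
#include "exec/cpu-common.h"
#include "exec/target_page.h"

/*
 * Sketch only: report whether a poisoned ram_addr falls in a RAMBlock
 * backed by pages larger than the target page size, using the
 * qemu_ram_block_info_from_addr() helper proposed in this series.
 */
static void report_hwpoison_block(ram_addr_t ram_addr)
{
    struct RAMBlockInfo info;

    if (!qemu_ram_block_info_from_addr(ram_addr, &info)) {
        return;             /* no RAMBlock covers this address */
    }

    if (info.page_size > qemu_target_page_size()) {
        error_report("hwpoison hits large-page block %s (page size 0x%zx)",
                     info.idstr, info.page_size);
    }
}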
On Wed, Feb 05, 2025 at 05:27:13PM +0100, William Roche wrote:
> On 2/4/25 18:01, Peter Xu wrote:
> > On Sat, Feb 01, 2025 at 09:57:23AM +0000, William Roche wrote:
> > > From: William Roche <william.roche@oracle.com>
> > >
> > > In case of a large page impacted by a memory error, provide
> > > information about the impacted large page before the memory
> > > error injection message.
> > >
> > > This message would also appear on ras enabled ARM platforms, with
> > > the introduction of an x86 similar error injection message.
> > >
> > > In the case of a large page impacted, we now report:
> > > Memory Error on large page from <backend>:<address>+<fd_offset> +<page_size>
> > >
> > > The +<fd_offset> information is only provided with a file backend.
> > >
> > > Signed-off-by: William Roche <william.roche@oracle.com>
> >
> > This is still a pretty kvm / arch relevant patch that needs some reviews.
> >
> > I wonder do we really need this - we could fetch ramblock mapping
> > (e.g. hwaddr -> HVA) via HMP "info ramblock", and also dmesg shows process
> > ID + VA. IIUC we have all below info already as long as we do some math
> > based on above. Would that work too?
>
> The HMP command "info ramblock" is implemented with the ram_block_format()
> function, which returns a message buffer built with a string for each
> ramblock (protected by the RCU_READ_LOCK_GUARD). Our new function copies a
> struct with the necessary information.
>
> Relying on the buffer format to retrieve the information doesn't seem
> reasonable, and more importantly, this buffer doesn't provide all the needed
> data, like fd and fd_offset.
>
> I would say that ram_block_format() and qemu_ram_block_info_from_addr()
> serve 2 different goals.
>
> (a reimplementation of ram_block_format() with an adapted version of
> qemu_ram_block_info_from_addr() taking the extra information needed could be
> doable for example, but may not be worth doing for now)

IIUC admin should be aware of fd_offset because the admin should be fully
aware of the start offset of FDs to specify in qemu cmdlines, or in
Libvirt. But yes, we can always add fd_offset into ram_block_format() if
it's helpful.

Besides, the existing issues on this patch:

- From outcome of this patch, it introduces one ramblock API (which is ok
  to me, so far), to do some error_report()s. It looks pretty much for
  debugging rather than something serious (e.g. report via QMP queries,
  QMP events etc.). From debug POV, I still don't see why this is
  needed.. per discussed above.

- From merge POV, this patch isn't a pure memory change, so I'll need to
  get ack from other maintainers, at least that should be how it works..

I feel like when hwpoison becomes a serious topic, we need some more
serious reporting facility than error reports. So that we could have this
as separate topic to be revisited. It might speed up your prior patches
from not being blocked on this.

Thanks,
On 2/5/25 18:07, Peter Xu wrote:
> On Wed, Feb 05, 2025 at 05:27:13PM +0100, William Roche wrote:
>> [...]
>> The HMP command "info ramblock" is implemented with the ram_block_format()
>> function, which returns a message buffer built with a string for each
>> ramblock (protected by the RCU_READ_LOCK_GUARD). Our new function copies a
>> struct with the necessary information.
>>
>> Relying on the buffer format to retrieve the information doesn't seem
>> reasonable, and more importantly, this buffer doesn't provide all the needed
>> data, like fd and fd_offset.
>>
>> I would say that ram_block_format() and qemu_ram_block_info_from_addr()
>> serve 2 different goals.
>>
>> (a reimplementation of ram_block_format() with an adapted version of
>> qemu_ram_block_info_from_addr() taking the extra information needed could be
>> doable for example, but may not be worth doing for now)
>
> IIUC admin should be aware of fd_offset because the admin should be fully
> aware of the start offset of FDs to specify in qemu cmdlines, or in
> Libvirt. But yes, we can always add fd_offset into ram_block_format() if
> it's helpful.
>
> Besides, the existing issues on this patch:
>
> - From outcome of this patch, it introduces one ramblock API (which is ok
>   to me, so far), to do some error_report()s. It looks pretty much for
>   debugging rather than something serious (e.g. report via QMP queries,
>   QMP events etc.). From debug POV, I still don't see why this is
>   needed.. per discussed above.

The reason why I want to inform the user of a large memory failure more
specifically than a standard sized page loss is because of the significant
behavior difference: Our current implementation can transparently handle
many situations without necessarily leading the VM to a crash. But when it
comes to large pages, there is no mechanism to inform the VM of a large
memory loss, and usually this situation leads the VM to crash, and can also
generate some weird situations like qemu itself crashing or a loop of
errors, for example.

So having a message informing of such a memory loss can help to understand a
more radical VM or qemu behavior -- it increases the diagnosability of our
code.

To verify that a SIGBUS appeared because of a large page loss, we currently
need to verify the targeted memory block backend page_size.
We should usually get this information from the SIGBUS siginfo data (with a
si_addr_lsb field giving an indication of the page size) but a KVM weakness
with a hardcoded si_addr_lsb=PAGE_SHIFT value in the SIGBUS siginfo returned
from the kernel prevents that: See kvm_send_hwpoison_signal() function.

So I first wrote a small API addition called qemu_ram_pagesize_from_addr()
to retrieve only this page_size value from the impacted address; and later
on, this function turned into the richer qemu_ram_block_info_from_addr()
function to have the generated messages match the existing memory messages
as rightly requested by David.

So the main reason is a KVM "weakness" with kvm_send_hwpoison_signal(), and
the second reason is to have richer error messages.

> - From merge POV, this patch isn't a pure memory change, so I'll need to
>   get ack from other maintainers, at least that should be how it works..

I agree :)

> I feel like when hwpoison becomes a serious topic, we need some more
> serious reporting facility than error reports. So that we could have this
> as separate topic to be revisited. It might speed up your prior patches
> from not being blocked on this.

I explained why I think that error messages are important, but I don't want
to get blocked on fixing the hugepage memory recovery because of that.

If you think that not displaying a specific message for large page loss can
help to get the recovery fixed, then I can change my proposal to do so.

Early next week, I'll send a simplified version of my first 3 patches
without these specific messages and without the preallocation handling in
all remap cases, so you can evaluate this possibility.

Thanks again for your feedback,
William.
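(To illustrate the siginfo point above: outside of the KVM-forwarded case, a SIGBUS handler can read the page-size hint directly from si_addr_lsb. A minimal standalone sketch follows, assuming a glibc that exposes si_addr_lsb and the BUS_MCEERR_* codes; as noted, when KVM forwards the fault this field is currently always PAGE_SHIFT.)

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Print the poisoned address and the page size the kernel reports for it.
 * (fprintf is not async-signal-safe; good enough for an illustration.) */
static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
    if (si->si_code == BUS_MCEERR_AR || si->si_code == BUS_MCEERR_AO) {
        /* si_addr_lsb = least significant bit of si_addr, i.e. log2 of the
         * poisoned page size as seen by the kernel. */
        uint64_t page_size = 1ULL << si->si_addr_lsb;

        fprintf(stderr, "hwpoison at %p, reported page size 0x%" PRIx64 "\n",
                si->si_addr, page_size);
    }
}

int main(void)
{
    struct sigaction act = { 0 };

    act.sa_sigaction = sigbus_handler;
    act.sa_flags = SA_SIGINFO;
    sigaction(SIGBUS, &act, NULL);

    /* ... run the workload; a memory error on a hugetlb page would be
     * reported with si_addr_lsb = 21 (2M) or 30 (1G) here ... */
    return 0;
}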
On Fri, Feb 07, 2025 at 07:02:22PM +0100, William Roche wrote:
> On 2/5/25 18:07, Peter Xu wrote:
> > On Wed, Feb 05, 2025 at 05:27:13PM +0100, William Roche wrote:
> > > [...]
> > > The HMP command "info ramblock" is implemented with the ram_block_format()
> > > function, which returns a message buffer built with a string for each
> > > ramblock (protected by the RCU_READ_LOCK_GUARD). Our new function copies a
> > > struct with the necessary information.
> > >
> > > Relying on the buffer format to retrieve the information doesn't seem
> > > reasonable, and more importantly, this buffer doesn't provide all the needed
> > > data, like fd and fd_offset.
> > >
> > > I would say that ram_block_format() and qemu_ram_block_info_from_addr()
> > > serve 2 different goals.
> > >
> > > (a reimplementation of ram_block_format() with an adapted version of
> > > qemu_ram_block_info_from_addr() taking the extra information needed could be
> > > doable for example, but may not be worth doing for now)
> >
> > IIUC admin should be aware of fd_offset because the admin should be fully
> > aware of the start offset of FDs to specify in qemu cmdlines, or in
> > Libvirt. But yes, we can always add fd_offset into ram_block_format() if
> > it's helpful.
> >
> > Besides, the existing issues on this patch:
> >
> > - From outcome of this patch, it introduces one ramblock API (which is ok
> >   to me, so far), to do some error_report()s. It looks pretty much for
> >   debugging rather than something serious (e.g. report via QMP queries,
> >   QMP events etc.). From debug POV, I still don't see why this is
> >   needed.. per discussed above.
>
> The reason why I want to inform the user of a large memory failure more
> specifically than a standard sized page loss is because of the significant
> behavior difference: Our current implementation can transparently handle
> many situations without necessarily leading the VM to a crash. But when it
> comes to large pages, there is no mechanism to inform the VM of a large
> memory loss, and usually this situation leads the VM to crash, and can also
> generate some weird situations like qemu itself crashing or a loop of
> errors, for example.
>
> So having a message informing of such a memory loss can help to understand a
> more radical VM or qemu behavior -- it increases the diagnosability of our
> code.
>
> To verify that a SIGBUS appeared because of a large page loss, we currently
> need to verify the targeted memory block backend page_size.
> We should usually get this information from the SIGBUS siginfo data (with a
> si_addr_lsb field giving an indication of the page size) but a KVM weakness
> with a hardcoded si_addr_lsb=PAGE_SHIFT value in the SIGBUS siginfo returned
> from the kernel prevents that: See kvm_send_hwpoison_signal() function.
>
> So I first wrote a small API addition called qemu_ram_pagesize_from_addr()
> to retrieve only this page_size value from the impacted address; and later
> on, this function turned into the richer qemu_ram_block_info_from_addr()
> function to have the generated messages match the existing memory messages
> as rightly requested by David.
>
> So the main reason is a KVM "weakness" with kvm_send_hwpoison_signal(), and
> the second reason is to have richer error messages.

This seems true, and I also remember something when I looked at this
previously but maybe nobody tried to fix it. ARM seems to be correct on
that field, otoh.

Is it possible we fix KVM on x86? kvm_handle_error_pfn() has the fault
context, so IIUC it should be able to figure that out too like what ARM
does (with get_vma_page_shift()).

> > - From merge POV, this patch isn't a pure memory change, so I'll need to
> >   get ack from other maintainers, at least that should be how it works..
>
> I agree :)
>
> > I feel like when hwpoison becomes a serious topic, we need some more
> > serious reporting facility than error reports. So that we could have this
> > as separate topic to be revisited. It might speed up your prior patches
> > from not being blocked on this.
>
> I explained why I think that error messages are important, but I don't want
> to get blocked on fixing the hugepage memory recovery because of that.

What is the major benefit of reporting in QEMU's stderr in this case? For
example, how should we consume the error reports that this patch introduces?
Is it still for debugging purpose?

I agree it's always better to dump something in QEMU when such happened, but
IIUC what I mentioned above (by monitoring QEMU ramblock setups, and monitor
host dmesg on any vaddr reported hwpoison) should also allow anyone to deduce
the page size of affected vaddr, especially if it's for debugging purpose.
However I could possibly have missed the goal here..

> If you think that not displaying a specific message for large page loss can
> help to get the recovery fixed, then I can change my proposal to do so.
>
> Early next week, I'll send a simplified version of my first 3 patches
> without these specific messages and without the preallocation handling in
> all remap cases, so you can evaluate this possibility.

Yes IMHO it'll always be helpful to separate it if possible.

Thanks,
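(Rough, untested sketch of the idea being discussed, modeled on the arm64 behaviour: derive the lsb from the VMA backing the faulting HVA instead of hardcoding PAGE_SHIFT. This is not the actual x86 KVM code; the helper names *_sketch and hwpoison_addr_lsb() are made up for the illustration.)

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/sched/signal.h>
#include <linux/kvm_host.h>

/* Sketch: pick the hwpoison lsb from the VMA instead of a fixed PAGE_SHIFT. */
static short hwpoison_addr_lsb(struct kvm *kvm, unsigned long hva)
{
	struct vm_area_struct *vma;
	short lsb = PAGE_SHIFT;                 /* default: base page size */

	mmap_read_lock(kvm->mm);
	vma = find_vma(kvm->mm, hva);
	if (vma && vma->vm_start <= hva && is_vm_hugetlb_page(vma))
		lsb = huge_page_shift(hstate_vma(vma));  /* hugetlbfs backing */
	mmap_read_unlock(kvm->mm);

	return lsb;
}

static void send_hwpoison_signal_sketch(struct kvm *kvm, unsigned long hva)
{
	/* Today the x86 path passes PAGE_SHIFT unconditionally here. */
	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)hva,
			hwpoison_addr_lsb(kvm, hva), current);
}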
On 2/10/25 17:48, Peter Xu wrote:
> On Fri, Feb 07, 2025 at 07:02:22PM +0100, William Roche wrote:
>> [...]
>> So the main reason is a KVM "weakness" with kvm_send_hwpoison_signal(), and
>> the second reason is to have richer error messages.
>
> This seems true, and I also remember something when I looked at this
> previously but maybe nobody tried to fix it. ARM seems to be correct on
> that field, otoh.
>
> Is it possible we fix KVM on x86?

Yes, very probably, and it would be a kernel fix.
This kernel modification would be needed to run on the hypervisor first to
influence new code in qemu able to use the SIGBUS siginfo information and
identify the size of the page impacted (instead of using an internal
addition to kvm API).
But this mechanism could help to generate a large page memory error specific
message on SIGBUS receiving.

>>>
>>> I feel like when hwpoison becomes a serious topic, we need some more
>>> serious reporting facility than error reports. So that we could have this
>>> as separate topic to be revisited. It might speed up your prior patches
>>> from not being blocked on this.
>>
>> I explained why I think that error messages are important, but I don't want
>> to get blocked on fixing the hugepage memory recovery because of that.
>
> What is the major benefit of reporting in QEMU's stderr in this case?

Such messages can be collected into a VM specific log file, as any other
error_report() message, like the existing x86 error injection messages
reported by Qemu.
These messages should help the administrator to better understand the
behavior of the VM.

> For example, how should we consume the error reports that this patch
> introduces? Is it still for debugging purpose?

It's not only debugging, but it's a trace of a significant event that can
have major consequences on the VM.

> I agree it's always better to dump something in QEMU when such happened,
> but IIUC what I mentioned above (by monitoring QEMU ramblock setups, and
> monitor host dmesg on any vaddr reported hwpoison) should also allow anyone
> to deduce the page size of affected vaddr, especially if it's for debugging
> purpose. However I could possibly have missed the goal here..

You're right that knowing the address, the administrator can deduce what
memory area was impacted and the associated page size. But the goal of these
large page specific messages was to give details on the event type and
immediately qualify the consequences.
Using large pages can also have drawbacks, and a large page specific message
on memory error makes that more obvious! Not only a debug msg, but an
indication that the VM lost an unusually large amount of its memory.

>>
>> If you think that not displaying a specific message for large page loss can
>> help to get the recovery fixed, then I can change my proposal to do so.
>>
>> Early next week, I'll send a simplified version of my first 3 patches
>> without these specific messages and without the preallocation handling in
>> all remap cases, so you can evaluate this possibility.
>
> Yes IMHO it'll always be helpful to separate it if possible.

I'm sending now a v8 version, without the specific messages and the remap
notification. It should fix the main recovery bug we currently have. More
messages and a notification dealing with pre-allocation can be added in a
second step.

Please let me know if this v8 version can be integrated without the prealloc
and specific messages?

Thanks,
William.
On Tue, Feb 11, 2025 at 10:22:38PM +0100, William Roche wrote:
> On 2/10/25 17:48, Peter Xu wrote:
> > On Fri, Feb 07, 2025 at 07:02:22PM +0100, William Roche wrote:
> > > [...]
> > > So the main reason is a KVM "weakness" with kvm_send_hwpoison_signal(), and
> > > the second reason is to have richer error messages.
> >
> > This seems true, and I also remember something when I looked at this
> > previously but maybe nobody tried to fix it. ARM seems to be correct on
> > that field, otoh.
> >
> > Is it possible we fix KVM on x86?
>
> Yes, very probably, and it would be a kernel fix.
> This kernel modification would be needed to run on the hypervisor first to
> influence new code in qemu able to use the SIGBUS siginfo information and
> identify the size of the page impacted (instead of using an internal
> addition to kvm API).
> But this mechanism could help to generate a large page memory error specific
> message on SIGBUS receiving.

Yes, QEMU should probably better be able to work on both old/new kernels,
even if this will be fixed.

> > > > I feel like when hwpoison becomes a serious topic, we need some more
> > > > serious reporting facility than error reports. So that we could have this
> > > > as separate topic to be revisited. It might speed up your prior patches
> > > > from not being blocked on this.
> > >
> > > I explained why I think that error messages are important, but I don't want
> > > to get blocked on fixing the hugepage memory recovery because of that.
> >
> > What is the major benefit of reporting in QEMU's stderr in this case?
>
> Such messages can be collected into a VM specific log file, as any other
> error_report() message, like the existing x86 error injection messages
> reported by Qemu.
> These messages should help the administrator to better understand the
> behavior of the VM.

I'll still put "better understand the behavior of VM" into the debugging
category. :) But I agree such can be important information. That's also why
I was curious whether it should be something like a QMP event instead.
That's a much more formal way of sending important messages.

> > For example, how should we consume the error reports that this patch
> > introduces? Is it still for debugging purpose?
>
> It's not only debugging, but it's a trace of a significant event that can
> have major consequences on the VM.
>
> > I agree it's always better to dump something in QEMU when such happened,
> > but IIUC what I mentioned above (by monitoring QEMU ramblock setups, and
> > monitor host dmesg on any vaddr reported hwpoison) should also allow anyone
> > to deduce the page size of affected vaddr, especially if it's for debugging
> > purpose. However I could possibly have missed the goal here..
>
> You're right that knowing the address, the administrator can deduce what
> memory area was impacted and the associated page size. But the goal of these
> large page specific messages was to give details on the event type and
> immediately qualify the consequences.
> Using large pages can also have drawbacks, and a large page specific message
> on memory error makes that more obvious! Not only a debug msg, but an
> indication that the VM lost an unusually large amount of its memory.
>
> > > If you think that not displaying a specific message for large page loss can
> > > help to get the recovery fixed, then I can change my proposal to do so.
> > >
> > > Early next week, I'll send a simplified version of my first 3 patches
> > > without these specific messages and without the preallocation handling in
> > > all remap cases, so you can evaluate this possibility.
> >
> > Yes IMHO it'll always be helpful to separate it if possible.
>
> I'm sending now a v8 version, without the specific messages and the remap
> notification. It should fix the main recovery bug we currently have. More
> messages and a notification dealing with pre-allocation can be added in a
> second step.
>
> Please let me know if this v8 version can be integrated without the prealloc
> and specific messages?

IMHO fixing hugetlb page is still progress on its own, even without any
added error message, or proactive allocation during reset.

One issue is the v8 still contains patch 3, which is for ARM kvm.. You may
need to post it separately for ARM maintainers to review & collect. I'll be
able to queue patches 1-2.

Thanks,
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index f89568bfa3..9a0d970ce1 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1296,6 +1296,24 @@ static void kvm_unpoison_all(void *param)
 void kvm_hwpoison_page_add(ram_addr_t ram_addr)
 {
     HWPoisonPage *page;
+    struct RAMBlockInfo rb_info;
+
+    if (qemu_ram_block_info_from_addr(ram_addr, &rb_info)) {
+        size_t ps = rb_info.page_size;
+
+        if (ps > TARGET_PAGE_SIZE) {
+            uint64_t offset = QEMU_ALIGN_DOWN(ram_addr - rb_info.offset, ps);
+
+            if (rb_info.fd >= 0) {
+                error_report("Memory Error on large page from %s:%" PRIx64
+                             "+%" PRIx64 " +%zx", rb_info.idstr, offset,
+                             rb_info.fd_offset, ps);
+            } else {
+                error_report("Memory Error on large page from %s:%" PRIx64
+                             " +%zx", rb_info.idstr, offset, ps);
+            }
+        }
+    }
 
     QLIST_FOREACH(page, &hwpoison_page_list, list) {
         if (page->ram_addr == ram_addr) {
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 3771b2130c..190bd4f34a 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -110,6 +110,16 @@ int qemu_ram_get_fd(RAMBlock *rb);
 size_t qemu_ram_pagesize(RAMBlock *block);
 size_t qemu_ram_pagesize_largest(void);
 
+struct RAMBlockInfo {
+    char idstr[256];
+    ram_addr_t offset;
+    int fd;
+    uint64_t fd_offset;
+    size_t page_size;
+};
+bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr,
+                                   struct RAMBlockInfo *block);
+
 /**
  * cpu_address_space_init:
  * @cpu: CPU to add this address space to
diff --git a/system/physmem.c b/system/physmem.c
index e8ff930bc9..686f569270 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1678,6 +1678,28 @@ size_t qemu_ram_pagesize_largest(void)
     return largest;
 }
 
+/* Copy RAMBlock information associated to the given ram_addr location */
+bool qemu_ram_block_info_from_addr(ram_addr_t ram_addr,
+                                   struct RAMBlockInfo *b_info)
+{
+    RAMBlock *rb;
+
+    assert(b_info);
+
+    RCU_READ_LOCK_GUARD();
+    rb = qemu_get_ram_block(ram_addr);
+    if (!rb) {
+        return false;
+    }
+
+    pstrcat(b_info->idstr, sizeof(b_info->idstr), rb->idstr);
+    b_info->offset = rb->offset;
+    b_info->fd = rb->fd;
+    b_info->fd_offset = rb->fd_offset;
+    b_info->page_size = rb->page_size;
+    return true;
+}
+
 static int memory_try_enable_merging(void *addr, size_t len)
 {
     if (!machine_mem_merge(current_machine)) {
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index da30bdbb23..d9dedc6d74 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2389,6 +2389,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
                 kvm_cpu_synchronize_state(c);
                 if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
                     kvm_inject_arm_sea(c);
+                    error_report("Guest Memory Error at QEMU addr %p and "
+                                 "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
+                                 addr, paddr, "BUS_MCEERR_AR");
                 } else {
                     error_report("failed to record the error");
                     abort();