diff mbox series

[v2,2/7] plugins: save value during memory accesses

Message ID 20240626233757.375083-3-pierrick.bouvier@linaro.org (mailing list archive)
State New, archived
Headers show
Series plugins: access values during a memory read/write | expand

Commit Message

Pierrick Bouvier June 26, 2024, 11:37 p.m. UTC
Different code paths handle memory accesses:
- tcg generated code
- load/store helpers
- atomic helpers

This value is saved in cpu->plugin_state.

Atomic operations are doing read/write at the same time, so we generate
two memory callbacks instead of one, to allow plugins to access distinct
values.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
 include/qemu/plugin.h         |  8 ++++
 plugins/core.c                |  7 ++++
 tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
 accel/tcg/atomic_common.c.inc | 13 ++++++-
 accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
 6 files changed, 173 insertions(+), 31 deletions(-)

Comments

Richard Henderson June 27, 2024, 7:10 p.m. UTC | #1
On 6/26/24 16:37, Pierrick Bouvier wrote:
> Different code paths handle memory accesses:
> - tcg generated code
> - load/store helpers
> - atomic helpers
> 
> This value is saved in cpu->plugin_state.
> 
> Atomic operations are doing read/write at the same time, so we generate
> two memory callbacks instead of one, to allow plugins to access distinct
> values.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
>   include/qemu/plugin.h         |  8 ++++
>   plugins/core.c                |  7 ++++
>   tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
>   accel/tcg/atomic_common.c.inc | 13 ++++++-
>   accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
>   6 files changed, 173 insertions(+), 31 deletions(-)
> 
> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
> index 1dc2151dafd..830e4f16069 100644
> --- a/accel/tcg/atomic_template.h
> +++ b/accel/tcg/atomic_template.h
> @@ -53,6 +53,14 @@
>   # error unsupported data size
>   #endif
>   
> +#if DATA_SIZE == 16
> +# define UPPER_MEMORY_VALUE(val) int128_gethi(val)
> +# define LOWER_MEMORY_VALUE(val) int128_getlo(val)
> +#else
> +# define UPPER_MEMORY_VALUE(val) 0
> +# define LOWER_MEMORY_VALUE(val) val
> +#endif
> +
>   #if DATA_SIZE >= 4
>   # define ABI_TYPE  DATA_TYPE
>   #else
> @@ -83,7 +91,12 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
>       ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
>   #endif
>       ATOMIC_MMU_CLEANUP;
> -    atomic_trace_rmw_post(env, addr, oi);
> +    atomic_trace_rmw_post(env, addr,
> +                          UPPER_MEMORY_VALUE(ret),
> +                          LOWER_MEMORY_VALUE(ret),
> +                          UPPER_MEMORY_VALUE(newv),
> +                          LOWER_MEMORY_VALUE(newv),
> +                          oi);

Just a nit, but tcg is consistent in using little-endian argument ordering for values 
passed by parts.  I would prefer we continue with that.


> @@ -142,9 +142,13 @@ struct qemu_plugin_tb {
>   /**
>    * struct CPUPluginState - per-CPU state for plugins
>    * @event_mask: plugin event bitmap. Modified only via async work.
> + * @mem_value_upper_bits: 64 upper bits of latest accessed mem value.
> + * @mem_value_lower_bits: 64 lower bits of latest accessed mem value.
>    */
>   struct CPUPluginState {
>       DECLARE_BITMAP(event_mask, QEMU_PLUGIN_EV_MAX);
> +    uint64_t mem_value_upper_bits;
> +    uint64_t mem_value_lower_bits;
>   };

At some point we may well support 32 byte acceses, for better guest vector support.  Do we 
have a plan for this beyond "add more fields here"?


r~
Pierrick Bouvier June 27, 2024, 7:25 p.m. UTC | #2
On 6/27/24 12:10, Richard Henderson wrote:
> On 6/26/24 16:37, Pierrick Bouvier wrote:
>> Different code paths handle memory accesses:
>> - tcg generated code
>> - load/store helpers
>> - atomic helpers
>>
>> This value is saved in cpu->plugin_state.
>>
>> Atomic operations are doing read/write at the same time, so we generate
>> two memory callbacks instead of one, to allow plugins to access distinct
>> values.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>    accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
>>    include/qemu/plugin.h         |  8 ++++
>>    plugins/core.c                |  7 ++++
>>    tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
>>    accel/tcg/atomic_common.c.inc | 13 ++++++-
>>    accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
>>    6 files changed, 173 insertions(+), 31 deletions(-)
>>
>> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
>> index 1dc2151dafd..830e4f16069 100644
>> --- a/accel/tcg/atomic_template.h
>> +++ b/accel/tcg/atomic_template.h
>> @@ -53,6 +53,14 @@
>>    # error unsupported data size
>>    #endif
>>    
>> +#if DATA_SIZE == 16
>> +# define UPPER_MEMORY_VALUE(val) int128_gethi(val)
>> +# define LOWER_MEMORY_VALUE(val) int128_getlo(val)
>> +#else
>> +# define UPPER_MEMORY_VALUE(val) 0
>> +# define LOWER_MEMORY_VALUE(val) val
>> +#endif
>> +
>>    #if DATA_SIZE >= 4
>>    # define ABI_TYPE  DATA_TYPE
>>    #else
>> @@ -83,7 +91,12 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
>>        ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
>>    #endif
>>        ATOMIC_MMU_CLEANUP;
>> -    atomic_trace_rmw_post(env, addr, oi);
>> +    atomic_trace_rmw_post(env, addr,
>> +                          UPPER_MEMORY_VALUE(ret),
>> +                          LOWER_MEMORY_VALUE(ret),
>> +                          UPPER_MEMORY_VALUE(newv),
>> +                          LOWER_MEMORY_VALUE(newv),
>> +                          oi);
> 
> Just a nit, but tcg is consistent in using little-endian argument ordering for values
> passed by parts.  I would prefer we continue with that.
> 

Didn't notice that, but I'll stick to this. Any preference on the naming 
as well while I'm at it? (low/hi vs upper/lower)?

> 
>> @@ -142,9 +142,13 @@ struct qemu_plugin_tb {
>>    /**
>>     * struct CPUPluginState - per-CPU state for plugins
>>     * @event_mask: plugin event bitmap. Modified only via async work.
>> + * @mem_value_upper_bits: 64 upper bits of latest accessed mem value.
>> + * @mem_value_lower_bits: 64 lower bits of latest accessed mem value.
>>     */
>>    struct CPUPluginState {
>>        DECLARE_BITMAP(event_mask, QEMU_PLUGIN_EV_MAX);
>> +    uint64_t mem_value_upper_bits;
>> +    uint64_t mem_value_lower_bits;
>>    };
> 
> At some point we may well support 32 byte acceses, for better guest vector support.  Do we
> have a plan for this beyond "add more fields here"?
>

For now, I sticked to native tcg ops (up to 128 bits), with this simple 
solution. Do you think tcg core will be extended to support more, or 
will helper simply load/store four 128bits words, emitting four 
callbacks as well?

If you have a better idea, I'm open to implement an alternative, but 
didn't want to think too much ahead.

> 
> r~
Richard Henderson June 27, 2024, 8:03 p.m. UTC | #3
On 6/27/24 12:25, Pierrick Bouvier wrote:
> On 6/27/24 12:10, Richard Henderson wrote:
>> On 6/26/24 16:37, Pierrick Bouvier wrote:
>>> Different code paths handle memory accesses:
>>> - tcg generated code
>>> - load/store helpers
>>> - atomic helpers
>>>
>>> This value is saved in cpu->plugin_state.
>>>
>>> Atomic operations are doing read/write at the same time, so we generate
>>> two memory callbacks instead of one, to allow plugins to access distinct
>>> values.
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> ---
>>>    accel/tcg/atomic_template.h   | 66 ++++++++++++++++++++++++++++----
>>>    include/qemu/plugin.h         |  8 ++++
>>>    plugins/core.c                |  7 ++++
>>>    tcg/tcg-op-ldst.c             | 72 +++++++++++++++++++++++++++++++----
>>>    accel/tcg/atomic_common.c.inc | 13 ++++++-
>>>    accel/tcg/ldst_common.c.inc   | 38 +++++++++++-------
>>>    6 files changed, 173 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
>>> index 1dc2151dafd..830e4f16069 100644
>>> --- a/accel/tcg/atomic_template.h
>>> +++ b/accel/tcg/atomic_template.h
>>> @@ -53,6 +53,14 @@
>>>    # error unsupported data size
>>>    #endif
>>> +#if DATA_SIZE == 16
>>> +# define UPPER_MEMORY_VALUE(val) int128_gethi(val)
>>> +# define LOWER_MEMORY_VALUE(val) int128_getlo(val)
>>> +#else
>>> +# define UPPER_MEMORY_VALUE(val) 0
>>> +# define LOWER_MEMORY_VALUE(val) val
>>> +#endif
>>> +
>>>    #if DATA_SIZE >= 4
>>>    # define ABI_TYPE  DATA_TYPE
>>>    #else
>>> @@ -83,7 +91,12 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
>>>        ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
>>>    #endif
>>>        ATOMIC_MMU_CLEANUP;
>>> -    atomic_trace_rmw_post(env, addr, oi);
>>> +    atomic_trace_rmw_post(env, addr,
>>> +                          UPPER_MEMORY_VALUE(ret),
>>> +                          LOWER_MEMORY_VALUE(ret),
>>> +                          UPPER_MEMORY_VALUE(newv),
>>> +                          LOWER_MEMORY_VALUE(newv),
>>> +                          oi);
>>
>> Just a nit, but tcg is consistent in using little-endian argument ordering for values
>> passed by parts.  I would prefer we continue with that.
>>
> 
> Didn't notice that, but I'll stick to this. Any preference on the naming as well while I'm 
> at it? (low/hi vs upper/lower)?

I guess we mostly use high/low, hi/lo, and variations thereof elsewhere as well.  I don't 
see any uses of upper/lower.

>> At some point we may well support 32 byte acceses, for better guest vector support.  Do we
>> have a plan for this beyond "add more fields here"?
>>
> 
> For now, I sticked to native tcg ops (up to 128 bits), with this simple solution. Do you 
> think tcg core will be extended to support more, or will helper simply load/store four 
> 128bits words, emitting four callbacks as well?

I assume we'll support a 256-bit (non-atomic) memory operation.
That avoids some of the "probe for write, perform stores after we know it's safe" sort of 
affair.

I don't think I'll do it while i686 is still a supported host though.

> If you have a better idea, I'm open to implement an alternative, but didn't want to think 
> too much ahead.

Fair.


r~
diff mbox series

Patch

diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index 1dc2151dafd..830e4f16069 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -53,6 +53,14 @@ 
 # error unsupported data size
 #endif
 
+#if DATA_SIZE == 16
+# define UPPER_MEMORY_VALUE(val) int128_gethi(val)
+# define LOWER_MEMORY_VALUE(val) int128_getlo(val)
+#else
+# define UPPER_MEMORY_VALUE(val) 0
+# define LOWER_MEMORY_VALUE(val) val
+#endif
+
 #if DATA_SIZE >= 4
 # define ABI_TYPE  DATA_TYPE
 #else
@@ -83,7 +91,12 @@  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
     ret = qatomic_cmpxchg__nocheck(haddr, cmpv, newv);
 #endif
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          UPPER_MEMORY_VALUE(ret),
+                          LOWER_MEMORY_VALUE(ret),
+                          UPPER_MEMORY_VALUE(newv),
+                          LOWER_MEMORY_VALUE(newv),
+                          oi);
     return ret;
 }
 
@@ -97,7 +110,12 @@  ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
 
     ret = qatomic_xchg__nocheck(haddr, val);
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          UPPER_MEMORY_VALUE(ret),
+                          LOWER_MEMORY_VALUE(ret),
+                          UPPER_MEMORY_VALUE(val),
+                          LOWER_MEMORY_VALUE(val),
+                          oi);
     return ret;
 }
 
@@ -109,7 +127,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
     haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
     ret = qatomic_##X(haddr, val);                                  \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          UPPER_MEMORY_VALUE(ret),                  \
+                          LOWER_MEMORY_VALUE(ret),                  \
+                          UPPER_MEMORY_VALUE(val),                  \
+                          LOWER_MEMORY_VALUE(val),                  \
+                          oi);                                      \
     return ret;                                                     \
 }
 
@@ -145,7 +168,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
         cmp = qatomic_cmpxchg__nocheck(haddr, old, new);            \
     } while (cmp != old);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          UPPER_MEMORY_VALUE(old),                  \
+                          LOWER_MEMORY_VALUE(old),                  \
+                          UPPER_MEMORY_VALUE(xval),                 \
+                          LOWER_MEMORY_VALUE(xval),                 \
+                          oi);                                      \
     return RET;                                                     \
 }
 
@@ -188,7 +216,12 @@  ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, abi_ptr addr,
     ret = qatomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv));
 #endif
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          UPPER_MEMORY_VALUE(ret),
+                          LOWER_MEMORY_VALUE(ret),
+                          UPPER_MEMORY_VALUE(newv),
+                          LOWER_MEMORY_VALUE(newv),
+                          oi);
     return BSWAP(ret);
 }
 
@@ -202,7 +235,12 @@  ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, abi_ptr addr, ABI_TYPE val,
 
     ret = qatomic_xchg__nocheck(haddr, BSWAP(val));
     ATOMIC_MMU_CLEANUP;
-    atomic_trace_rmw_post(env, addr, oi);
+    atomic_trace_rmw_post(env, addr,
+                          UPPER_MEMORY_VALUE(ret),
+                          LOWER_MEMORY_VALUE(ret),
+                          UPPER_MEMORY_VALUE(val),
+                          LOWER_MEMORY_VALUE(val),
+                          oi);
     return BSWAP(ret);
 }
 
@@ -214,7 +252,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
     haddr = atomic_mmu_lookup(env_cpu(env), addr, oi, DATA_SIZE, retaddr);   \
     ret = qatomic_##X(haddr, BSWAP(val));                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          UPPER_MEMORY_VALUE(ret),                  \
+                          LOWER_MEMORY_VALUE(ret),                  \
+                          UPPER_MEMORY_VALUE(val),                  \
+                          LOWER_MEMORY_VALUE(val),                  \
+                          oi);                                      \
     return BSWAP(ret);                                              \
 }
 
@@ -247,7 +290,12 @@  ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, abi_ptr addr,            \
         ldn = qatomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));     \
     } while (ldo != ldn);                                           \
     ATOMIC_MMU_CLEANUP;                                             \
-    atomic_trace_rmw_post(env, addr, oi);                           \
+    atomic_trace_rmw_post(env, addr,                                \
+                          UPPER_MEMORY_VALUE(old),                  \
+                          LOWER_MEMORY_VALUE(old),                  \
+                          UPPER_MEMORY_VALUE(xval),                 \
+                          LOWER_MEMORY_VALUE(xval),                 \
+                          oi);                                      \
     return RET;                                                     \
 }
 
@@ -281,3 +329,5 @@  GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
 #undef SUFFIX
 #undef DATA_SIZE
 #undef SHIFT
+#undef UPPER_MEMORY_VALUE
+#undef LOWER_MEMORY_VALUE
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index bc5aef979e7..112dcdad717 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -142,9 +142,13 @@  struct qemu_plugin_tb {
 /**
  * struct CPUPluginState - per-CPU state for plugins
  * @event_mask: plugin event bitmap. Modified only via async work.
+ * @mem_value_upper_bits: 64 upper bits of latest accessed mem value.
+ * @mem_value_lower_bits: 64 lower bits of latest accessed mem value.
  */
 struct CPUPluginState {
     DECLARE_BITMAP(event_mask, QEMU_PLUGIN_EV_MAX);
+    uint64_t mem_value_upper_bits;
+    uint64_t mem_value_lower_bits;
 };
 
 /**
@@ -164,6 +168,8 @@  qemu_plugin_vcpu_syscall(CPUState *cpu, int64_t num, uint64_t a1,
 void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret);
 
 void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                             uint64_t value_upper_bits,
+                             uint64_t value_lower_bits,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw);
 
 void qemu_plugin_flush_cb(void);
@@ -248,6 +254,8 @@  void qemu_plugin_vcpu_syscall_ret(CPUState *cpu, int64_t num, int64_t ret)
 { }
 
 static inline void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                                           uint64_t value_upper_bits,
+                                           uint64_t value_lower_bits,
                                            MemOpIdx oi,
                                            enum qemu_plugin_mem_rw rw)
 { }
diff --git a/plugins/core.c b/plugins/core.c
index 9d737d82787..957a3efb06d 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -575,14 +575,21 @@  void exec_inline_op(enum plugin_dyn_cb_type type,
 }
 
 void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
+                             uint64_t value_upper_bits,
+                             uint64_t value_lower_bits,
                              MemOpIdx oi, enum qemu_plugin_mem_rw rw)
 {
     GArray *arr = cpu->neg.plugin_mem_cbs;
+    CPUPluginState *plugin_state = cpu->plugin_state;
     size_t i;
 
     if (arr == NULL) {
         return;
     }
+
+    plugin_state->mem_value_upper_bits = value_upper_bits;
+    plugin_state->mem_value_lower_bits = value_lower_bits;
+
     for (i = 0; i < arr->len; i++) {
         struct qemu_plugin_dyn_cb *cb =
             &g_array_index(arr, struct qemu_plugin_dyn_cb, i);
diff --git a/tcg/tcg-op-ldst.c b/tcg/tcg-op-ldst.c
index 85101602581..93b338704dc 100644
--- a/tcg/tcg-op-ldst.c
+++ b/tcg/tcg-op-ldst.c
@@ -148,14 +148,24 @@  static TCGv_i64 plugin_maybe_preserve_addr(TCGTemp *addr)
     return NULL;
 }
 
+#ifdef CONFIG_PLUGIN
 static void
-plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
+plugin_gen_mem_callbacks(TCGv_i64 value_upper_bits, TCGv_i64 value_lower_bits,
+                         TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
                          enum qemu_plugin_mem_rw rw)
 {
-#ifdef CONFIG_PLUGIN
     if (tcg_ctx->plugin_insn != NULL) {
         qemu_plugin_meminfo_t info = make_plugin_meminfo(oi, rw);
 
+        TCGv_ptr plugin_state = tcg_temp_ebb_new_ptr();
+        tcg_gen_ld_ptr(plugin_state, tcg_env,
+                       offsetof(CPUState, plugin_state) - sizeof(CPUState));
+        tcg_gen_st_i64(value_upper_bits, plugin_state,
+                       offsetof(CPUPluginState, mem_value_upper_bits));
+        tcg_gen_st_i64(value_lower_bits, plugin_state,
+                       offsetof(CPUPluginState, mem_value_lower_bits));
+        tcg_temp_free_ptr(plugin_state);
+
         if (tcg_ctx->addr_type == TCG_TYPE_I32) {
             if (!copy_addr) {
                 copy_addr = tcg_temp_ebb_new_i64();
@@ -172,6 +182,48 @@  plugin_gen_mem_callbacks(TCGv_i64 copy_addr, TCGTemp *orig_addr, MemOpIdx oi,
             }
         }
     }
+}
+#endif
+
+static void
+plugin_gen_mem_callbacks_i32(TCGv_i32 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        TCGv_i64 ext_val = tcg_temp_ebb_new_i64();
+        tcg_gen_extu_i32_i64(ext_val, val);
+        plugin_gen_mem_callbacks(tcg_constant_i64(0), ext_val,
+                                 copy_addr, orig_addr, oi, rw);
+        tcg_temp_free_i64(ext_val);
+    }
+#endif
+}
+
+static void
+plugin_gen_mem_callbacks_i64(TCGv_i64 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        plugin_gen_mem_callbacks(tcg_constant_i64(0), val,
+                                 copy_addr, orig_addr, oi, rw);
+    }
+#endif
+}
+
+static void
+plugin_gen_mem_callbacks_i128(TCGv_i128 val,
+                             TCGv_i64 copy_addr, TCGTemp *orig_addr,
+                             MemOpIdx oi, enum qemu_plugin_mem_rw rw)
+{
+#ifdef CONFIG_PLUGIN
+    if (tcg_ctx->plugin_insn != NULL) {
+        plugin_gen_mem_callbacks(TCGV128_HIGH(val), TCGV128_LOW(val),
+                                 copy_addr, orig_addr, oi, rw);
+    }
 #endif
 }
 
@@ -203,7 +255,8 @@  static void tcg_gen_qemu_ld_i32_int(TCGv_i32 val, TCGTemp *addr,
         opc = INDEX_op_qemu_ld_a64_i32;
     }
     gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
-    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i32(val, copy_addr, addr, orig_oi,
+                                 QEMU_PLUGIN_MEM_R);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         switch (orig_memop & MO_SIZE) {
@@ -271,7 +324,7 @@  static void tcg_gen_qemu_st_i32_int(TCGv_i32 val, TCGTemp *addr,
         }
     }
     gen_ldst(opc, tcgv_i32_temp(val), NULL, addr, oi);
-    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i32(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
 
     if (swap) {
         tcg_temp_free_i32(swap);
@@ -324,7 +377,8 @@  static void tcg_gen_qemu_ld_i64_int(TCGv_i64 val, TCGTemp *addr,
         opc = INDEX_op_qemu_ld_a64_i64;
     }
     gen_ldst_i64(opc, val, addr, oi);
-    plugin_gen_mem_callbacks(copy_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i64(val, copy_addr, addr, orig_oi,
+                                 QEMU_PLUGIN_MEM_R);
 
     if ((orig_memop ^ memop) & MO_BSWAP) {
         int flags = (orig_memop & MO_SIGN
@@ -396,7 +450,7 @@  static void tcg_gen_qemu_st_i64_int(TCGv_i64 val, TCGTemp *addr,
         opc = INDEX_op_qemu_st_a64_i64;
     }
     gen_ldst_i64(opc, val, addr, oi);
-    plugin_gen_mem_callbacks(NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i64(val, NULL, addr, orig_oi, QEMU_PLUGIN_MEM_W);
 
     if (swap) {
         tcg_temp_free_i64(swap);
@@ -606,7 +660,8 @@  static void tcg_gen_qemu_ld_i128_int(TCGv_i128 val, TCGTemp *addr,
                            tcg_constant_i32(orig_oi));
     }
 
-    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_R);
+    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
+                                  QEMU_PLUGIN_MEM_R);
 }
 
 void tcg_gen_qemu_ld_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
@@ -722,7 +777,8 @@  static void tcg_gen_qemu_st_i128_int(TCGv_i128 val, TCGTemp *addr,
                            tcg_constant_i32(orig_oi));
     }
 
-    plugin_gen_mem_callbacks(ext_addr, addr, orig_oi, QEMU_PLUGIN_MEM_W);
+    plugin_gen_mem_callbacks_i128(val, ext_addr, addr, orig_oi,
+                                  QEMU_PLUGIN_MEM_W);
 }
 
 void tcg_gen_qemu_st_i128_chk(TCGv_i128 val, TCGTemp *addr, TCGArg idx,
diff --git a/accel/tcg/atomic_common.c.inc b/accel/tcg/atomic_common.c.inc
index 95a5c5ff12d..bed56d25d47 100644
--- a/accel/tcg/atomic_common.c.inc
+++ b/accel/tcg/atomic_common.c.inc
@@ -14,9 +14,20 @@ 
  */
 
 static void atomic_trace_rmw_post(CPUArchState *env, uint64_t addr,
+                                  uint64_t read_value_upper_bits,
+                                  uint64_t read_value_lower_bits,
+                                  uint64_t write_value_upper_bits,
+                                  uint64_t write_value_lower_bits,
                                   MemOpIdx oi)
 {
-    qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_RW);
+    if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                read_value_upper_bits, read_value_lower_bits,
+                                oi, QEMU_PLUGIN_MEM_R);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                write_value_upper_bits, write_value_lower_bits,
+                                oi, QEMU_PLUGIN_MEM_W);
+    }
 }
 
 /*
diff --git a/accel/tcg/ldst_common.c.inc b/accel/tcg/ldst_common.c.inc
index 87ceb954873..f6d5613a358 100644
--- a/accel/tcg/ldst_common.c.inc
+++ b/accel/tcg/ldst_common.c.inc
@@ -123,10 +123,15 @@  void helper_st_i128(CPUArchState *env, uint64_t addr, Int128 val, MemOpIdx oi)
  * Load helpers for cpu_ldst.h
  */
 
-static void plugin_load_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
+static void plugin_load_cb(CPUArchState *env, abi_ptr addr,
+                           uint64_t value_upper_bits,
+                           uint64_t value_lower_bits,
+                           MemOpIdx oi)
 {
     if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
-        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                value_upper_bits, value_lower_bits,
+                                oi, QEMU_PLUGIN_MEM_R);
     }
 }
 
@@ -136,7 +141,7 @@  uint8_t cpu_ldb_mmu(CPUArchState *env, abi_ptr addr, MemOpIdx oi, uintptr_t ra)
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_UB);
     ret = do_ld1_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, 0, ret, oi);
     return ret;
 }
 
@@ -147,7 +152,7 @@  uint16_t cpu_ldw_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
     ret = do_ld2_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, 0, ret, oi);
     return ret;
 }
 
@@ -158,7 +163,7 @@  uint32_t cpu_ldl_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
     ret = do_ld4_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, 0, ret, oi);
     return ret;
 }
 
@@ -169,7 +174,7 @@  uint64_t cpu_ldq_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
     ret = do_ld8_mmu(env_cpu(env), addr, oi, ra, MMU_DATA_LOAD);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, 0, ret, oi);
     return ret;
 }
 
@@ -180,7 +185,7 @@  Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
 
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
     ret = do_ld16_mmu(env_cpu(env), addr, oi, ra);
-    plugin_load_cb(env, addr, oi);
+    plugin_load_cb(env, addr, int128_gethi(ret), int128_getlo(ret), oi);
     return ret;
 }
 
@@ -188,10 +193,15 @@  Int128 cpu_ld16_mmu(CPUArchState *env, abi_ptr addr,
  * Store helpers for cpu_ldst.h
  */
 
-static void plugin_store_cb(CPUArchState *env, abi_ptr addr, MemOpIdx oi)
+static void plugin_store_cb(CPUArchState *env, abi_ptr addr,
+                            uint64_t value_upper_bits,
+                            uint64_t value_lower_bits,
+                            MemOpIdx oi)
 {
     if (cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
-        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+        qemu_plugin_vcpu_mem_cb(env_cpu(env), addr,
+                                value_upper_bits, value_lower_bits,
+                                oi, QEMU_PLUGIN_MEM_W);
     }
 }
 
@@ -199,7 +209,7 @@  void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
                  MemOpIdx oi, uintptr_t retaddr)
 {
     helper_stb_mmu(env, addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, 0, val, oi);
 }
 
 void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
@@ -207,7 +217,7 @@  void cpu_stw_mmu(CPUArchState *env, abi_ptr addr, uint16_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_16);
     do_st2_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, 0, val, oi);
 }
 
 void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
@@ -215,7 +225,7 @@  void cpu_stl_mmu(CPUArchState *env, abi_ptr addr, uint32_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_32);
     do_st4_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, 0, val, oi);
 }
 
 void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
@@ -223,7 +233,7 @@  void cpu_stq_mmu(CPUArchState *env, abi_ptr addr, uint64_t val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_64);
     do_st8_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, 0, val, oi);
 }
 
 void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
@@ -231,7 +241,7 @@  void cpu_st16_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
 {
     tcg_debug_assert((get_memop(oi) & MO_SIZE) == MO_128);
     do_st16_mmu(env_cpu(env), addr, val, oi, retaddr);
-    plugin_store_cb(env, addr, oi);
+    plugin_store_cb(env, addr, int128_gethi(val), int128_getlo(val), oi);
 }
 
 /*