@@ -117,27 +117,95 @@ static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
}
}
-static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
- uint32_t l, uintptr_t ra)
+/* An access covers at most 4096 bytes and therefore at most two pages. */
+typedef struct S390Access {
+ target_ulong vaddr1;
+ target_ulong vaddr2;
+ char *haddr1;
+ char *haddr2;
+ uint16_t size1;
+ uint16_t size2;
+ /*
+ * If we can't access the host page directly, we'll have to do I/O access
+ * via ld/st helpers. These are internal details, so we store the
+ * mmu idx to do the access here instead of passing it around in the
+ * helpers. Maybe one day we can get rid of ld/st access, once we can
+ * handle TLB_NOTDIRTY differently. We don't expect these special accesses
+ * to trigger exceptions; only TLB_NOTDIRTY on LAP pages could trigger a
+ * new MMU translation, and it is very unlikely that the mapping changes
+ * in between and we end up triggering a fault.
+ */
+ int mmu_idx;
+} S390Access;
+
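+/*
+ * Example (assuming 4 KiB pages): an 8-byte access at vaddr 0xfffc is
+ * split into size1 = 4 bytes at vaddr1 = 0xfffc and size2 = 4 bytes at
+ * vaddr2 = 0x10000 (passed through wrap_address()).
+ */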
+static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
+ MMUAccessType access_type, int mmu_idx,
+ uintptr_t ra)
{
- int mmu_idx = cpu_mmu_index(env, false);
+ S390Access access = {
+ .vaddr1 = vaddr,
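+ /* -(vaddr | TARGET_PAGE_MASK) is the number of bytes to the page end. */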
+ .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
+ .mmu_idx = mmu_idx,
+ };
- while (l > 0) {
- void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
- if (p) {
- /* Access to the whole page in write mode granted. */
- uint32_t l_adj = adj_len_to_page(l, dest);
- memset(p, byte, l_adj);
- dest += l_adj;
- l -= l_adj;
+ g_assert(size > 0 && size <= 4096);
+ access.haddr1 = probe_access(env, access.vaddr1, access.size1, access_type,
+ mmu_idx, ra);
+
+ if (unlikely(access.size1 != size)) {
+ /* The access crosses page boundaries. */
+ access.vaddr2 = wrap_address(env, vaddr + access.size1);
+ access.size2 = size - access.size1;
+ access.haddr2 = probe_access(env, access.vaddr2, access.size2,
+ access_type, mmu_idx, ra);
+ }
+ return access;
+}
+
+/* Helper to handle memset on a single page. */
+static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
+ uint8_t byte, uint16_t size, int mmu_idx,
+ uintptr_t ra)
+{
+#ifdef CONFIG_USER_ONLY
+ g_assert(haddr);
+ memset(haddr, byte, size);
+#else
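+ /* Encode a one-byte memory operation for the slow-path ld/st helpers. */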
+ TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+ int i;
+
+ if (likely(haddr)) {
+ memset(haddr, byte, size);
+ } else {
+ /*
+ * Do a single access and test if we can then get access to the
+ * page. This is especially relevant to speed up TLB_NOTDIRTY:
+ * the first store marks the page dirty, so the direct host
+ * mapping is usually available for the remaining bytes.
+ */
+ g_assert(size > 0);
+ helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
+ haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
+ if (likely(haddr)) {
+ memset(haddr + 1, byte, size - 1);
} else {
- /* We failed to get access to the whole page. The next write
- access will likely fill the QEMU TLB for the next iteration. */
- cpu_stb_data_ra(env, dest, byte, ra);
- dest++;
- l--;
+ for (i = 1; i < size; i++) {
+ helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
+ }
}
}
+#endif
+}
+
+static void access_memset(CPUS390XState *env, S390Access *desta,
+ uint8_t byte, uintptr_t ra)
+{
+ do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
+ desta->mmu_idx, ra);
+ if (likely(!desta->size2)) {
+ return;
+ }
+ do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
+ desta->mmu_idx, ra);
}
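+
+/*
+ * access_prepare() has already probed both pages, so the memset itself is
+ * not expected to fault (see the comment in S390Access). This is what makes
+ * the callers fault-safe: exceptions are delivered before any byte of the
+ * destination is modified.
+ */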
#ifndef CONFIG_USER_ONLY
@@ -259,15 +327,19 @@ uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
uint64_t src, uintptr_t ra)
{
+ const int mmu_idx = cpu_mmu_index(env, false);
+ S390Access desta;
uint32_t i;
uint8_t c = 0;
HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
__func__, l, dest, src);
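+ /* Like MVC, XC processes l + 1 bytes: a length code of 0 means 1 byte. */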
+ desta = access_prepare(env, dest, l + 1, MMU_DATA_STORE, mmu_idx, ra);
+
/* xor with itself is the same as memset(0) */
if (src == dest) {
- fast_memset(env, dest, 0, l + 1, ra);
+ access_memset(env, &desta, 0, ra);
return 0;
}
@@ -315,6 +387,8 @@ uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
uint64_t src, uintptr_t ra)
{
+ const int mmu_idx = cpu_mmu_index(env, false);
+ S390Access desta;
uint32_t i;
HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
@@ -323,13 +397,15 @@ static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
/* MVC always copies one more byte than specified - maximum is 256 */
l++;
+ desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
+
/*
* "When the operands overlap, the result is obtained as if the operands
* were processed one byte at a time". Only non-destructive overlaps
* behave like memmove().
*/
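+ /*
+ * With dest == src + 1, each stored byte is read back as the next
+ * source byte, replicating the byte at src over the whole destination:
+ * exactly memset() semantics.
+ */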
if (dest == src + 1) {
- fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l, ra);
+ access_memset(env, &desta, cpu_ldub_data_ra(env, src, ra), ra);
} else if (!is_destructive_overlap(env, dest, src, l)) {
fast_memmove(env, dest, src, l, ra);
} else {
@@ -775,7 +851,9 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
uint64_t *src, uint64_t *srclen,
uint16_t pad, int wordsize, uintptr_t ra)
{
+ const int mmu_idx = cpu_mmu_index(env, false);
int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
+ S390Access desta;
int i, cc;
if (*destlen == *srclen) {
@@ -805,7 +883,8 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
} else if (wordsize == 1) {
/* Pad the remaining area */
*destlen -= len;
- fast_memset(env, *dest, pad, len, ra);
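+ /* len is bounded by the page end above, so one page access suffices. */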
+ desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
+ access_memset(env, &desta, pad, ra);
*dest = wrap_address(env, *dest + len);
} else {
/* The remaining length selects the padding byte. */
@@ -825,6 +904,7 @@ static inline uint32_t do_mvcl(CPUS390XState *env,
/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
+ const int mmu_idx = cpu_mmu_index(env, false);
uintptr_t ra = GETPC();
uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
uint64_t dest = get_address(env, r1);
@@ -832,6 +912,7 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
uint64_t src = get_address(env, r2);
uint8_t pad = env->regs[r2 + 1] >> 24;
uint32_t cc, cur_len;
+ S390Access desta;
if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
cc = 3;
@@ -859,7 +940,9 @@ uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
while (destlen) {
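+ /* Process at most one destination page per iteration. */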
cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
if (!srclen) {
- fast_memset(env, dest, pad, cur_len, ra);
+ desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
+ ra);
+ access_memset(env, &desta, pad, ra);
} else {
cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);