@@ -691,6 +691,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
return kvm_pte_valid(pte) || kvm_invalid_pte_owner(pte);
}

+static bool stage2_try_set_pte(kvm_pte_t *ptep, kvm_pte_t old, kvm_pte_t new, bool shared)
+{
+ if (!shared) {
+ WRITE_ONCE(*ptep, new);
+ return true;
+ }
+
+ return cmpxchg(ptep, old, new) == old;
+}
+
static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
u32 level, struct kvm_pgtable_mm_ops *mm_ops)
{
@@ -985,6 +995,7 @@ struct stage2_attr_data {
kvm_pte_t pte;
u32 level;
struct kvm_pgtable_mm_ops *mm_ops;
+ bool shared;
};

static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
@@ -1017,7 +1028,9 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
stage2_pte_executable(pte) && !stage2_pte_executable(data->pte))
mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
kvm_granule_size(level));
- WRITE_ONCE(*ptep, pte);
+
+ if (!stage2_try_set_pte(ptep, data->pte, pte, data->shared))
+ return -EAGAIN;
}

return 0;
@@ -1026,7 +1039,7 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
u64 size, kvm_pte_t attr_set,
kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
- u32 *level)
+ u32 *level, bool shared)
{
int ret;
kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1034,6 +1047,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
.attr_set = attr_set & attr_mask,
.attr_clr = attr_clr & attr_mask,
.mm_ops = pgt->mm_ops,
+ .shared = shared,
};
struct kvm_pgtable_walker walker = {
.cb = stage2_attr_walker,
@@ -1057,14 +1071,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
return stage2_update_leaf_attrs(pgt, addr, size, 0,
KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
- NULL, NULL);
+ NULL, NULL, false);
}

kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
{
kvm_pte_t pte = 0;
stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
- &pte, NULL);
+ &pte, NULL, false);
dsb(ishst);
return pte;
}
@@ -1073,7 +1087,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
{
kvm_pte_t pte = 0;
stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
- &pte, NULL);
+ &pte, NULL, false);
/*
* "But where's the TLBI?!", you scream.
* "Over in the core code", I sigh.
@@ -1086,7 +1100,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
{
kvm_pte_t pte = 0;
- stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
+ stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, false);
return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
}

@@ -1109,7 +1123,7 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
if (prot & KVM_PGTABLE_PROT_X)
clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;

- ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
+ ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, true);
if (!ret)
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
return ret;
The stage2 attr walker is already used for parallel walks. Since commit
f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation
during dirty logging"), KVM acquires the read lock when write-unprotecting
a PTE. However, the walker only uses a simple store to update the PTE.
This is safe, as the only possible race is with hardware updates to the
access flag, which is benign.

However, a subsequent change to KVM will allow more changes to the stage 2
page tables to be done in parallel. Prepare the stage 2 attribute walker by
using an atomic compare-and-exchange to update the PTE when walking in
parallel, returning -EAGAIN to the caller if the update loses a race.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/kvm/hyp/pgtable.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)
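
As a reviewing aid (not part of the patch), below is a minimal, self-contained
user-space sketch of the update rule the new stage2_try_set_pte() helper
encodes, written with C11 atomics rather than the kernel's
WRITE_ONCE()/cmpxchg(). The names try_set_pte() and relax_perms(), the PTE
constants, and the retry loop are illustrative stand-ins, not KVM's API:

/*
 * Illustration only -- not kernel code. try_set_pte() mirrors the rule
 * stage2_try_set_pte() implements: an exclusive walker can use a plain
 * store, while a shared (parallel) walker may only install the new value
 * if the entry still holds what it originally read. relax_perms() shows
 * how a caller might turn a lost race into -EAGAIN and retry.
 */
#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pte_t;

static bool try_set_pte(_Atomic pte_t *ptep, pte_t old, pte_t new, bool shared)
{
	if (!shared) {
		/* Writer is exclusive: no concurrent software update possible. */
		atomic_store_explicit(ptep, new, memory_order_release);
		return true;
	}

	/* Parallel walk: only succeed if nobody changed the entry under us. */
	return atomic_compare_exchange_strong(ptep, &old, new);
}

static int relax_perms(_Atomic pte_t *ptep, pte_t set_bits)
{
	pte_t old = atomic_load(ptep);

	if (!try_set_pte(ptep, old, old | set_bits, true))
		return -EAGAIN;	/* lost the race; let the caller replay */

	return 0;
}

int main(void)
{
	_Atomic pte_t pte = 0x0040000000000003ULL;	/* made-up "valid" PTE */
	int ret;

	/* A fault handler would retry the update until it wins the race. */
	do {
		ret = relax_perms(&pte, 1ULL << 7);	/* e.g. a writable bit */
	} while (ret == -EAGAIN);

	printf("pte = 0x%016llx\n", (unsigned long long)atomic_load(&pte));
	return 0;
}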