@@ -1384,7 +1384,6 @@ extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
* where it is possible and makes sense to do so. The PTE_CONT bit is considered
* a private implementation detail of the public ptep API (see below).
*/
-extern pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
extern void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr, int full);
extern pte_t contpte_get_and_clear_full_ptes(struct mm_struct *mm,
@@ -1430,16 +1429,8 @@ static inline unsigned int pte_batch_hint(pte_t *ptep, pte_t pte)
extern pte_t ptep_get(pte_t *ptep);
#define ptep_get ptep_get
+extern pte_t ptep_get_lockless(pte_t *ptep);
#define ptep_get_lockless ptep_get_lockless
-static inline pte_t ptep_get_lockless(pte_t *ptep)
-{
- pte_t pte = __ptep_get(ptep);
-
- if (likely(!pte_valid_cont(pte)))
- return pte;
-
- return contpte_ptep_get_lockless(ptep);
-}
static inline void set_pte(pte_t *ptep, pte_t pte)
{
@@ -28,63 +28,6 @@ static void contpte_try_unfold_partial(struct mm_struct *mm, unsigned long addr,
}
}
-pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
-{
- /*
- * The ptep_get_lockless() API requires us to read and return *orig_ptep
- * so that it is self-consistent, without the PTL held, so we may be
- * racing with other threads modifying the pte. Usually a READ_ONCE()
- * would suffice, but for the contpte case, we also need to gather the
- * access and dirty bits from across all ptes in the contiguous block,
- * and we can't read all of those neighbouring ptes atomically, so any
- * contiguous range may be unfolded/modified/refolded under our feet.
- * Therefore we ensure we read a _consistent_ contpte range by checking
- * that all ptes in the range are valid and have CONT_PTE set, that all
- * pfns are contiguous and that all pgprots are the same (ignoring
- * access/dirty). If we find a pte that is not consistent, then we must
- * be racing with an update so start again. If the target pte does not
- * have CONT_PTE set then that is considered consistent on its own
- * because it is not part of a contpte range.
- */
-
- pgprot_t orig_prot;
- unsigned long pfn;
- pte_t orig_pte;
- pgprot_t prot;
- pte_t *ptep;
- pte_t pte;
- int i;
-
-retry:
- orig_pte = __ptep_get(orig_ptep);
-
- if (!pte_valid_cont(orig_pte))
- return orig_pte;
-
- orig_prot = pte_pgprot(pte_mkold(pte_mkclean(orig_pte)));
- ptep = arch_contpte_align_down(orig_ptep);
- pfn = pte_pfn(orig_pte) - (orig_ptep - ptep);
-
- for (i = 0; i < CONT_PTES; i++, ptep++, pfn++) {
- pte = __ptep_get(ptep);
- prot = pte_pgprot(pte_mkold(pte_mkclean(pte)));
-
- if (!pte_valid_cont(pte) ||
- pte_pfn(pte) != pfn ||
- pgprot_val(prot) != pgprot_val(orig_prot))
- goto retry;
-
- if (pte_dirty(pte))
- orig_pte = pte_mkdirty(orig_pte);
-
- if (pte_young(pte))
- orig_pte = pte_mkyoung(orig_pte);
- }
-
- return orig_pte;
-}
-EXPORT_SYMBOL_GPL(contpte_ptep_get_lockless);
-
void contpte_clear_full_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned int nr, int full)
{
@@ -785,6 +785,8 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
extern pte_t ptep_get(pte_t *ptep);
#define ptep_get ptep_get
+extern pte_t ptep_get_lockless(pte_t *ptep);
+#define ptep_get_lockless ptep_get_lockless
extern void set_ptes(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pteval, unsigned int nr);
#define set_ptes set_ptes
@@ -8,6 +8,7 @@
* a private implementation detail of the public ptep API (see below).
*/
pte_t contpte_ptep_get(pte_t *ptep, pte_t orig_pte);
+pte_t contpte_ptep_get_lockless(pte_t *orig_ptep);
void __contpte_try_fold(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte);
void contpte_try_fold(struct mm_struct *mm, unsigned long addr,
@@ -42,6 +42,7 @@
* - huge_ptep_clear_flush()
* - ptep_get()
* - set_ptes()
+ * - ptep_get_lockless()
*/
pte_t huge_ptep_get(pte_t *ptep)
@@ -589,4 +590,72 @@ __always_inline void set_ptes(struct mm_struct *mm, unsigned long addr,
contpte_set_ptes(mm, addr, ptep, pte, nr);
}
}
+
+pte_t contpte_ptep_get_lockless(pte_t *orig_ptep)
+{
+ /*
+ * The ptep_get_lockless() API requires us to read and return *orig_ptep
+ * so that it is self-consistent, without the PTL held, so we may be
+ * racing with other threads modifying the pte. Usually a READ_ONCE()
+ * would suffice, but for the contpte case, we also need to gather the
+ * access and dirty bits from across all ptes in the contiguous block,
+ * and we can't read all of those neighbouring ptes atomically, so any
+ * contiguous range may be unfolded/modified/refolded under our feet.
+ * Therefore we ensure we read a _consistent_ contpte range by checking
+ * that all ptes in the range are valid and have CONT_PTE set, that all
+ * pfns are contiguous and that all pgprots are the same (ignoring
+ * access/dirty). If we find a pte that is not consistent, then we must
+ * be racing with an update so start again. If the target pte does not
+ * have CONT_PTE set then that is considered consistent on its own
+ * because it is not part of a contpte range.
+ */
+
+ pgprot_t orig_prot;
+ unsigned long pfn;
+ pte_t orig_pte;
+ pgprot_t prot;
+ pte_t *ptep;
+ pte_t pte;
+ int i, ncontig;
+
+retry: /* every inconsistency below restarts from a fresh read of the target pte */
+ orig_pte = __ptep_get(orig_ptep);
+
+ if (!pte_valid_cont(orig_pte))
+ return orig_pte; /* not part of a contpte range: consistent on its own */
+
+ orig_prot = pte_pgprot(pte_mkold(pte_mkclean(orig_pte))); /* reference pgprot with access/dirty masked out */
+ ptep = arch_contpte_align_down(orig_ptep); /* presumably the first pte of the contiguous block - TODO confirm */
+ ncontig = arch_contpte_get_num_contig(NULL, 0, ptep, 0, NULL); /* NOTE(review): only ptep is meaningful (mm/pgsize NULL, addr/size 0) - confirm every arch helper accepts that */
+ pfn = pte_pfn(orig_pte) - (orig_ptep - ptep); /* pfn expected at the aligned-down ptep */
+
+ for (i = 0; i < ncontig; i++, ptep++, pfn++) { /* ptep and the expected pfn advance together */
+ pte = __ptep_get(ptep);
+ prot = pte_pgprot(pte_mkold(pte_mkclean(pte))); /* masked the same way as orig_prot */
+
+ if (!pte_valid_cont(pte) ||
+ pte_pfn(pte) != pfn ||
+ pgprot_val(prot) != pgprot_val(orig_prot))
+ goto retry; /* inconsistent entry: assume a concurrent update and start over */
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte); /* fold this entry's dirty bit into the result */
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte); /* fold this entry's young (access) bit into the result */
+ }
+
+ return orig_pte; /* target pte plus dirty/young gathered from the whole range */
+}
+EXPORT_SYMBOL_GPL(contpte_ptep_get_lockless);
+
+__always_inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte = __ptep_get(ptep); /* read the target entry once */
+
+ if (likely(!pte_valid_cont(pte)))
+ return pte; /* fast path: not a valid contpte entry, return it as read */
+
+ return contpte_ptep_get_lockless(ptep); /* slow path: re-reads and gathers access/dirty across the block */
+} /* NOTE(review): no EXPORT_SYMBOL* follows here, unlike contpte_ptep_get_lockless() - confirm no modular caller needs one */
#endif /* CONFIG_THP_CONTPTE */
Make riscv use the contpte-aware ptep_get_lockless() function from arm64.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
 arch/arm64/include/asm/pgtable.h | 11 +----
 arch/arm64/mm/contpte.c          | 57 --------------------------
 arch/riscv/include/asm/pgtable.h |  2 +
 include/linux/contpte.h          |  1 +
 mm/contpte.c                     | 69 ++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+), 67 deletions(-)