diff mbox series

[v6,01/23] mm: Introduce PTE_MARKER swap entry

Message ID 20211115075522.73795-2-peterx@redhat.com (mailing list archive)
State New
Headers show
Series userfaultfd-wp: Support shmem and hugetlbfs | expand

Commit Message

Peter Xu Nov. 15, 2021, 7:55 a.m. UTC
This patch introduces a new swap entry type called PTE_MARKER.  It can be
installed for any pte that maps a file-backed memory when the pte is
temporarily zapped, so as to maintain per-pte information.

The information that is kept in the pte is called a "marker".  Here we define
the marker as "unsigned long" just to match pgoff_t; however, it will only work
if it still fits in swp_offset(), which is e.g. currently 58 bits on x86_64.

A new config CONFIG_PTE_MARKER is introduced too; it is off by default.  A bunch
of helpers are defined together to serve the rest of the pte marker code.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 include/asm-generic/hugetlb.h |  9 ++++
 include/linux/swap.h          | 15 ++++++-
 include/linux/swapops.h       | 78 +++++++++++++++++++++++++++++++++++
 mm/Kconfig                    |  7 ++++
 4 files changed, 108 insertions(+), 1 deletion(-)

Comments

Alistair Popple Dec. 3, 2021, 3:30 a.m. UTC | #1
On Monday, 15 November 2021 6:55:00 PM AEDT Peter Xu wrote:

[...]

> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index d356ab4047f7..5103d2a4ae38 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -247,6 +247,84 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
>  
>  #endif
>  
> +typedef unsigned long pte_marker;
> +
> +#define  PTE_MARKER_MASK     (0)
> +
> +#ifdef CONFIG_PTE_MARKER
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> +	return swp_entry(SWP_PTE_MARKER, marker);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> +	return swp_type(entry) == SWP_PTE_MARKER;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> +	return swp_offset(entry) & PTE_MARKER_MASK;

I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
user. I don't see a problem with open-coding these kinds of checks (i.e.
swp_offset(entry) & PTE_MARKER_UFFD_WP), as you kind of end up doing that anyway.
Alternatively, if you want helper functions, I think it would be better to define
them for each marker, e.g. is_pte_marker_uffd_wp().

> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> +	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
> +}
> +
> +#else /* CONFIG_PTE_MARKER */
> +
> +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> +{
> +	/* This should never be called if !CONFIG_PTE_MARKER */

Can we leave this function undefined then? That way we will get an obvious
build error.

Overall I'm liking the swap entry approach a lot more than the special pte
approach, but maybe that's just because I'm more familiar with special swap
entries :-)

> +	WARN_ON_ONCE(1);
> +	return swp_entry(0, 0);
> +}
> +
> +static inline bool is_pte_marker_entry(swp_entry_t entry)
> +{
> +	return false;
> +}
> +
> +static inline pte_marker pte_marker_get(swp_entry_t entry)
> +{
> +	return 0;
> +}
> +
> +static inline bool is_pte_marker(pte_t pte)
> +{
> +	return false;
> +}
> +
> +#endif /* CONFIG_PTE_MARKER */
> +
> +static inline pte_t make_pte_marker(pte_marker marker)
> +{
> +	return swp_entry_to_pte(make_pte_marker_entry(marker));
> +}
> +
> +/*
> + * This is a special version to check pte_none() just to cover the case when
> + * the pte is a pte marker.  It existed because in many cases the pte marker
> + * should be seen as a none pte; it's just that we have stored some information
> + * onto the none pte so it becomes not-none any more.
> + *
> + * It should be used when the pte is file-backed, ram-based and backing
> + * userspace pages, like shmem.  It is not needed upon pgtables that do not
> + * support pte markers at all.  For example, it's not needed on anonymous
> + * memory, kernel-only memory (including when the system is during-boot),
> + * non-ram based generic file-system.  It's fine to be used even there, but the
> + * extra pte marker check will be pure overhead.
> + *
> + * For systems configured with !CONFIG_PTE_MARKER this will be automatically
> + * optimized to pte_none().
> + */
> +static inline int pte_none_mostly(pte_t pte)
> +{
> +	return pte_none(pte) || is_pte_marker(pte);
> +}
> +
>  static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
>  {
>  	struct page *p = pfn_to_page(swp_offset(entry));
> diff --git a/mm/Kconfig b/mm/Kconfig
> index 068ce591a13a..66f23c6c2032 100644
> --- a/mm/Kconfig
> +++ b/mm/Kconfig
> @@ -897,6 +897,13 @@ config IO_MAPPING
>  config SECRETMEM
>  	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
>  
> +config PTE_MARKER
> +	def_bool n
> +	bool "Marker PTEs support"
> +
> +	help
> +	  Allows to create marker PTEs for file-backed memory.
> +
>  source "mm/damon/Kconfig"
>  
>  endmenu
>
Peter Xu Dec. 3, 2021, 4:21 a.m. UTC | #2
On Fri, Dec 03, 2021 at 02:30:00PM +1100, Alistair Popple wrote:
> On Monday, 15 November 2021 6:55:00 PM AEDT Peter Xu wrote:
> 
> [...]
> 
> > diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> > index d356ab4047f7..5103d2a4ae38 100644
> > --- a/include/linux/swapops.h
> > +++ b/include/linux/swapops.h
> > @@ -247,6 +247,84 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
> >  
> >  #endif
> >  
> > +typedef unsigned long pte_marker;
> > +
> > +#define  PTE_MARKER_MASK     (0)
> > +
> > +#ifdef CONFIG_PTE_MARKER
> > +
> > +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> > +{
> > +	return swp_entry(SWP_PTE_MARKER, marker);
> > +}
> > +
> > +static inline bool is_pte_marker_entry(swp_entry_t entry)
> > +{
> > +	return swp_type(entry) == SWP_PTE_MARKER;
> > +}
> > +
> > +static inline pte_marker pte_marker_get(swp_entry_t entry)
> > +{
> > +	return swp_offset(entry) & PTE_MARKER_MASK;
> 
> I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
> user. I don't see a problem with open-coding these kind of checks (ie.

It's more or less a safety belt to make sure that anything pte_marker_get()
returns contains only the bits defined for pte_marker.

> swp_offset(entry) & PTE_MARKER_UFFD_WP) as you kind of end up doing that anyway.
> Alternatively if you want helper functions I think it would be better to define
> them for each marker. Eg: is_pte_marker_uffd_wp().

Yes, we can have something like is_pte_marker_uffd_wp().  I didn't do that
explicitly because I want us to be clear that pte_marker is a bitmask, so
calling "is_*" will be slightly opaque - strictly speaking it should be
"pte_marker_has_uffd_wp_bit()" if there will be more bits defined, but then the
name of the helper will look a bit odd too.  Hence I just kept a single
interface to fetch the whole marker and use "&" at the call sites to check.

> 
> > +}
> > +
> > +static inline bool is_pte_marker(pte_t pte)
> > +{
> > +	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
> > +}
> > +
> > +#else /* CONFIG_PTE_MARKER */
> > +
> > +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> > +{
> > +	/* This should never be called if !CONFIG_PTE_MARKER */
> 
> Can we leave this function undefined then? That way we will get an obvious
> build error.

We can, but then we need more macros to cover the common code.  E.g. currently
in hugetlb_change_protection() we have:

        /* None pte */
        if (unlikely(uffd_wp))
                /* Safe to modify directly (none->non-present). */
                set_huge_pte_at(mm, address, ptep,
                                make_pte_marker(PTE_MARKER_UFFD_WP));

If we drop this definition, to let it compile with !PTE_MARKER, we'll need:

+#ifdef PTE_MARKER
        /* None pte */
        if (unlikely(uffd_wp))
                /* Safe to modify directly (none->non-present). */
                set_huge_pte_at(mm, address, ptep,
                                make_pte_marker(PTE_MARKER_UFFD_WP));
+#endif

Compared to adding macro checks in a few other places, I figured it may be
easier to define them in the header once, and then properly WARN_ON_ONCE() if
they are ever triggered (which should never happen).

> 
> Overall I'm liking the swap entry approach a lot more than the special pte
> approach, but maybe that's just because I'm more familiar with special swap
> entries :-)

Swap entry solution is definitely cleaner to me if not considering wasting it
with one bit.

Operating on the pte directly is actually slightly more challenging, because we
don't have the protection of is_swap_pte() anymore.  It can help shield out
quite some strange stuff due to the pte->swp level hierarchy.

Thanks,
Alistair Popple Dec. 3, 2021, 5:35 a.m. UTC | #3
On Friday, 3 December 2021 3:21:12 PM AEDT Peter Xu wrote:
> On Fri, Dec 03, 2021 at 02:30:00PM +1100, Alistair Popple wrote:
> > On Monday, 15 November 2021 6:55:00 PM AEDT Peter Xu wrote:
> > 
> > [...]
> > 
> > > diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> > > index d356ab4047f7..5103d2a4ae38 100644
> > > --- a/include/linux/swapops.h
> > > +++ b/include/linux/swapops.h
> > > @@ -247,6 +247,84 @@ static inline int is_writable_migration_entry(swp_entry_t entry)
> > >  
> > >  #endif
> > >  
> > > +typedef unsigned long pte_marker;
> > > +
> > > +#define  PTE_MARKER_MASK     (0)
> > > +
> > > +#ifdef CONFIG_PTE_MARKER
> > > +
> > > +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> > > +{
> > > +	return swp_entry(SWP_PTE_MARKER, marker);
> > > +}
> > > +
> > > +static inline bool is_pte_marker_entry(swp_entry_t entry)
> > > +{
> > > +	return swp_type(entry) == SWP_PTE_MARKER;
> > > +}
> > > +
> > > +static inline pte_marker pte_marker_get(swp_entry_t entry)
> > > +{
> > > +	return swp_offset(entry) & PTE_MARKER_MASK;
> > 
> > I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
> > user. I don't see a problem with open-coding these kind of checks (ie.
> 
> It's more or less a safety belt to make sure anything pte_marker_get() returned
> will be pte_marker defined bits only.
> 
> > swp_offset(entry) & PTE_MARKER_UFFD_WP) as you kind of end up doing that anyway.
> > Alternatively if you want helper functions I think it would be better to define
> > them for each marker. Eg: is_pte_marker_uffd_wp().
> 
> Yes we can have something like is_pte_marker_uffd_wp(), I didn't do that
> explicitly because I want us to be clear that pte_marker is a bitmask, so
> calling "is_*" will be slightly opaque - strictly speaking it should be
> "pte_marker_has_uffd_wp_bit()" if there will be more bits defined, but then the
> name of the helper will look a bit odd too.  Hence I just keep the only
> interface to fetch the whole marker and use "&" in the call sites to check.

Why does a caller need to care if it's a bitmask or not though? Isn't that an
implementation detail that could be left to the "is_*" functions? I must admit
I'm still working through the rest of this series though - is it because you
end up storing some kind of value in the upper bits of the PTE marker?

> > 
> > > +}
> > > +
> > > +static inline bool is_pte_marker(pte_t pte)
> > > +{
> > > +	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
> > > +}
> > > +
> > > +#else /* CONFIG_PTE_MARKER */
> > > +
> > > +static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
> > > +{
> > > +	/* This should never be called if !CONFIG_PTE_MARKER */
> > 
> > Can we leave this function undefined then? That way we will get an obvious
> > build error.
> 
> We can, but then we need more macros to cover the common code.  E.g. currently
> in hugetlb_change_protection() we have:
> 
>         /* None pte */
>         if (unlikely(uffd_wp))
>                 /* Safe to modify directly (none->non-present). */
>                 set_huge_pte_at(mm, address, ptep,
>                                 make_pte_marker(PTE_MARKER_UFFD_WP));
> 
> If we drop this definition, to let it compile with !PTE_MARKER, we'll need:
> 
> +#ifdef PTE_MARKER
>         /* None pte */
>         if (unlikely(uffd_wp))
>                 /* Safe to modify directly (none->non-present). */
>                 set_huge_pte_at(mm, address, ptep,
>                                 make_pte_marker(PTE_MARKER_UFFD_WP));
> +#endif
> 
> Comparing to adding macro checks over a few other places, I figured maybe it's
> easier to define them in the header once then we proper WARN_ON_ONCE() if
> triggered (while they should just never).

Ok, makes sense. Agree that adding macro checks everywhere isn't great.

> > 
> > Overall I'm liking the swap entry approach a lot more than the special pte
> > approach, but maybe that's just because I'm more familiar with special swap
> > entries :-)
> 
> Swap entry solution is definitely cleaner to me if not considering wasting it
> with one bit.
> 
> Operating on pte directly is actually slightly more challenging, because we
> don't have the protection of is_swap_pte() anymore.  It can help shield out
> quite some strange stuff due to the pte->swp level hierachy.

So I guess now that we have the protection of is_swap_pte(), there are probably
a few places where we need to check for marker pte entries when we find swap
entries. I'm not suggesting you haven't already found all of those cases, of
course, just noting that it's something to review.

> Thanks,
> 
>
Peter Xu Dec. 3, 2021, 6:45 a.m. UTC | #4
On Fri, Dec 03, 2021 at 04:35:38PM +1100, Alistair Popple wrote:
> > > > +static inline pte_marker pte_marker_get(swp_entry_t entry)
> > > > +{
> > > > +	return swp_offset(entry) & PTE_MARKER_MASK;
> > > 
> > > I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
> > > user. I don't see a problem with open-coding these kind of checks (ie.
> > 
> > It's more or less a safety belt to make sure anything pte_marker_get() returned
> > will be pte_marker defined bits only.
> > 
> > > swp_offset(entry) & PTE_MARKER_UFFD_WP) as you kind of end up doing that anyway.
> > > Alternatively if you want helper functions I think it would be better to define
> > > them for each marker. Eg: is_pte_marker_uffd_wp().
> > 
> > Yes we can have something like is_pte_marker_uffd_wp(), I didn't do that
> > explicitly because I want us to be clear that pte_marker is a bitmask, so
> > calling "is_*" will be slightly opaque - strictly speaking it should be
> > "pte_marker_has_uffd_wp_bit()" if there will be more bits defined, but then the
> > name of the helper will look a bit odd too.  Hence I just keep the only
> > interface to fetch the whole marker and use "&" in the call sites to check.
> 
> Why does a caller need to care if it's a bitmask or not though? Isn't that an
> implementation detail that could be left to the "is_*" functions? I must admit
> I'm still working through the rest of this series though - is it because you
> end up storing some kind of value in the upper bits of the PTE marker?

Nope. I'm just afraid the caller could overlook the fact that it's a bitmask,
and then there could be code like:

  if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
      pte_clear(ptep)

While we should only do:

  if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
      // remove uffd-wp bit in the pte_marker, keep the rest of the bitmask

I could be worrying too much, there's no real user of it.  If you prefer the
helper a lot I can add it in the new version.  Thanks,
Alistair Popple Dec. 7, 2021, 2:12 a.m. UTC | #5
On Friday, 3 December 2021 5:45:37 PM AEDT Peter Xu wrote:
> On Fri, Dec 03, 2021 at 04:35:38PM +1100, Alistair Popple wrote:
> > > > > +static inline pte_marker pte_marker_get(swp_entry_t entry)
> > > > > +{
> > > > > +	return swp_offset(entry) & PTE_MARKER_MASK;
> > > > 
> > > > I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
> > > > user. I don't see a problem with open-coding these kind of checks (ie.
> > > 
> > > It's more or less a safety belt to make sure anything pte_marker_get() returned
> > > will be pte_marker defined bits only.
> > > 
> > > > swp_offset(entry) & PTE_MARKER_UFFD_WP) as you kind of end up doing that anyway.
> > > > Alternatively if you want helper functions I think it would be better to define
> > > > them for each marker. Eg: is_pte_marker_uffd_wp().
> > > 
> > > Yes we can have something like is_pte_marker_uffd_wp(), I didn't do that
> > > explicitly because I want us to be clear that pte_marker is a bitmask, so
> > > calling "is_*" will be slightly opaque - strictly speaking it should be
> > > "pte_marker_has_uffd_wp_bit()" if there will be more bits defined, but then the
> > > name of the helper will look a bit odd too.  Hence I just keep the only
> > > interface to fetch the whole marker and use "&" in the call sites to check.
> > 
> > Why does a caller need to care if it's a bitmask or not though? Isn't that an
> > implementation detail that could be left to the "is_*" functions? I must admit
> > I'm still working through the rest of this series though - is it because you
> > end up storing some kind of value in the upper bits of the PTE marker?
> 
> Nop. I'm just afraid the caller could overlook the fact that it's a bitmask,
> then there can be code like:
> 
>   if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
>       pte_clear(ptep)
> 
> While we should only do:
> 
>   if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
>       // remove uffd-wp bit in the pte_marker, keep the reset bitmask

I'm not sure how having the helper function prevents or changes this though? In
fact I just noticed this in patch 8:

                             if (uffd_wp_resolve &&
                                    (pte_marker_get(entry) & PTE_MARKER_UFFD_WP)) {
                                        pte_clear(vma->vm_mm, addr, pte);
                                        pages++;
                                }

And if I'm understanding your point correctly isn't that wrong because if there
were other users of pte markers they would inadvertently get cleared? Unless of
course I've missed something - I haven't looked at patch 8 yet for context. To
help with the above situation I think you would need a helper for clearing
ptes.

> I could be worrying too much, there's no real user of it.  If you prefer the
> helper a lot I can add it in the new version.  Thanks,

It's not a massive issue, but I do think either defining a helper or open
coding the bit check is clearer. I think we can worry about other users if/when
they appear.

 - Alistair
Peter Xu Dec. 7, 2021, 2:30 a.m. UTC | #6
On Tue, Dec 07, 2021 at 01:12:23PM +1100, Alistair Popple wrote:
> On Friday, 3 December 2021 5:45:37 PM AEDT Peter Xu wrote:
> > On Fri, Dec 03, 2021 at 04:35:38PM +1100, Alistair Popple wrote:
> > > > > > +static inline pte_marker pte_marker_get(swp_entry_t entry)
> > > > > > +{
> > > > > > +	return swp_offset(entry) & PTE_MARKER_MASK;
> > > > > 
> > > > > I'm not sure the PTE_MARKER_MASK adds much, especially as we only have one
> > > > > user. I don't see a problem with open-coding these kind of checks (ie.
> > > > 
> > > > It's more or less a safety belt to make sure anything pte_marker_get() returned
> > > > will be pte_marker defined bits only.
> > > > 
> > > > > swp_offset(entry) & PTE_MARKER_UFFD_WP) as you kind of end up doing that anyway.
> > > > > Alternatively if you want helper functions I think it would be better to define
> > > > > them for each marker. Eg: is_pte_marker_uffd_wp().
> > > > 
> > > > Yes we can have something like is_pte_marker_uffd_wp(), I didn't do that
> > > > explicitly because I want us to be clear that pte_marker is a bitmask, so
> > > > calling "is_*" will be slightly opaque - strictly speaking it should be
> > > > "pte_marker_has_uffd_wp_bit()" if there will be more bits defined, but then the
> > > > name of the helper will look a bit odd too.  Hence I just keep the only
> > > > interface to fetch the whole marker and use "&" in the call sites to check.
> > > 
> > > Why does a caller need to care if it's a bitmask or not though? Isn't that an
> > > implementation detail that could be left to the "is_*" functions? I must admit
> > > I'm still working through the rest of this series though - is it because you
> > > end up storing some kind of value in the upper bits of the PTE marker?
> > 
> > Nop. I'm just afraid the caller could overlook the fact that it's a bitmask,
> > then there can be code like:
> > 
> >   if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
> >       pte_clear(ptep)
> > 
> > While we should only do:
> > 
> >   if (is_pte_marker_uffd_wp(*ptep) && drop_uffd_wp)
> >       // remove uffd-wp bit in the pte_marker, keep the reset bitmask
> 
> I'm not sure how having the helper function prevents or changes this though? In
> fact I just noticed this in patch 8:
> 
>                              if (uffd_wp_resolve &&
>                                     (pte_marker_get(entry) & PTE_MARKER_UFFD_WP)) {
>                                         pte_clear(vma->vm_mm, addr, pte);
>                                         pages++;
>                                 }
> 
> And if I'm understanding your point correctly isn't that wrong because if there
> were other users of pte markers they would inadvertently get cleared? Unless of
> course I've missed something - I haven't looked at patch 8 yet for context. To
> help with the above situation I think you would need a helper for clearing
> ptes.

What I wanted to say is that pte_marker_get() will make sure the caller is
aware of the fact that the marker is a bitmask.  But it's true that my
example might make it even more confusing.

> 
> > I could be worrying too much, there's no real user of it.  If you prefer the
> > helper a lot I can add it in the new version.  Thanks,
> 
> It's not a massive issue, but I do think either defining a helper or open
> coding the bit check is clearer. I think we can worry about other users if/when
> they appear.

OK, I'll add it.  Thanks,
diff mbox series

Patch

diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
index 8e1e6244a89d..f39cad20ffc6 100644
--- a/include/asm-generic/hugetlb.h
+++ b/include/asm-generic/hugetlb.h
@@ -2,6 +2,9 @@ 
 #ifndef _ASM_GENERIC_HUGETLB_H
 #define _ASM_GENERIC_HUGETLB_H
 
+#include <linux/swap.h>
+#include <linux/swapops.h>
+
 static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
 {
 	return mk_pte(page, pgprot);
@@ -80,6 +83,12 @@  static inline int huge_pte_none(pte_t pte)
 }
 #endif
 
+/* Please refer to comments above pte_none_mostly() for the usage */
+static inline int huge_pte_none_mostly(pte_t pte)
+{
+	return huge_pte_none(pte) || is_pte_marker(pte);
+}
+
 #ifndef __HAVE_ARCH_HUGE_PTE_WRPROTECT
 static inline pte_t huge_pte_wrprotect(pte_t pte)
 {
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d1ea44b31f19..cc9adcfd666f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,6 +55,19 @@  static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
+/*
+ * PTE markers are used to persist information in PTEs that map file-backed
+ * memory.  As the name "PTE" hints, they should only be applied to the
+ * leaves of pgtables.
+ */
+#ifdef CONFIG_PTE_MARKER
+#define SWP_PTE_MARKER_NUM 1
+#define SWP_PTE_MARKER     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
+			    SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
+#else
+#define SWP_PTE_MARKER_NUM 0
+#endif
+
 /*
  * Unaddressable device memory support. See include/linux/hmm.h and
  * Documentation/vm/hmm.rst. Short description is we need struct pages for
@@ -100,7 +113,7 @@  static inline int current_is_kswapd(void)
 
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
-	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
+	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - SWP_PTE_MARKER_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index d356ab4047f7..5103d2a4ae38 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -247,6 +247,84 @@  static inline int is_writable_migration_entry(swp_entry_t entry)
 
 #endif
 
+typedef unsigned long pte_marker;
+
+#define  PTE_MARKER_MASK     (0)
+
+#ifdef CONFIG_PTE_MARKER
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+	return swp_entry(SWP_PTE_MARKER, marker);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_PTE_MARKER;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+	return swp_offset(entry) & PTE_MARKER_MASK;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
+}
+
+#else /* CONFIG_PTE_MARKER */
+
+static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
+{
+	/* This should never be called if !CONFIG_PTE_MARKER */
+	WARN_ON_ONCE(1);
+	return swp_entry(0, 0);
+}
+
+static inline bool is_pte_marker_entry(swp_entry_t entry)
+{
+	return false;
+}
+
+static inline pte_marker pte_marker_get(swp_entry_t entry)
+{
+	return 0;
+}
+
+static inline bool is_pte_marker(pte_t pte)
+{
+	return false;
+}
+
+#endif /* CONFIG_PTE_MARKER */
+
+static inline pte_t make_pte_marker(pte_marker marker)
+{
+	return swp_entry_to_pte(make_pte_marker_entry(marker));
+}
+
+/*
+ * This is a special version of pte_none() that also covers the case where
+ * the pte is a pte marker.  It exists because in many cases a pte marker
+ * should be seen as a none pte; it's just that we have stored some information
+ * in the none pte, so it is no longer none.
+ *
+ * It should be used when the pte is file-backed, ram-based and backing
+ * userspace pages, like shmem.  It is not needed for pgtables that do not
+ * support pte markers at all.  For example, it's not needed for anonymous
+ * memory, kernel-only memory (including during boot), or non-ram based
+ * generic file systems.  It's fine to use it even there, but the extra pte
+ * marker check will be pure overhead.
+ *
+ * For systems configured with !CONFIG_PTE_MARKER this will be automatically
+ * optimized to pte_none().
+ */
+static inline int pte_none_mostly(pte_t pte)
+{
+	return pte_none(pte) || is_pte_marker(pte);
+}
+
 static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry)
 {
 	struct page *p = pfn_to_page(swp_offset(entry));
diff --git a/mm/Kconfig b/mm/Kconfig
index 068ce591a13a..66f23c6c2032 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -897,6 +897,13 @@  config IO_MAPPING
 config SECRETMEM
 	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
 
+config PTE_MARKER
+	def_bool n
+	bool "Marker PTEs support"
+
+	help
+	  Allows creating marker PTEs for file-backed memory.
+
 source "mm/damon/Kconfig"
 
 endmenu