diff mbox series

[RFC,2/3] mm: provide rmap_wrprotect_file_page() function

Message ID 701a67692d5bf9c8424cdbda103c988bbb278e38.1736352361.git.lorenzo.stoakes@oracle.com (mailing list archive)
State Awaiting Upstream
Headers show
Series expose mapping wrprotect, fix fb_defio use | expand

Commit Message

Lorenzo Stoakes Jan. 8, 2025, 4:18 p.m. UTC
In the fb_defio video driver, page dirty state is used to determine when
frame buffer pages have been changed, allowing for batched, deferred I/O to
be performed for efficiency.

This implementation had only one means of doing so effectively - the use of
the folio_mkclean() function.

However, this use of the function is inappropriate, as the fb_defio
implementation allocates kernel memory to back the framebuffer, and then is
forced to specify the page->index and page->mapping fields in order to
permit the folio_mkclean() rmap traversal to proceed correctly.

It is not correct to specify these fields on kernel-allocated memory, and
moreover, since these are not folios, page->index and page->mapping are
deprecated fields, soon to be removed.

We therefore need to provide a means by which we can correctly traverse the
reverse mapping and write-protect mappings for a page backing an
address_space page cache object at a given offset.

This patch provides this - rmap_wrprotect_file_page() allows for this
operation to be performed for a specified address_space, offset and PFN,
without requiring a folio or, of course, an inappropriate use of
page->index and page->mapping.

With this provided, we can subsequently adjust the fb_defio implementation
to make use of this function and avoid incorrect invocation of
folio_mkclean() and, more importantly, incorrect manipulation of the
page->index and page->mapping fields.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 include/linux/rmap.h | 20 ++++++++++++++++
 mm/rmap.c            | 54 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)

Comments

Matthew Wilcox Jan. 8, 2025, 5:25 p.m. UTC | #1
On Wed, Jan 08, 2025 at 04:18:41PM +0000, Lorenzo Stoakes wrote:
> +++ b/include/linux/rmap.h
> @@ -754,6 +754,26 @@ unsigned long page_address_in_vma(const struct folio *folio,
>   */
>  int folio_mkclean(struct folio *);
>  
> +/**

The kerneldoc comment should be with the implementation, not the
prototype.

> + * rmap_wrprotect_file_page() - Traverses the reverse mapping, finding all VMAs
> + * which contain a shared mapping of the single page at PFN @pfn in @mapping at
> + * offset @pgoff and write-protecting the mappings.

After the '-' should come a _short_ description ... maybe "Write protect
all mappings of this page".

> + * The PFN mapped does not have to be a folio, but rather can be a kernel
> + * allocation that is mapped into userland. We therefore do not require that the
> + * PFN maps to a folio with a valid mapping or index field, rather these are
> + * specified in @mapping and @pgoff.
> + *
> + * @mapping:	The mapping whose reverse mapping should be traversed.
> + * @pgoff:	The page offset at which @pfn is mapped within @mapping.
> + * @nr_pages:	The number of physically contiguous base pages spanned.
> + * @pfn:	The PFN of the memory mapped in @mapping at @pgoff.

The description of the params comes between the short and full
description of the function.

> + * Return the number of write-protected PTEs, or an error.

colon after Return: so it becomes a section.

> +int rmap_wrprotect_file_page(struct address_space *mapping, pgoff_t pgoff,
> +		unsigned long nr_pages, unsigned long pfn)
> +{
> +	struct wrprotect_file_state state = {
> +		.cleaned = 0,
> +		.pgoff = pgoff,
> +		.pfn = pfn,
> +		.nr_pages = nr_pages,
> +	};
> +	struct rmap_walk_control rwc = {
> +		.arg = (void *)&state,
> +		.rmap_one = rmap_wrprotect_file_one,
> +		.invalid_vma = invalid_mkclean_vma,
> +	};
> +
> +	if (!mapping)
> +		return 0;

Should it be valid to pass in NULL?
Lorenzo Stoakes Jan. 8, 2025, 7:35 p.m. UTC | #2
On Wed, Jan 08, 2025 at 05:25:01PM +0000, Matthew Wilcox wrote:
> On Wed, Jan 08, 2025 at 04:18:41PM +0000, Lorenzo Stoakes wrote:
> > +++ b/include/linux/rmap.h
> > @@ -754,6 +754,26 @@ unsigned long page_address_in_vma(const struct folio *folio,
> >   */
> >  int folio_mkclean(struct folio *);
> >
> > +/**
>
> The kerneldoc comment should be with the implementation, not the
> prototype.
>
> > + * rmap_wrprotect_file_page() - Traverses the reverse mapping, finding all VMAs
> > + * which contain a shared mapping of the single page at PFN @pfn in @mapping at
> > + * offset @pgoff and write-protecting the mappings.
>
> After the '-' should come a _short_ description ... maybe "Write protect
> all mappings of this page".

As you _well_ know Matthew, brevity is not my strong suit ;)

But sure, will cut this down to size...

>
> > + * The PFN mapped does not have to be a folio, but rather can be a kernel
> > + * allocation that is mapped into userland. We therefore do not require that the
> > + * PFN maps to a folio with a valid mapping or index field, rather these are
> > + * specified in @mapping and @pgoff.
> > + *
> > + * @mapping:	The mapping whose reverse mapping should be traversed.
> > + * @pgoff:	The page offset at which @pfn is mapped within @mapping.
> > + * @nr_pages:	The number of physically contiguous base pages spanned.
> > + * @pfn:	The PFN of the memory mapped in @mapping at @pgoff.
>
> The description of the params comes between the short and full
> description of the function.

Ack

>
> > + * Return the number of write-protected PTEs, or an error.
>
> colon after Return: so it becomes a section.

Ack will do

>
> > +int rmap_wrprotect_file_page(struct address_space *mapping, pgoff_t pgoff,
> > +		unsigned long nr_pages, unsigned long pfn)
> > +{
> > +	struct wrprotect_file_state state = {
> > +		.cleaned = 0,
> > +		.pgoff = pgoff,
> > +		.pfn = pfn,
> > +		.nr_pages = nr_pages,
> > +	};
> > +	struct rmap_walk_control rwc = {
> > +		.arg = (void *)&state,
> > +		.rmap_one = rmap_wrprotect_file_one,
> > +		.invalid_vma = invalid_mkclean_vma,
> > +	};
> > +
> > +	if (!mapping)
> > +		return 0;
>
> Should it be valid to pass in NULL?
>

I think it's ok for it to be, as in that case it's valid to say 'ok we
write-protected everything mapped by mapping - which was nothing'.

It's a bit blurry though.
diff mbox series

Patch

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 4509a43fe59f..9d80b09e58ae 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -754,6 +754,26 @@  unsigned long page_address_in_vma(const struct folio *folio,
  */
 int folio_mkclean(struct folio *);
 
+/**
+ * rmap_wrprotect_file_page() - Traverses the reverse mapping, finding all VMAs
+ * which contain a shared mapping of the single page at PFN @pfn in @mapping at
+ * offset @pgoff and write-protecting the mappings.
+ *
+ * The PFN mapped does not have to be a folio, but rather can be a kernel
+ * allocation that is mapped into userland. We therefore do not require that the
+ * PFN maps to a folio with a valid mapping or index field, rather these are
+ * specified in @mapping and @pgoff.
+ *
+ * @mapping:	The mapping whose reverse mapping should be traversed.
+ * @pgoff:	The page offset at which @pfn is mapped within @mapping.
+ * @nr_pages:	The number of physically contiguous base pages spanned.
+ * @pfn:	The PFN of the memory mapped in @mapping at @pgoff.
+ *
+ * Return the number of write-protected PTEs, or an error.
+ */
+int rmap_wrprotect_file_page(struct address_space *mapping, pgoff_t pgoff,
+			     unsigned long nr_pages, unsigned long pfn);
+
 int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 		      struct vm_area_struct *vma);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index effafdb44365..46474343116c 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1127,6 +1127,60 @@  int folio_mkclean(struct folio *folio)
 }
 EXPORT_SYMBOL_GPL(folio_mkclean);
 
+struct wrprotect_file_state {
+	int cleaned;
+	pgoff_t pgoff;
+	unsigned long pfn;
+	unsigned long nr_pages;
+};
+
+static bool rmap_wrprotect_file_one(struct folio *folio, struct vm_area_struct *vma,
+		unsigned long address, void *arg)
+{
+	struct wrprotect_file_state *state = (struct wrprotect_file_state *)arg;
+	struct page_vma_mapped_walk pvmw = {
+		.pfn		= state->pfn,
+		.nr_pages	= state->nr_pages,
+		.pgoff		= state->pgoff,
+		.vma		= vma,
+		.address	= address,
+		.flags		= PVMW_SYNC,
+	};
+
+	state->cleaned += page_vma_mkclean_one(&pvmw);
+
+	return true;
+}
+
+static void __rmap_walk_file(struct folio *folio, struct address_space *mapping,
+			     pgoff_t pgoff_start, unsigned long nr_pages,
+			     struct rmap_walk_control *rwc, bool locked);
+
+int rmap_wrprotect_file_page(struct address_space *mapping, pgoff_t pgoff,
+		unsigned long nr_pages, unsigned long pfn)
+{
+	struct wrprotect_file_state state = {
+		.cleaned = 0,
+		.pgoff = pgoff,
+		.pfn = pfn,
+		.nr_pages = nr_pages,
+	};
+	struct rmap_walk_control rwc = {
+		.arg = (void *)&state,
+		.rmap_one = rmap_wrprotect_file_one,
+		.invalid_vma = invalid_mkclean_vma,
+	};
+
+	if (!mapping)
+		return 0;
+
+	__rmap_walk_file(/* folio = */NULL, mapping, pgoff, nr_pages, &rwc,
+			 /* locked = */false);
+
+	return state.cleaned;
+}
+EXPORT_SYMBOL_GPL(rmap_wrprotect_file_page);
+
 /**
  * pfn_mkclean_range - Cleans the PTEs (including PMDs) mapped with range of
  *                     [@pfn, @pfn + @nr_pages) at the specific offset (@pgoff)