diff mbox

[v2,07/11] mm: add find_get_entries_tag()

Message ID 1447459610-14259-8-git-send-email-ross.zwisler@linux.intel.com (mailing list archive)
State Changes Requested
Headers show

Commit Message

Ross Zwisler Nov. 14, 2015, 12:06 a.m. UTC
Add find_get_entries_tag() to the family of functions that include
find_get_entries(), find_get_pages() and find_get_pages_tag().  This is
needed for DAX dirty page handling because we need a list of both page
offsets and radix tree entries ('indices' and 'entries' in this function)
that are marked with the PAGECACHE_TAG_TOWRITE tag.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
---
 include/linux/pagemap.h |  3 +++
 mm/filemap.c            | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)

Comments

Dave Chinner Nov. 16, 2015, 10:42 p.m. UTC | #1
On Fri, Nov 13, 2015 at 05:06:46PM -0700, Ross Zwisler wrote:
> Add find_get_entries_tag() to the family of functions that include
> find_get_entries(), find_get_pages() and find_get_pages_tag().  This is
> needed for DAX dirty page handling because we need a list of both page
> offsets and radix tree entries ('indices' and 'entries' in this function)
> that are marked with the PAGECACHE_TAG_TOWRITE tag.
> 
> Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> ---
>  include/linux/pagemap.h |  3 +++
>  mm/filemap.c            | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 64 insertions(+)
> 
> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> index a6c78e0..6fea3be 100644
> --- a/include/linux/pagemap.h
> +++ b/include/linux/pagemap.h
> @@ -354,6 +354,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
>  			       unsigned int nr_pages, struct page **pages);
>  unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
>  			int tag, unsigned int nr_pages, struct page **pages);
> +unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
> +			int tag, unsigned int nr_entries,
> +			struct page **entries, pgoff_t *indices);
>  
>  struct page *grab_cache_page_write_begin(struct address_space *mapping,
>  			pgoff_t index, unsigned flags);
> diff --git a/mm/filemap.c b/mm/filemap.c
> index d5e94fd..89ab448 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -1454,6 +1454,67 @@ repeat:
>  }
>  EXPORT_SYMBOL(find_get_pages_tag);
>  
> +/**
> + * find_get_entries_tag - find and return entries that match @tag
> + * @mapping:	the address_space to search
> + * @start:	the starting page cache index
> + * @tag:	the tag index
> + * @nr_entries:	the maximum number of entries
> + * @entries:	where the resulting entries are placed
> + * @indices:	the cache indices corresponding to the entries in @entries
> + *
> + * Like find_get_entries, except we only return entries which are tagged with
> + * @tag.
> + */
> +unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
> +			int tag, unsigned int nr_entries,
> +			struct page **entries, pgoff_t *indices)
> +{
> +	void **slot;
> +	unsigned int ret = 0;
> +	struct radix_tree_iter iter;
> +
> +	if (!nr_entries)
> +		return 0;
> +
> +	rcu_read_lock();
> +restart:
> +	radix_tree_for_each_tagged(slot, &mapping->page_tree,
> +				   &iter, start, tag) {
> +		struct page *page;
> +repeat:
> +		page = radix_tree_deref_slot(slot);
> +		if (unlikely(!page))
> +			continue;
> +		if (radix_tree_exception(page)) {
> +			if (radix_tree_deref_retry(page))
> +				goto restart;

That restart condition looks wrong. ret can be non-zero, but we
start looking from the original start index again, resulting in
duplicates being added to the return arrays...

Cheers,

Dave.
Ross Zwisler Nov. 17, 2015, 6:08 p.m. UTC | #2
On Tue, Nov 17, 2015 at 09:42:22AM +1100, Dave Chinner wrote:
> On Fri, Nov 13, 2015 at 05:06:46PM -0700, Ross Zwisler wrote:
> > Add find_get_entries_tag() to the family of functions that include
> > find_get_entries(), find_get_pages() and find_get_pages_tag().  This is
> > needed for DAX dirty page handling because we need a list of both page
> > offsets and radix tree entries ('indices' and 'entries' in this function)
> > that are marked with the PAGECACHE_TAG_TOWRITE tag.
> > 
> > Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> > ---
> >  include/linux/pagemap.h |  3 +++
> >  mm/filemap.c            | 61 +++++++++++++++++++++++++++++++++++++++++++++++++
> >  2 files changed, 64 insertions(+)
> > 
> > diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
> > index a6c78e0..6fea3be 100644
> > --- a/include/linux/pagemap.h
> > +++ b/include/linux/pagemap.h
> > @@ -354,6 +354,9 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
> >  			       unsigned int nr_pages, struct page **pages);
> >  unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
> >  			int tag, unsigned int nr_pages, struct page **pages);
> > +unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
> > +			int tag, unsigned int nr_entries,
> > +			struct page **entries, pgoff_t *indices);
> >  
> >  struct page *grab_cache_page_write_begin(struct address_space *mapping,
> >  			pgoff_t index, unsigned flags);
> > diff --git a/mm/filemap.c b/mm/filemap.c
> > index d5e94fd..89ab448 100644
> > --- a/mm/filemap.c
> > +++ b/mm/filemap.c
> > @@ -1454,6 +1454,67 @@ repeat:
> >  }
> >  EXPORT_SYMBOL(find_get_pages_tag);
> >  
> > +/**
> > + * find_get_entries_tag - find and return entries that match @tag
> > + * @mapping:	the address_space to search
> > + * @start:	the starting page cache index
> > + * @tag:	the tag index
> > + * @nr_entries:	the maximum number of entries
> > + * @entries:	where the resulting entries are placed
> > + * @indices:	the cache indices corresponding to the entries in @entries
> > + *
> > + * Like find_get_entries, except we only return entries which are tagged with
> > + * @tag.
> > + */
> > +unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
> > +			int tag, unsigned int nr_entries,
> > +			struct page **entries, pgoff_t *indices)
> > +{
> > +	void **slot;
> > +	unsigned int ret = 0;
> > +	struct radix_tree_iter iter;
> > +
> > +	if (!nr_entries)
> > +		return 0;
> > +
> > +	rcu_read_lock();
> > +restart:
> > +	radix_tree_for_each_tagged(slot, &mapping->page_tree,
> > +				   &iter, start, tag) {
> > +		struct page *page;
> > +repeat:
> > +		page = radix_tree_deref_slot(slot);
> > +		if (unlikely(!page))
> > +			continue;
> > +		if (radix_tree_exception(page)) {
> > +			if (radix_tree_deref_retry(page))
> > +				goto restart;
> 
> That restart condition looks wrong. ret can be non-zero, but we
> start looking from the original start index again, resulting in
> duplicates being added to the return arrays...

This same restart logic is used in all the functions in this family:
find_get_entry() (though the tag is "repeat"), find_get_entries(),
find_get_pages(), find_get_pages_contig() and find_get_pages_tag().

Most don't have it well commented, but there is a good comment in
find_get_pages():

	if (radix_tree_exception(page)) {                               
		if (radix_tree_deref_retry(page)) {                     
			/*                                              
			 * Transient condition which can only trigger   
			 * when entry at index 0 moves out of or back   
			 * to root: none yet gotten, safe to restart.   
			 */                                             
			WARN_ON(iter.index);                            
			goto restart;                                   
		}   

I think the logic is correct, but I'm happy to add this comment in
find_get_entries_tag() if it would make things clearer.
diff mbox

Patch

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index a6c78e0..6fea3be 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -354,6 +354,9 @@  unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 			       unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
 			int tag, unsigned int nr_pages, struct page **pages);
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+			int tag, unsigned int nr_entries,
+			struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index, unsigned flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index d5e94fd..89ab448 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1454,6 +1454,67 @@  repeat:
 }
 EXPORT_SYMBOL(find_get_pages_tag);
 
+/**
+ * find_get_entries_tag - find and return entries that match @tag
+ * @mapping:	the address_space to search
+ * @start:	the starting page cache index
+ * @tag:	the tag index
+ * @nr_entries:	the maximum number of entries
+ * @entries:	where the resulting entries are placed
+ * @indices:	the cache indices corresponding to the entries in @entries
+ *
+ * Like find_get_entries, except we only return entries which are tagged with
+ * @tag.
+ */
+unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
+			int tag, unsigned int nr_entries,
+			struct page **entries, pgoff_t *indices)
+{
+	void **slot;
+	unsigned int ret = 0;
+	struct radix_tree_iter iter;
+
+	if (!nr_entries)
+		return 0;
+
+	rcu_read_lock();
+restart:
+	radix_tree_for_each_tagged(slot, &mapping->page_tree,
+				   &iter, start, tag) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot(slot);
+		if (unlikely(!page))
+			continue;
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page))
+				goto restart;
+			/*
+			 * A shadow entry of a recently evicted page, a swap
+			 * entry from shmem/tmpfs or a DAX entry.  Return it
+			 * without attempting to raise page count.
+			 */
+			goto export;
+		}
+		if (!page_cache_get_speculative(page))
+			goto repeat;
+
+		/* Has the page moved? */
+		if (unlikely(page != *slot)) {
+			page_cache_release(page);
+			goto repeat;
+		}
+export:
+		indices[ret] = iter.index;
+		entries[ret] = page;
+		if (++ret == nr_entries)
+			break;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL(find_get_entries_tag);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario: