diff mbox series

[v4,04/12] mm/shmem: Support memfile_notifier

Message ID 20220118132121.31388-5-chao.p.peng@linux.intel.com (mailing list archive)
State New
Headers show
Series KVM: mm: fd-based approach for supporting KVM guest private memory | expand

Commit Message

Chao Peng Jan. 18, 2022, 1:21 p.m. UTC
It maintains a memfile_notifier list in shmem_inode_info structure and
implements memfile_pfn_ops callbacks defined by memfile_notifier. It
then exposes them to memfile_notifier via
shmem_get_memfile_notifier_info.

We use SGP_NOALLOC in shmem_get_lock_pfn since the pages should be
allocated by userspace for private memory. If there is no pages
allocated at the offset then error should be returned so KVM knows that
the memory is not private memory.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
---
 include/linux/shmem_fs.h |  4 ++
 mm/memfile_notifier.c    | 12 +++++-
 mm/shmem.c               | 81 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 96 insertions(+), 1 deletion(-)

Comments

Mike Rapoport Feb. 8, 2022, 6:29 p.m. UTC | #1
Hi,

On Tue, Jan 18, 2022 at 09:21:13PM +0800, Chao Peng wrote:
> It maintains a memfile_notifier list in shmem_inode_info structure and
> implements memfile_pfn_ops callbacks defined by memfile_notifier. It
> then exposes them to memfile_notifier via
> shmem_get_memfile_notifier_info.
> 
> We use SGP_NOALLOC in shmem_get_lock_pfn since the pages should be
> allocated by userspace for private memory. If there is no pages
> allocated at the offset then error should be returned so KVM knows that
> the memory is not private memory.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> ---
>  include/linux/shmem_fs.h |  4 ++
>  mm/memfile_notifier.c    | 12 +++++-
>  mm/shmem.c               | 81 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 96 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 166158b6e917..461633587eaf 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -9,6 +9,7 @@
>  #include <linux/percpu_counter.h>
>  #include <linux/xattr.h>
>  #include <linux/fs_parser.h>
> +#include <linux/memfile_notifier.h>
>  
>  /* inode in-kernel data */
>  
> @@ -24,6 +25,9 @@ struct shmem_inode_info {
>  	struct shared_policy	policy;		/* NUMA memory alloc policy */
>  	struct simple_xattrs	xattrs;		/* list of xattrs */
>  	atomic_t		stop_eviction;	/* hold when working on inode */
> +#ifdef CONFIG_MEMFILE_NOTIFIER
> +	struct memfile_notifier_list memfile_notifiers;
> +#endif
>  	struct inode		vfs_inode;
>  };
>  
> diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c
> index 8171d4601a04..b4699cbf629e 100644
> --- a/mm/memfile_notifier.c
> +++ b/mm/memfile_notifier.c
> @@ -41,11 +41,21 @@ void memfile_notifier_fallocate(struct memfile_notifier_list *list,
>  	srcu_read_unlock(&srcu, id);
>  }
>  
> +#ifdef CONFIG_SHMEM
> +extern int shmem_get_memfile_notifier_info(struct inode *inode,
> +					struct memfile_notifier_list **list,
> +					struct memfile_pfn_ops **ops);
> +#endif
> +
>  static int memfile_get_notifier_info(struct inode *inode,
>  				     struct memfile_notifier_list **list,
>  				     struct memfile_pfn_ops **ops)
>  {
> -	return -EOPNOTSUPP;
> +	int ret = -EOPNOTSUPP;
> +#ifdef CONFIG_SHMEM
> +	ret = shmem_get_memfile_notifier_info(inode, list, ops);
> +#endif

This looks backwards. Can we have some register method for memory backing
store and call it from shmem.c?

> +	return ret;
>  }
>  
>  int memfile_register_notifier(struct inode *inode,
Andy Lutomirski Feb. 11, 2022, 11:40 p.m. UTC | #2
On 1/18/22 05:21, Chao Peng wrote:
> It maintains a memfile_notifier list in shmem_inode_info structure and
> implements memfile_pfn_ops callbacks defined by memfile_notifier. It
> then exposes them to memfile_notifier via
> shmem_get_memfile_notifier_info.
> 
> We use SGP_NOALLOC in shmem_get_lock_pfn since the pages should be
> allocated by userspace for private memory. If there is no pages
> allocated at the offset then error should be returned so KVM knows that
> the memory is not private memory.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>

>   static int memfile_get_notifier_info(struct inode *inode,
>   				     struct memfile_notifier_list **list,
>   				     struct memfile_pfn_ops **ops)
>   {
> -	return -EOPNOTSUPP;
> +	int ret = -EOPNOTSUPP;
> +#ifdef CONFIG_SHMEM
> +	ret = shmem_get_memfile_notifier_info(inode, list, ops);
> +#endif
> +	return ret;
>   }

> +int shmem_get_memfile_notifier_info(struct inode *inode,
> +				    struct memfile_notifier_list **list,
> +				    struct memfile_pfn_ops **ops)
> +{
> +	struct shmem_inode_info *info;
> +
> +	if (!shmem_mapping(inode->i_mapping))
> +		return -EINVAL;
> +
> +	info = SHMEM_I(inode);
> +	*list = &info->memfile_notifiers;
> +	if (ops)
> +		*ops = &shmem_pfn_ops;
> +
> +	return 0;

I can't wrap my head around exactly who is supposed to call these 
functions and when, but there appears to be a missing check that the 
inode is actually a shmem inode.

What is this code trying to do?  It's very abstract.
Chao Peng Feb. 17, 2022, 1:10 p.m. UTC | #3
On Tue, Feb 08, 2022 at 08:29:56PM +0200, Mike Rapoport wrote:
> Hi,
> 
> On Tue, Jan 18, 2022 at 09:21:13PM +0800, Chao Peng wrote:
> > It maintains a memfile_notifier list in shmem_inode_info structure and
> > implements memfile_pfn_ops callbacks defined by memfile_notifier. It
> > then exposes them to memfile_notifier via
> > shmem_get_memfile_notifier_info.
> > 
> > We use SGP_NOALLOC in shmem_get_lock_pfn since the pages should be
> > allocated by userspace for private memory. If there is no pages
> > allocated at the offset then error should be returned so KVM knows that
> > the memory is not private memory.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> > ---
> >  include/linux/shmem_fs.h |  4 ++
> >  mm/memfile_notifier.c    | 12 +++++-
> >  mm/shmem.c               | 81 ++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 96 insertions(+), 1 deletion(-)
> > 
> > diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> > index 166158b6e917..461633587eaf 100644
> > --- a/include/linux/shmem_fs.h
> > +++ b/include/linux/shmem_fs.h
> > @@ -9,6 +9,7 @@
> >  #include <linux/percpu_counter.h>
> >  #include <linux/xattr.h>
> >  #include <linux/fs_parser.h>
> > +#include <linux/memfile_notifier.h>
> >  
> >  /* inode in-kernel data */
> >  
> > @@ -24,6 +25,9 @@ struct shmem_inode_info {
> >  	struct shared_policy	policy;		/* NUMA memory alloc policy */
> >  	struct simple_xattrs	xattrs;		/* list of xattrs */
> >  	atomic_t		stop_eviction;	/* hold when working on inode */
> > +#ifdef CONFIG_MEMFILE_NOTIFIER
> > +	struct memfile_notifier_list memfile_notifiers;
> > +#endif
> >  	struct inode		vfs_inode;
> >  };
> >  
> > diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c
> > index 8171d4601a04..b4699cbf629e 100644
> > --- a/mm/memfile_notifier.c
> > +++ b/mm/memfile_notifier.c
> > @@ -41,11 +41,21 @@ void memfile_notifier_fallocate(struct memfile_notifier_list *list,
> >  	srcu_read_unlock(&srcu, id);
> >  }
> >  
> > +#ifdef CONFIG_SHMEM
> > +extern int shmem_get_memfile_notifier_info(struct inode *inode,
> > +					struct memfile_notifier_list **list,
> > +					struct memfile_pfn_ops **ops);
> > +#endif
> > +
> >  static int memfile_get_notifier_info(struct inode *inode,
> >  				     struct memfile_notifier_list **list,
> >  				     struct memfile_pfn_ops **ops)
> >  {
> > -	return -EOPNOTSUPP;
> > +	int ret = -EOPNOTSUPP;
> > +#ifdef CONFIG_SHMEM
> > +	ret = shmem_get_memfile_notifier_info(inode, list, ops);
> > +#endif
> 
> This looks backwards. Can we have some register method for memory backing
> store and call it from shmem.c?

Agreed. That would be clearer.

Chao
> 
> > +	return ret;
> >  }
> >  
> >  int memfile_register_notifier(struct inode *inode,
> 
> -- 
> Sincerely yours,
> Mike.
Chao Peng Feb. 17, 2022, 1:23 p.m. UTC | #4
On Fri, Feb 11, 2022 at 03:40:09PM -0800, Andy Lutomirski wrote:
> On 1/18/22 05:21, Chao Peng wrote:
> > It maintains a memfile_notifier list in shmem_inode_info structure and
> > implements memfile_pfn_ops callbacks defined by memfile_notifier. It
> > then exposes them to memfile_notifier via
> > shmem_get_memfile_notifier_info.
> > 
> > We use SGP_NOALLOC in shmem_get_lock_pfn since the pages should be
> > allocated by userspace for private memory. If there is no pages
> > allocated at the offset then error should be returned so KVM knows that
> > the memory is not private memory.
> > 
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
> 
> >   static int memfile_get_notifier_info(struct inode *inode,
> >   				     struct memfile_notifier_list **list,
> >   				     struct memfile_pfn_ops **ops)
> >   {
> > -	return -EOPNOTSUPP;
> > +	int ret = -EOPNOTSUPP;
> > +#ifdef CONFIG_SHMEM
> > +	ret = shmem_get_memfile_notifier_info(inode, list, ops);
> > +#endif
> > +	return ret;
> >   }
> 
> > +int shmem_get_memfile_notifier_info(struct inode *inode,
> > +				    struct memfile_notifier_list **list,
> > +				    struct memfile_pfn_ops **ops)
> > +{
> > +	struct shmem_inode_info *info;
> > +
> > +	if (!shmem_mapping(inode->i_mapping))
> > +		return -EINVAL;
> > +
> > +	info = SHMEM_I(inode);
> > +	*list = &info->memfile_notifiers;
> > +	if (ops)
> > +		*ops = &shmem_pfn_ops;
> > +
> > +	return 0;
> 
> I can't wrap my head around exactly who is supposed to call these functions
> and when, but there appears to be a missing check that the inode is actually
> a shmem inode.
> 
> What is this code trying to do?  It's very abstract.

This is to be called by memfile_(un)register_notifier in patch-03 to
allow shmem to be connected to memfile_notifer. But as Mike pointed out,
probably introducing a memfile_notifier_register_backing_store() sounds
better so backing store (e.g. shmem) can register itself to
memfile_notifier.

Chao
diff mbox series

Patch

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 166158b6e917..461633587eaf 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -9,6 +9,7 @@ 
 #include <linux/percpu_counter.h>
 #include <linux/xattr.h>
 #include <linux/fs_parser.h>
+#include <linux/memfile_notifier.h>
 
 /* inode in-kernel data */
 
@@ -24,6 +25,9 @@  struct shmem_inode_info {
 	struct shared_policy	policy;		/* NUMA memory alloc policy */
 	struct simple_xattrs	xattrs;		/* list of xattrs */
 	atomic_t		stop_eviction;	/* hold when working on inode */
+#ifdef CONFIG_MEMFILE_NOTIFIER
+	struct memfile_notifier_list memfile_notifiers;
+#endif
 	struct inode		vfs_inode;
 };
 
diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c
index 8171d4601a04..b4699cbf629e 100644
--- a/mm/memfile_notifier.c
+++ b/mm/memfile_notifier.c
@@ -41,11 +41,21 @@  void memfile_notifier_fallocate(struct memfile_notifier_list *list,
 	srcu_read_unlock(&srcu, id);
 }
 
+#ifdef CONFIG_SHMEM
+extern int shmem_get_memfile_notifier_info(struct inode *inode,
+					struct memfile_notifier_list **list,
+					struct memfile_pfn_ops **ops);
+#endif
+
 static int memfile_get_notifier_info(struct inode *inode,
 				     struct memfile_notifier_list **list,
 				     struct memfile_pfn_ops **ops)
 {
-	return -EOPNOTSUPP;
+	int ret = -EOPNOTSUPP;
+#ifdef CONFIG_SHMEM
+	ret = shmem_get_memfile_notifier_info(inode, list, ops);
+#endif
+	return ret;
 }
 
 int memfile_register_notifier(struct inode *inode,
diff --git a/mm/shmem.c b/mm/shmem.c
index 72185630e7c4..00af869d26ce 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -906,6 +906,28 @@  static bool shmem_punch_compound(struct page *page, pgoff_t start, pgoff_t end)
 	return split_huge_page(page) >= 0;
 }
 
+static void notify_fallocate(struct inode *inode, pgoff_t start, pgoff_t end)
+{
+#ifdef CONFIG_MEMFILE_NOTIFIER
+	struct shmem_inode_info *info = SHMEM_I(inode);
+
+	memfile_notifier_fallocate(&info->memfile_notifiers, start, end);
+#endif
+}
+
+static void notify_invalidate_page(struct inode *inode, struct page *page,
+				   pgoff_t start, pgoff_t end)
+{
+#ifdef CONFIG_MEMFILE_NOTIFIER
+	struct shmem_inode_info *info = SHMEM_I(inode);
+
+	start = max(start, page->index);
+	end = min(end, page->index + thp_nr_pages(page));
+
+	memfile_notifier_invalidate(&info->memfile_notifiers, start, end);
+#endif
+}
+
 /*
  * Remove range of pages and swap entries from page cache, and free them.
  * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
@@ -949,6 +971,8 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			}
 			index += thp_nr_pages(page) - 1;
 
+			notify_invalidate_page(inode, page, start, end);
+
 			if (!unfalloc || !PageUptodate(page))
 				truncate_inode_page(mapping, page);
 			unlock_page(page);
@@ -1025,6 +1049,9 @@  static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 					index--;
 					break;
 				}
+
+				notify_invalidate_page(inode, page, start, end);
+
 				VM_BUG_ON_PAGE(PageWriteback(page), page);
 				if (shmem_punch_compound(page, start, end))
 					truncate_inode_page(mapping, page);
@@ -2313,6 +2340,9 @@  static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
 		info->flags = flags & VM_NORESERVE;
 		INIT_LIST_HEAD(&info->shrinklist);
 		INIT_LIST_HEAD(&info->swaplist);
+#ifdef CONFIG_MEMFILE_NOTIFIER
+		memfile_notifier_list_init(&info->memfile_notifiers);
+#endif
 		simple_xattrs_init(&info->xattrs);
 		cache_no_acl(inode);
 		mapping_set_large_folios(inode->i_mapping);
@@ -2818,6 +2848,7 @@  static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
 		i_size_write(inode, offset + len);
 	inode->i_ctime = current_time(inode);
+	notify_fallocate(inode, start, end);
 undone:
 	spin_lock(&inode->i_lock);
 	inode->i_private = NULL;
@@ -4002,6 +4033,56 @@  struct kobj_attribute shmem_enabled_attr =
 	__ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
 
+#ifdef CONFIG_MEMFILE_NOTIFIER
+static long shmem_get_lock_pfn(struct inode *inode, pgoff_t offset, int *order)
+{
+	struct page *page;
+	int ret;
+
+	ret = shmem_getpage(inode, offset, &page, SGP_NOALLOC);
+	if (ret)
+		return ret;
+
+	*order = thp_order(compound_head(page));
+
+	return page_to_pfn(page);
+}
+
+static void shmem_put_unlock_pfn(unsigned long pfn)
+{
+	struct page *page = pfn_to_page(pfn);
+
+	VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+	set_page_dirty(page);
+	unlock_page(page);
+	put_page(page);
+}
+
+static struct memfile_pfn_ops shmem_pfn_ops = {
+	.get_lock_pfn = shmem_get_lock_pfn,
+	.put_unlock_pfn = shmem_put_unlock_pfn,
+};
+
+int shmem_get_memfile_notifier_info(struct inode *inode,
+				    struct memfile_notifier_list **list,
+				    struct memfile_pfn_ops **ops)
+{
+	struct shmem_inode_info *info;
+
+	if (!shmem_mapping(inode->i_mapping))
+		return -EINVAL;
+
+	info = SHMEM_I(inode);
+	*list = &info->memfile_notifiers;
+	if (ops)
+		*ops = &shmem_pfn_ops;
+
+	return 0;
+}
+
+#endif /* CONFIG_MEMFILE_NOTIFIER */
+
 #else /* !CONFIG_SHMEM */
 
 /*