| Message ID | 20220310140911.50924-3-chao.p.peng@linux.intel.com (mailing list archive) |
|---|---|
| State | New |
| Series | KVM: mm: fd-based approach for supporting KVM guest private memory |
On Thu, Mar 10, 2022, Chao Peng wrote:
> diff --git a/mm/Makefile b/mm/Makefile
> index 70d4309c9ce3..f628256dce0d 100644
> +void memfile_notifier_invalidate(struct memfile_notifier_list *list,
> +                                 pgoff_t start, pgoff_t end)
> +{
> +        struct memfile_notifier *notifier;
> +        int id;
> +
> +        id = srcu_read_lock(&srcu);
> +        list_for_each_entry_srcu(notifier, &list->head, list,
> +                                 srcu_read_lock_held(&srcu)) {
> +                if (notifier->ops && notifier->ops->invalidate)

Any reason notifier->ops isn't mandatory?

> +                        notifier->ops->invalidate(notifier, start, end);
> +        }
> +        srcu_read_unlock(&srcu, id);
> +}
> +
> +void memfile_notifier_fallocate(struct memfile_notifier_list *list,
> +                                pgoff_t start, pgoff_t end)
> +{
> +        struct memfile_notifier *notifier;
> +        int id;
> +
> +        id = srcu_read_lock(&srcu);
> +        list_for_each_entry_srcu(notifier, &list->head, list,
> +                                 srcu_read_lock_held(&srcu)) {
> +                if (notifier->ops && notifier->ops->fallocate)
> +                        notifier->ops->fallocate(notifier, start, end);
> +        }
> +        srcu_read_unlock(&srcu, id);
> +}
> +
> +void memfile_register_backing_store(struct memfile_backing_store *bs)
> +{
> +        BUG_ON(!bs || !bs->get_notifier_list);
> +
> +        list_add_tail(&bs->list, &backing_store_list);
> +}
> +
> +void memfile_unregister_backing_store(struct memfile_backing_store *bs)
> +{
> +        list_del(&bs->list);

Allowing unregistration of a backing store is broken.  Using the _safe() variant
is not sufficient to guard against concurrent modification.  I don't see any
reason to support this out of the gate, the only reason to support unregistering
a backing store is if the backing store is implemented as a module, and AFAIK
none of the backing stores we plan on supporting initially support being built
as a module.  These aren't exported, so it's not like that's even possible.
Registration would also be broken if modules are allowed, I'm pretty sure module
init doesn't run under a global lock.

We can always add this complexity if it's needed in the future, but for now the
easiest thing would be to tag memfile_register_backing_store() with __init and
make backing_store_list __ro_after_init.

> +}
> +
> +static int memfile_get_notifier_info(struct inode *inode,
> +                                     struct memfile_notifier_list **list,
> +                                     struct memfile_pfn_ops **ops)
> +{
> +        struct memfile_backing_store *bs, *iter;
> +        struct memfile_notifier_list *tmp;
> +
> +        list_for_each_entry_safe(bs, iter, &backing_store_list, list) {
> +                tmp = bs->get_notifier_list(inode);
> +                if (tmp) {
> +                        *list = tmp;
> +                        if (ops)
> +                                *ops = &bs->pfn_ops;
> +                        return 0;
> +                }
> +        }
> +        return -EOPNOTSUPP;
> +}
> +
> +int memfile_register_notifier(struct inode *inode,

Taking an inode is a bit odd from a user perspective.  Any reason not to take a
"struct file *" and get the inode here?  That would give callers a hint that
they need to hold a reference to the file for the lifetime of the registration.

> +                              struct memfile_notifier *notifier,
> +                              struct memfile_pfn_ops **pfn_ops)
> +{
> +        struct memfile_notifier_list *list;
> +        int ret;
> +
> +        if (!inode || !notifier | !pfn_ops)

Bitwise | instead of logical ||.  But IMO taking in a pfn_ops pointer is silly.
More below.

> +                return -EINVAL;
> +
> +        ret = memfile_get_notifier_info(inode, &list, pfn_ops);
> +        if (ret)
> +                return ret;
> +
> +        spin_lock(&list->lock);
> +        list_add_rcu(&notifier->list, &list->head);
> +        spin_unlock(&list->lock);
> +
> +        return 0;
> +}
> +EXPORT_SYMBOL_GPL(memfile_register_notifier);
> +
> +void memfile_unregister_notifier(struct inode *inode,
> +                                 struct memfile_notifier *notifier)
> +{
> +        struct memfile_notifier_list *list;
> +
> +        if (!inode || !notifier)
> +                return;
> +
> +        BUG_ON(memfile_get_notifier_info(inode, &list, NULL));

Eww.  Rather than force the caller to provide the inode/file and the notifier,
what about grabbing the backing store itself in the notifier?

	struct memfile_notifier {
		struct list_head list;
		struct memfile_notifier_ops *ops;

		struct memfile_backing_store *bs;
	};

That also helps avoid confusion between "ops" and "pfn_ops".  IMO, exposing
memfile_backing_store to the caller isn't a big deal, and is preferable to
having to rewalk multiple lists just to delete a notifier.

Then this can become:

	void memfile_unregister_notifier(struct memfile_notifier *notifier)
	{
		spin_lock(&notifier->bs->list->lock);
		list_del_rcu(&notifier->list);
		spin_unlock(&notifier->bs->list->lock);

		synchronize_srcu(&srcu);
	}

and registration can be:

	int memfile_register_notifier(const struct file *file,
				      struct memfile_notifier *notifier)
	{
		struct memfile_notifier_list *list;
		struct memfile_backing_store *bs;
		int ret;

		if (!file || !notifier)
			return -EINVAL;

		list_for_each_entry(bs, &backing_store_list, list) {
			list = bs->get_notifier_list(file_inode(file));
			if (list) {
				notifier->bs = bs;

				spin_lock(&list->lock);
				list_add_rcu(&notifier->list, &list->head);
				spin_unlock(&list->lock);
				return 0;
			}
		}

		return -EOPNOTSUPP;
	}
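[A minimal sketch of the boot-time-only registration suggested above; the
__init tagging and __ro_after_init placement are assumptions drawn from the
review comment, not code from the posted series.]

	/*
	 * Sketch only: backing stores register during kernel init, after
	 * which the list is sealed read-only, so lookups need no locking.
	 */
	static struct list_head backing_store_list __ro_after_init =
		LIST_HEAD_INIT(backing_store_list);

	void __init memfile_register_backing_store(struct memfile_backing_store *bs)
	{
		BUG_ON(!bs || !bs->get_notifier_list);

		/* Only reachable from boot-time init, so no lock is needed. */
		list_add_tail(&bs->list, &backing_store_list);
	}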
On Tue, Mar 29, 2022 at 06:45:16PM +0000, Sean Christopherson wrote:
> On Thu, Mar 10, 2022, Chao Peng wrote:
> > diff --git a/mm/Makefile b/mm/Makefile
> > index 70d4309c9ce3..f628256dce0d 100644
> > +void memfile_notifier_invalidate(struct memfile_notifier_list *list,
> > +                                 pgoff_t start, pgoff_t end)
> > +{
> > +        struct memfile_notifier *notifier;
> > +        int id;
> > +
> > +        id = srcu_read_lock(&srcu);
> > +        list_for_each_entry_srcu(notifier, &list->head, list,
> > +                                 srcu_read_lock_held(&srcu)) {
> > +                if (notifier->ops && notifier->ops->invalidate)
>
> Any reason notifier->ops isn't mandatory?

Yes it's mandatory, will skip the check here.

>
> > +                        notifier->ops->invalidate(notifier, start, end);
> > +        }
> > +        srcu_read_unlock(&srcu, id);
> > +}
> > +
> > +void memfile_notifier_fallocate(struct memfile_notifier_list *list,
> > +                                pgoff_t start, pgoff_t end)
> > +{
> > +        struct memfile_notifier *notifier;
> > +        int id;
> > +
> > +        id = srcu_read_lock(&srcu);
> > +        list_for_each_entry_srcu(notifier, &list->head, list,
> > +                                 srcu_read_lock_held(&srcu)) {
> > +                if (notifier->ops && notifier->ops->fallocate)
> > +                        notifier->ops->fallocate(notifier, start, end);
> > +        }
> > +        srcu_read_unlock(&srcu, id);
> > +}
> > +
> > +void memfile_register_backing_store(struct memfile_backing_store *bs)
> > +{
> > +        BUG_ON(!bs || !bs->get_notifier_list);
> > +
> > +        list_add_tail(&bs->list, &backing_store_list);
> > +}
> > +
> > +void memfile_unregister_backing_store(struct memfile_backing_store *bs)
> > +{
> > +        list_del(&bs->list);
>
> Allowing unregistration of a backing store is broken.  Using the _safe() variant
> is not sufficient to guard against concurrent modification.  I don't see any
> reason to support this out of the gate, the only reason to support unregistering
> a backing store is if the backing store is implemented as a module, and AFAIK
> none of the backing stores we plan on supporting initially support being built
> as a module.  These aren't exported, so it's not like that's even possible.
> Registration would also be broken if modules are allowed, I'm pretty sure module
> init doesn't run under a global lock.
>
> We can always add this complexity if it's needed in the future, but for now the
> easiest thing would be to tag memfile_register_backing_store() with __init and
> make backing_store_list __ro_after_init.

The only currently supported backing store, shmem, does not need this, so I can
remove it for now.

>
> > +}
> > +
> > +static int memfile_get_notifier_info(struct inode *inode,
> > +                                     struct memfile_notifier_list **list,
> > +                                     struct memfile_pfn_ops **ops)
> > +{
> > +        struct memfile_backing_store *bs, *iter;
> > +        struct memfile_notifier_list *tmp;
> > +
> > +        list_for_each_entry_safe(bs, iter, &backing_store_list, list) {
> > +                tmp = bs->get_notifier_list(inode);
> > +                if (tmp) {
> > +                        *list = tmp;
> > +                        if (ops)
> > +                                *ops = &bs->pfn_ops;
> > +                        return 0;
> > +                }
> > +        }
> > +        return -EOPNOTSUPP;
> > +}
> > +
> > +int memfile_register_notifier(struct inode *inode,
>
> Taking an inode is a bit odd from a user perspective.  Any reason not to take a
> "struct file *" and get the inode here?  That would give callers a hint that
> they need to hold a reference to the file for the lifetime of the registration.

Yes, I can change.

>
> > +                              struct memfile_notifier *notifier,
> > +                              struct memfile_pfn_ops **pfn_ops)
> > +{
> > +        struct memfile_notifier_list *list;
> > +        int ret;
> > +
> > +        if (!inode || !notifier | !pfn_ops)
>
> Bitwise | instead of logical ||.  But IMO taking in a pfn_ops pointer is silly.
> More below.
>
> > +                return -EINVAL;
> > +
> > +        ret = memfile_get_notifier_info(inode, &list, pfn_ops);
> > +        if (ret)
> > +                return ret;
> > +
> > +        spin_lock(&list->lock);
> > +        list_add_rcu(&notifier->list, &list->head);
> > +        spin_unlock(&list->lock);
> > +
> > +        return 0;
> > +}
> > +EXPORT_SYMBOL_GPL(memfile_register_notifier);
> > +
> > +void memfile_unregister_notifier(struct inode *inode,
> > +                                 struct memfile_notifier *notifier)
> > +{
> > +        struct memfile_notifier_list *list;
> > +
> > +        if (!inode || !notifier)
> > +                return;
> > +
> > +        BUG_ON(memfile_get_notifier_info(inode, &list, NULL));
>
> Eww.  Rather than force the caller to provide the inode/file and the notifier,
> what about grabbing the backing store itself in the notifier?
>
> 	struct memfile_notifier {
> 		struct list_head list;
> 		struct memfile_notifier_ops *ops;
>
> 		struct memfile_backing_store *bs;
> 	};
>
> That also helps avoid confusion between "ops" and "pfn_ops".  IMO, exposing
> memfile_backing_store to the caller isn't a big deal, and is preferable to
> having to rewalk multiple lists just to delete a notifier.

Agreed, good suggestion.

>
> Then this can become:
>
> 	void memfile_unregister_notifier(struct memfile_notifier *notifier)
> 	{
> 		spin_lock(&notifier->bs->list->lock);
> 		list_del_rcu(&notifier->list);
> 		spin_unlock(&notifier->bs->list->lock);
>
> 		synchronize_srcu(&srcu);
> 	}
>
> and registration can be:
>
> 	int memfile_register_notifier(const struct file *file,
> 				      struct memfile_notifier *notifier)
> 	{
> 		struct memfile_notifier_list *list;
> 		struct memfile_backing_store *bs;
> 		int ret;
>
> 		if (!file || !notifier)
> 			return -EINVAL;
>
> 		list_for_each_entry(bs, &backing_store_list, list) {
> 			list = bs->get_notifier_list(file_inode(file));
> 			if (list) {
> 				notifier->bs = bs;
>
> 				spin_lock(&list->lock);
> 				list_add_rcu(&notifier->list, &list->head);
> 				spin_unlock(&list->lock);
> 				return 0;
> 			}
> 		}
>
> 		return -EOPNOTSUPP;
> 	}
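[As a rough illustration of the direction agreed above, a consumer of the
reworked file-based API might look like the following; every name here is
hypothetical and not taken from the series.]

	/* Hypothetical consumer of the reworked registration API. */
	static void private_mem_invalidate(struct memfile_notifier *notifier,
					   pgoff_t start, pgoff_t end)
	{
		/* Tear down any mappings the consumer created for [start, end). */
	}

	static struct memfile_notifier_ops private_mem_notifier_ops = {
		.invalidate	= private_mem_invalidate,
	};

	static struct memfile_notifier private_mem_notifier = {
		.ops = &private_mem_notifier_ops,
	};

	static int private_mem_bind(struct file *file)
	{
		/* The file reference must outlive the registration. */
		get_file(file);
		return memfile_register_notifier(file, &private_mem_notifier);
	}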
On Thu, 10 Mar 2022 22:09:00 +0800 Chao Peng wrote:
> +
> +void memfile_register_backing_store(struct memfile_backing_store *bs)
> +{
> +        BUG_ON(!bs || !bs->get_notifier_list);
> +
> +        list_add_tail(&bs->list, &backing_store_list);
> +}
> +
> +void memfile_unregister_backing_store(struct memfile_backing_store *bs)
> +{
> +        list_del(&bs->list);
> +}
> +
> +static int memfile_get_notifier_info(struct inode *inode,

Nit, s/get/lookup/

> +                                     struct memfile_notifier_list **list,
> +                                     struct memfile_pfn_ops **ops)
> +{
> +        struct memfile_backing_store *bs, *iter;
> +        struct memfile_notifier_list *tmp;
> +
> +        list_for_each_entry_safe(bs, iter, &backing_store_list, list) {

Wonder what serializes list walk with list del and add above.

> +                tmp = bs->get_notifier_list(inode);
> +                if (tmp) {
> +                        *list = tmp;
> +                        if (ops)
> +                                *ops = &bs->pfn_ops;
> +                        return 0;
> +                }
> +        }
> +        return -EOPNOTSUPP;
> +}
On Tue, Apr 12, 2022 at 10:36:54PM +0800, Hillf Danton wrote:
> On Thu, 10 Mar 2022 22:09:00 +0800 Chao Peng wrote:
> > +
> > +void memfile_register_backing_store(struct memfile_backing_store *bs)
> > +{
> > +        BUG_ON(!bs || !bs->get_notifier_list);
> > +
> > +        list_add_tail(&bs->list, &backing_store_list);
> > +}
> > +
> > +void memfile_unregister_backing_store(struct memfile_backing_store *bs)
> > +{
> > +        list_del(&bs->list);
> > +}
> > +
> > +static int memfile_get_notifier_info(struct inode *inode,
>
> Nit, s/get/lookup/

Thanks.

>
> > +                                     struct memfile_notifier_list **list,
> > +                                     struct memfile_pfn_ops **ops)
> > +{
> > +        struct memfile_backing_store *bs, *iter;
> > +        struct memfile_notifier_list *tmp;
> > +
> > +        list_for_each_entry_safe(bs, iter, &backing_store_list, list) {
>
> Wonder what serializes list walk with list del and add above.

Yes, this needs locking if we want to support backing stores as modules, as
Sean pointed out. At this time that is not really needed, so I will remove
unregister. Register should happen at kernel init time, so the walk here is
actually over a read-only list.

Thanks,
Chao

>
> > +                tmp = bs->get_notifier_list(inode);
> > +                if (tmp) {
> > +                        *list = tmp;
> > +                        if (ops)
> > +                                *ops = &bs->pfn_ops;
> > +                        return 0;
> > +                }
> > +        }
> > +        return -EOPNOTSUPP;
> > +}
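[To make the read-only-after-init point concrete, a backing store would
register itself once during boot, roughly as below; the names are illustrative
and not taken from the shmem patch in this series.]

	/* Illustrative only: a backing store registering once at boot. */
	static struct memfile_notifier_list *
	example_get_notifier_list(struct inode *inode)
	{
		/*
		 * Return the per-inode notifier list, or NULL if the inode
		 * does not belong to this backing store.
		 */
		return NULL;
	}

	static struct memfile_backing_store example_backing_store = {
		.get_notifier_list = example_get_notifier_list,
		/* .pfn_ops would be filled in by the real backing store. */
	};

	static int __init example_memfile_init(void)
	{
		/* Runs during init; the list is never modified afterwards. */
		memfile_register_backing_store(&example_backing_store);
		return 0;
	}
	core_initcall(example_memfile_init);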
diff --git a/include/linux/memfile_notifier.h b/include/linux/memfile_notifier.h
new file mode 100644
index 000000000000..e8d400558adb
--- /dev/null
+++ b/include/linux/memfile_notifier.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MEMFILE_NOTIFIER_H
+#define _LINUX_MEMFILE_NOTIFIER_H
+
+#include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/srcu.h>
+#include <linux/fs.h>
+
+struct memfile_notifier;
+
+struct memfile_notifier_ops {
+        void (*invalidate)(struct memfile_notifier *notifier,
+                           pgoff_t start, pgoff_t end);
+        void (*fallocate)(struct memfile_notifier *notifier,
+                          pgoff_t start, pgoff_t end);
+};
+
+struct memfile_pfn_ops {
+        long (*get_lock_pfn)(struct inode *inode, pgoff_t offset, int *order);
+        void (*put_unlock_pfn)(unsigned long pfn);
+};
+
+struct memfile_notifier {
+        struct list_head list;
+        struct memfile_notifier_ops *ops;
+};
+
+struct memfile_notifier_list {
+        struct list_head head;
+        spinlock_t lock;
+};
+
+struct memfile_backing_store {
+        struct list_head list;
+        struct memfile_pfn_ops pfn_ops;
+        struct memfile_notifier_list* (*get_notifier_list)(struct inode *inode);
+};
+
+#ifdef CONFIG_MEMFILE_NOTIFIER
+/* APIs for backing stores */
+static inline void memfile_notifier_list_init(struct memfile_notifier_list *list)
+{
+        INIT_LIST_HEAD(&list->head);
+        spin_lock_init(&list->lock);
+}
+
+extern void memfile_notifier_invalidate(struct memfile_notifier_list *list,
+                                        pgoff_t start, pgoff_t end);
+extern void memfile_notifier_fallocate(struct memfile_notifier_list *list,
+                                       pgoff_t start, pgoff_t end);
+extern void memfile_register_backing_store(struct memfile_backing_store *bs);
+extern void memfile_unregister_backing_store(struct memfile_backing_store *bs);
+
+/*APIs for notifier consumers */
+extern int memfile_register_notifier(struct inode *inode,
+                                     struct memfile_notifier *notifier,
+                                     struct memfile_pfn_ops **pfn_ops);
+extern void memfile_unregister_notifier(struct inode *inode,
+                                        struct memfile_notifier *notifier);
+
+#endif /* CONFIG_MEMFILE_NOTIFIER */
+
+#endif /* _LINUX_MEMFILE_NOTIFIER_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 3326ee3903f3..7c6b1ad3dade 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -892,6 +892,10 @@ config ANON_VMA_NAME
           area from being merged with adjacent virtual memory areas due to the
           difference in their name.
 
+config MEMFILE_NOTIFIER
+        bool
+        select SRCU
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/Makefile b/mm/Makefile
index 70d4309c9ce3..f628256dce0d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -132,3 +132,4 @@ obj-$(CONFIG_PAGE_REPORTING) += page_reporting.o
 obj-$(CONFIG_IO_MAPPING) += io-mapping.o
 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
+obj-$(CONFIG_MEMFILE_NOTIFIER) += memfile_notifier.o
diff --git a/mm/memfile_notifier.c b/mm/memfile_notifier.c
new file mode 100644
index 000000000000..a405db56fde2
--- /dev/null
+++ b/mm/memfile_notifier.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  linux/mm/memfile_notifier.c
+ *
+ *  Copyright (C) 2022  Intel Corporation.
+ *                      Chao Peng <chao.p.peng@linux.intel.com>
+ */
+
+#include <linux/memfile_notifier.h>
+#include <linux/srcu.h>
+
+DEFINE_STATIC_SRCU(srcu);
+static LIST_HEAD(backing_store_list);
+
+void memfile_notifier_invalidate(struct memfile_notifier_list *list,
+                                 pgoff_t start, pgoff_t end)
+{
+        struct memfile_notifier *notifier;
+        int id;
+
+        id = srcu_read_lock(&srcu);
+        list_for_each_entry_srcu(notifier, &list->head, list,
+                                 srcu_read_lock_held(&srcu)) {
+                if (notifier->ops && notifier->ops->invalidate)
+                        notifier->ops->invalidate(notifier, start, end);
+        }
+        srcu_read_unlock(&srcu, id);
+}
+
+void memfile_notifier_fallocate(struct memfile_notifier_list *list,
+                                pgoff_t start, pgoff_t end)
+{
+        struct memfile_notifier *notifier;
+        int id;
+
+        id = srcu_read_lock(&srcu);
+        list_for_each_entry_srcu(notifier, &list->head, list,
+                                 srcu_read_lock_held(&srcu)) {
+                if (notifier->ops && notifier->ops->fallocate)
+                        notifier->ops->fallocate(notifier, start, end);
+        }
+        srcu_read_unlock(&srcu, id);
+}
+
+void memfile_register_backing_store(struct memfile_backing_store *bs)
+{
+        BUG_ON(!bs || !bs->get_notifier_list);
+
+        list_add_tail(&bs->list, &backing_store_list);
+}
+
+void memfile_unregister_backing_store(struct memfile_backing_store *bs)
+{
+        list_del(&bs->list);
+}
+
+static int memfile_get_notifier_info(struct inode *inode,
+                                     struct memfile_notifier_list **list,
+                                     struct memfile_pfn_ops **ops)
+{
+        struct memfile_backing_store *bs, *iter;
+        struct memfile_notifier_list *tmp;
+
+        list_for_each_entry_safe(bs, iter, &backing_store_list, list) {
+                tmp = bs->get_notifier_list(inode);
+                if (tmp) {
+                        *list = tmp;
+                        if (ops)
+                                *ops = &bs->pfn_ops;
+                        return 0;
+                }
+        }
+        return -EOPNOTSUPP;
+}
+
+int memfile_register_notifier(struct inode *inode,
+                              struct memfile_notifier *notifier,
+                              struct memfile_pfn_ops **pfn_ops)
+{
+        struct memfile_notifier_list *list;
+        int ret;
+
+        if (!inode || !notifier | !pfn_ops)
+                return -EINVAL;
+
+        ret = memfile_get_notifier_info(inode, &list, pfn_ops);
+        if (ret)
+                return ret;
+
+        spin_lock(&list->lock);
+        list_add_rcu(&notifier->list, &list->head);
+        spin_unlock(&list->lock);
+
+        return 0;
+}
+EXPORT_SYMBOL_GPL(memfile_register_notifier);
+
+void memfile_unregister_notifier(struct inode *inode,
+                                 struct memfile_notifier *notifier)
+{
+        struct memfile_notifier_list *list;
+
+        if (!inode || !notifier)
+                return;
+
+        BUG_ON(memfile_get_notifier_info(inode, &list, NULL));
+
+        spin_lock(&list->lock);
+        list_del_rcu(&notifier->list);
+        spin_unlock(&list->lock);
+
+        synchronize_srcu(&srcu);
+}
+EXPORT_SYMBOL_GPL(memfile_unregister_notifier);
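[For orientation, a hypothetical caller of the interface exactly as posted
would register against the inode, receive the backing store's pfn_ops, and use
them to translate file offsets into locked PFNs; none of the names below
appear in the patch itself.]

	/* Hypothetical caller of the posted inode-based API. */
	static long map_private_page(struct inode *inode, pgoff_t index,
				     struct memfile_pfn_ops *pfn_ops)
	{
		long pfn;
		int order;

		/* pfn_ops was returned by memfile_register_notifier(). */
		pfn = pfn_ops->get_lock_pfn(inode, index, &order);
		if (pfn < 0)
			return pfn;

		/* ... install the (pfn, order) mapping, then release it ... */
		pfn_ops->put_unlock_pfn(pfn);
		return 0;
	}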