Message ID | 20231107-vv-kmem_memmap-v10-3-1253ec050ed0@intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: use memmap_on_memory semantics for dax/kmem | expand |
On 07/11/2023 15:22, Vishal Verma wrote:
> Large amounts of memory managed by the kmem driver may come in via CXL,
> and it is often desirable to have the memmap for this memory on the new
> memory itself.
>
> Enroll kmem-managed memory for memmap_on_memory semantics if the dax
> region originates via CXL. For non-CXL dax regions, retain the existing
> default behavior of hot adding without memmap_on_memory semantics.
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Michal Hocko <mhocko@suse.com>
> Cc: Oscar Salvador <osalvador@suse.de>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Dave Hansen <dave.hansen@linux.intel.com>
> Cc: Huang Ying <ying.huang@intel.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: David Hildenbrand <david@redhat.com>
> Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>

Tested-by: Li Zhijian <lizhijian@fujitsu.com> # both cxl.kmem and nvdimm.kmem

> ---
> drivers/dax/bus.h | 1 +
> drivers/dax/dax-private.h | 1 +
> drivers/dax/bus.c | 3 +++
> drivers/dax/cxl.c | 1 +
> drivers/dax/hmem/hmem.c | 1 +
> drivers/dax/kmem.c | 8 +++++++-
> drivers/dax/pmem.c | 1 +
> 7 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
> index 1ccd23360124..cbbf64443098 100644
> --- a/drivers/dax/bus.h
> +++ b/drivers/dax/bus.h
> @@ -23,6 +23,7 @@ struct dev_dax_data {
> struct dev_pagemap *pgmap;
> resource_size_t size;
> int id;
> + bool memmap_on_memory;
> };
>
> struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
> diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
> index 27cf2daaaa79..446617b73aea 100644
> --- a/drivers/dax/dax-private.h
> +++ b/drivers/dax/dax-private.h
> @@ -70,6 +70,7 @@ struct dev_dax {
> struct ida ida;
> struct device dev;
> struct dev_pagemap *pgmap;
> + bool memmap_on_memory;
> int nr_range;
> struct dev_dax_range {
> unsigned long pgoff;
> diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
> index 1659b787b65f..1ff1ab5fa105 100644
> --- a/drivers/dax/bus.c
> +++ b/drivers/dax/bus.c
> @@ -367,6 +367,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
> .dax_region = dax_region,
> .size = 0,
> .id = -1,
> + .memmap_on_memory = false,
> };
> struct dev_dax *dev_dax = devm_create_dev_dax(&data);
>
> @@ -1400,6 +1401,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
> dev_dax->align = dax_region->align;
> ida_init(&dev_dax->ida);
>
> + dev_dax->memmap_on_memory = data->memmap_on_memory;
> +
> inode = dax_inode(dax_dev);
> dev->devt = inode->i_rdev;
> dev->bus = &dax_bus_type;
> diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
> index 8bc9d04034d6..c696837ab23c 100644
> --- a/drivers/dax/cxl.c
> +++ b/drivers/dax/cxl.c
> @@ -26,6 +26,7 @@ static int cxl_dax_region_probe(struct device *dev)
> .dax_region = dax_region,
> .id = -1,
> .size = range_len(&cxlr_dax->hpa_range),
> + .memmap_on_memory = true,
> };
>
> return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index 5d2ddef0f8f5..b9da69f92697 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -36,6 +36,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
> .dax_region = dax_region,
> .id = -1,
> .size = region_idle ? 0 : range_len(&mri->range),
> + .memmap_on_memory = false,
> };
>
> return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
> diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
> index 369c698b7706..42ee360cf4e3 100644
> --- a/drivers/dax/kmem.c
> +++ b/drivers/dax/kmem.c
> @@ -12,6 +12,7 @@
> #include <linux/mm.h>
> #include <linux/mman.h>
> #include <linux/memory-tiers.h>
> +#include <linux/memory_hotplug.h>
> #include "dax-private.h"
> #include "bus.h"
>
> @@ -93,6 +94,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> struct dax_kmem_data *data;
> struct memory_dev_type *mtype;
> int i, rc, mapped = 0;
> + mhp_t mhp_flags;
> int numa_node;
> int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
>
> @@ -179,12 +181,16 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
> */
> res->flags = IORESOURCE_SYSTEM_RAM;
>
> + mhp_flags = MHP_NID_IS_MGID;
> + if (dev_dax->memmap_on_memory)
> + mhp_flags |= MHP_MEMMAP_ON_MEMORY;
> +
> /*
> * Ensure that future kexec'd kernels will not treat
> * this as RAM automatically.
> */
> rc = add_memory_driver_managed(data->mgid, range.start,
> - range_len(&range), kmem_name, MHP_NID_IS_MGID);
> + range_len(&range), kmem_name, mhp_flags);
>
> if (rc) {
> dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
> diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
> index ae0cb113a5d3..f3c6c67b8412 100644
> --- a/drivers/dax/pmem.c
> +++ b/drivers/dax/pmem.c
> @@ -63,6 +63,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
> .id = id,
> .pgmap = &pgmap,
> .size = range_len(&range),
> + .memmap_on_memory = false,
> };
>
> return devm_create_dev_dax(&data);
>
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index 1ccd23360124..cbbf64443098 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -23,6 +23,7 @@ struct dev_dax_data {
 struct dev_pagemap *pgmap;
 resource_size_t size;
 int id;
+ bool memmap_on_memory;
 };

 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index 27cf2daaaa79..446617b73aea 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -70,6 +70,7 @@ struct dev_dax {
 struct ida ida;
 struct device dev;
 struct dev_pagemap *pgmap;
+ bool memmap_on_memory;
 int nr_range;
 struct dev_dax_range {
 unsigned long pgoff;
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 1659b787b65f..1ff1ab5fa105 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -367,6 +367,7 @@ static ssize_t create_store(struct device *dev, struct device_attribute *attr,
 .dax_region = dax_region,
 .size = 0,
 .id = -1,
+ .memmap_on_memory = false,
 };
 struct dev_dax *dev_dax = devm_create_dev_dax(&data);

@@ -1400,6 +1401,8 @@ struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
 dev_dax->align = dax_region->align;
 ida_init(&dev_dax->ida);

+ dev_dax->memmap_on_memory = data->memmap_on_memory;
+
 inode = dax_inode(dax_dev);
 dev->devt = inode->i_rdev;
 dev->bus = &dax_bus_type;
diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c
index 8bc9d04034d6..c696837ab23c 100644
--- a/drivers/dax/cxl.c
+++ b/drivers/dax/cxl.c
@@ -26,6 +26,7 @@ static int cxl_dax_region_probe(struct device *dev)
 .dax_region = dax_region,
 .id = -1,
 .size = range_len(&cxlr_dax->hpa_range),
+ .memmap_on_memory = true,
 };

 return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 5d2ddef0f8f5..b9da69f92697 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -36,6 +36,7 @@ static int dax_hmem_probe(struct platform_device *pdev)
 .dax_region = dax_region,
 .id = -1,
 .size = region_idle ? 0 : range_len(&mri->range),
+ .memmap_on_memory = false,
 };

 return PTR_ERR_OR_ZERO(devm_create_dev_dax(&data));
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index 369c698b7706..42ee360cf4e3 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/memory-tiers.h>
+#include <linux/memory_hotplug.h>
 #include "dax-private.h"
 #include "bus.h"

@@ -93,6 +94,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 struct dax_kmem_data *data;
 struct memory_dev_type *mtype;
 int i, rc, mapped = 0;
+ mhp_t mhp_flags;
 int numa_node;
 int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;

@@ -179,12 +181,16 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
 */
 res->flags = IORESOURCE_SYSTEM_RAM;

+ mhp_flags = MHP_NID_IS_MGID;
+ if (dev_dax->memmap_on_memory)
+ mhp_flags |= MHP_MEMMAP_ON_MEMORY;
+
 /*
 * Ensure that future kexec'd kernels will not treat
 * this as RAM automatically.
 */
 rc = add_memory_driver_managed(data->mgid, range.start,
- range_len(&range), kmem_name, MHP_NID_IS_MGID);
+ range_len(&range), kmem_name, mhp_flags);

 if (rc) {
 dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index ae0cb113a5d3..f3c6c67b8412 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -63,6 +63,7 @@ static struct dev_dax *__dax_pmem_probe(struct device *dev)
 .id = id,
 .pgmap = &pgmap,
 .size = range_len(&range),
+ .memmap_on_memory = false,
 };

 return devm_create_dev_dax(&data);