Message ID | 20190801002932.26430-5-vishal.l.verma@intel.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 07bb8562469da4510490cf1285206023237b6f54 |
Headers | show |
Series | daxctl: add a new reconfigure-device command | expand |
On Wed, 2019-07-31 at 18:29 -0600, Vishal Verma wrote: > Introduce a new 'daxctl_memory' object, which will be used for > operations related to managing dax devices in 'system-memory' modes. > > Add libdaxctl APIs to get the target_node of a DAX device, and to > online, offline, and query the state of hotplugged memory sections > associated with a given device. > > This adds the following new interfaces: > > daxctl_dev_get_target_node > daxctl_dev_get_memory > daxctl_memory_get_dev > daxctl_memory_get_node_path > daxctl_memory_get_block_size > daxctl_memory_online > daxctl_memory_offline > daxctl_memory_is_online > daxctl_memory_num_sections > > Cc: Pavel Tatashin <pasha.tatashin@soleen.com> > Cc: Dave Hansen <dave.hansen@linux.intel.com> > Cc: Dan Williams <dan.j.williams@intel.com> > [for the memblock-already-online TOCTOU hole] > Reported-by: Fan Du <fan.du@intel.com> > Tested-by: Fan Du <fan.du@intel.com> > Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> > --- > daxctl/lib/libdaxctl-private.h | 18 ++ > daxctl/lib/libdaxctl.c | 384 +++++++++++++++++++++++++++++++++ > daxctl/lib/libdaxctl.sym | 9 + > daxctl/libdaxctl.h | 11 + > 4 files changed, 422 insertions(+) > [..] > + > +static bool memblock_in_dev(struct daxctl_dev *dev, const char *memblock) > +{ > + struct daxctl_memory *mem = daxctl_dev_get_memory(dev); > Static analysis complains that this can potentially cause a NULL dereference. Fix it by passing the mem object to memblock_in_dev(), since it has already been validated by that time. 3<---- From e8bf803e359b784259f645d1ff68e964b2c8618f Mon Sep 17 00:00:00 2001 From: Vishal Verma <vishal.l.verma@intel.com> Date: Fri, 3 May 2019 13:27:35 -0600 Subject: [ndctl PATCH] libdaxctl: add a 'daxctl_memory' object for memory based operations Introduce a new 'daxctl_memory' object, which will be used for operations related to managing dax devices in 'system-memory' modes. 
Add libdaxctl APIs to get the target_node of a DAX device, and to online, offline, and query the state of hotplugged memory sections associated with a given device. This adds the following new interfaces: daxctl_dev_get_target_node daxctl_dev_get_memory daxctl_memory_get_dev daxctl_memory_get_node_path daxctl_memory_get_block_size daxctl_memory_online daxctl_memory_offline daxctl_memory_is_online daxctl_memory_num_sections Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dan Williams <dan.j.williams@intel.com> [for the memblock-already-online TOCTOU hole] Reported-by: Fan Du <fan.du@intel.com> Tested-by: Fan Du <fan.du@intel.com> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com> --- daxctl/lib/libdaxctl-private.h | 18 ++ daxctl/lib/libdaxctl.c | 384 +++++++++++++++++++++++++++++++++ daxctl/lib/libdaxctl.sym | 9 + daxctl/libdaxctl.h | 11 + 4 files changed, 422 insertions(+) diff --git a/daxctl/lib/libdaxctl-private.h b/daxctl/lib/libdaxctl-private.h index fee67d1..01091de 100644 --- a/daxctl/lib/libdaxctl-private.h +++ b/daxctl/lib/libdaxctl-private.h @@ -39,6 +39,13 @@ static const char *dax_modules[] = { [DAXCTL_DEV_MODE_RAM] = "kmem", }; +enum memory_op { + MEM_SET_OFFLINE, + MEM_SET_ONLINE, + MEM_IS_ONLINE, + MEM_COUNT, +}; + /** * struct daxctl_region - container for dax_devices */ @@ -70,8 +77,19 @@ struct daxctl_dev { struct kmod_module *module; struct kmod_list *kmod_list; struct daxctl_region *region; + struct daxctl_memory *mem; + int target_node; +}; + +struct daxctl_memory { + struct daxctl_dev *dev; + void *mem_buf; + size_t buf_len; + char *node_path; + unsigned long block_size; }; + static inline int check_kmod(struct kmod_ctx *kmod_ctx) { return kmod_ctx ? 
0 : -ENXIO; diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c index aa0d2f2..bcc77b6 100644 --- a/daxctl/lib/libdaxctl.c +++ b/daxctl/lib/libdaxctl.c @@ -200,6 +200,15 @@ DAXCTL_EXPORT void daxctl_region_get_uuid(struct daxctl_region *region, uuid_t u uuid_copy(uu, region->uuid); } +static void free_mem(struct daxctl_dev *dev) +{ + if (dev && dev->mem) { + free(dev->mem->node_path); + free(dev->mem); + dev->mem = NULL; + } +} + static void free_dev(struct daxctl_dev *dev, struct list_head *head) { if (head) @@ -207,6 +216,7 @@ static void free_dev(struct daxctl_dev *dev, struct list_head *head) kmod_module_unref_list(dev->kmod_list); free(dev->dev_buf); free(dev->dev_path); + free_mem(dev); free(dev); } @@ -380,6 +390,94 @@ static struct kmod_list *to_module_list(struct daxctl_ctx *ctx, return list; } +static int dev_is_system_ram_capable(struct daxctl_dev *dev) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + char *mod_path, *mod_base; + char path[200]; + const int len = sizeof(path); + + if (!device_model_is_dax_bus(dev)) + return false; + + if (!daxctl_dev_is_enabled(dev)) + return false; + + if (snprintf(path, len, "%s/driver/module", dev->dev_path) >= len) { + err(ctx, "%s: buffer too small!\n", devname); + return false; + } + + mod_path = realpath(path, NULL); + if (!mod_path) + return false; + + mod_base = basename(mod_path); + if (strcmp(mod_base, dax_modules[DAXCTL_DEV_MODE_RAM]) == 0) { + free(mod_path); + return true; + } + + free(mod_path); + return false; +} + +/* + * This checks for the device to be in system-ram mode, so calling + * daxctl_dev_get_memory() on a devdax mode device will always return NULL. 
+ */ +static struct daxctl_memory *daxctl_dev_alloc_mem(struct daxctl_dev *dev) +{ + const char *size_path = "/sys/devices/system/memory/block_size_bytes"; + const char *node_base = "/sys/devices/system/node/node"; + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + struct daxctl_memory *mem; + char buf[SYSFS_ATTR_SIZE]; + int node_num; + + if (!dev_is_system_ram_capable(dev)) + return NULL; + + mem = calloc(1, sizeof(*mem)); + if (!mem) + return NULL; + + mem->dev = dev; + + if (sysfs_read_attr(ctx, size_path, buf) == 0) { + mem->block_size = strtoul(buf, NULL, 16); + if (mem->block_size == 0 || mem->block_size == ULONG_MAX) { + err(ctx, "%s: Unable to determine memblock size: %s\n", + devname, strerror(errno)); + mem->block_size = 0; + } + } + + node_num = daxctl_dev_get_target_node(dev); + if (node_num >= 0) { + if (asprintf(&mem->node_path, "%s%d", node_base, + node_num) < 0) { + err(ctx, "%s: Unable to set node_path\n", devname); + goto err_mem; + } + } + + mem->mem_buf = calloc(1, strlen(node_base) + 256); + if (!mem->mem_buf) + goto err_node; + mem->buf_len = strlen(node_base) + 256; + + return mem; + +err_node: + free(mem->node_path); +err_mem: + free(mem); + return NULL; +} + static void *add_dax_dev(void *parent, int id, const char *daxdev_base) { const char *devname = devpath_to_devname(daxdev_base); @@ -435,6 +533,12 @@ static void *add_dax_dev(void *parent, int id, const char *daxdev_base) if (rc == 0) dev->kmod_list = to_module_list(ctx, buf); + sprintf(path, "%s/target_node", daxdev_base); + if (sysfs_read_attr(ctx, path, buf) == 0) + dev->target_node = strtol(buf, NULL, 0); + else + dev->target_node = -1; + daxctl_dev_foreach(region, dev_dup) if (dev_dup->id == dev->id) { free_dev(dev, NULL); @@ -862,6 +966,9 @@ DAXCTL_EXPORT int daxctl_dev_disable(struct daxctl_dev *dev) if (!daxctl_dev_is_enabled(dev)) return 0; + /* If there is a memory object, first free that */ + free_mem(dev); + 
daxctl_unbind(ctx, dev->dev_path); if (daxctl_dev_is_enabled(dev)) { @@ -944,3 +1051,280 @@ DAXCTL_EXPORT unsigned long long daxctl_dev_get_size(struct daxctl_dev *dev) { return dev->size; } + +DAXCTL_EXPORT int daxctl_dev_get_target_node(struct daxctl_dev *dev) +{ + return dev->target_node; +} + +DAXCTL_EXPORT struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev) +{ + if (dev->mem) + return dev->mem; + else + return daxctl_dev_alloc_mem(dev); +} + +DAXCTL_EXPORT struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem) +{ + return mem->dev; +} + +DAXCTL_EXPORT const char *daxctl_memory_get_node_path(struct daxctl_memory *mem) +{ + return mem->node_path; +} + +DAXCTL_EXPORT unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem) +{ + return mem->block_size; +} + +static int online_one_memblock(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const char *mode = "online_movable"; + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + /* + * if already online, possibly due to kernel config or a udev rule, + * there is nothing to do and we can skip over the memblock + */ + if (strncmp(buf, "online", 6) == 0) + return 1; + + rc = sysfs_write_attr_quiet(ctx, path, mode); + if (rc) { + /* + * While we performed an already-online check above, there + * is still a TOCTOU hole where someone (such as a udev rule) + * may have raced to online the memory. In such a case, + * the sysfs store will fail, however we can check for this + * by simply reading the state again. If it changed to the + * desired state, then we don't have to error out. 
+ */ + if (sysfs_read_attr(ctx, path, buf) == 0) { + if (strncmp(buf, "online", 6) == 0) + return 1; + } + err(ctx, "%s: Failed to online %s: %s\n", + devname, path, strerror(-rc)); + } + return rc; +} + +static int offline_one_memblock(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const char *mode = "offline"; + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + /* if already offline, there is nothing to do */ + if (strncmp(buf, "offline", 7) == 0) + return 1; + + rc = sysfs_write_attr_quiet(ctx, path, mode); + if (rc) { + /* Close the TOCTOU hole like in online_one_memblock() above */ + if (sysfs_read_attr(ctx, path, buf) == 0) { + if (strncmp(buf, "offline", 7) == 0) + return 1; + } + err(ctx, "%s: Failed to offline %s: %s\n", + devname, path, strerror(-rc)); + } + return rc; +} + +static int memblock_is_online(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + if (strncmp(buf, "online", 6) == 0) + return 1; + + /* offline */ + return 0; +} + +static bool memblock_in_dev(struct daxctl_memory *mem, const char *memblock) +{ + const char *mem_base = "/sys/devices/system/memory/"; + struct daxctl_dev *dev = daxctl_memory_get_dev(mem); + unsigned long long memblock_res, dev_start, dev_end; + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + unsigned long memblock_size; + int path_len = mem->buf_len; + char buf[SYSFS_ATTR_SIZE]; + unsigned long phys_index; + char *path = mem->mem_buf; + + if 
(snprintf(path, path_len, "%s/%s/phys_index", + mem_base, memblock) < 0) + return false; + + if (sysfs_read_attr(ctx, path, buf) == 0) { + phys_index = strtoul(buf, NULL, 16); + if (phys_index == 0 || phys_index == ULONG_MAX) { + err(ctx, "%s: %s: Unable to determine phys_index: %s\n", + devname, memblock, strerror(errno)); + return false; + } + } else { + err(ctx, "%s: %s: Unable to determine phys_index: %s\n", + devname, memblock, strerror(errno)); + return false; + } + + dev_start = daxctl_dev_get_resource(dev); + if (!dev_start) { + err(ctx, "%s: Unable to determine resource\n", devname); + return false; + } + dev_end = dev_start + daxctl_dev_get_size(dev); + + memblock_size = daxctl_memory_get_block_size(mem); + if (!memblock_size) { + err(ctx, "%s: Unable to determine memory block size\n", + devname); + return false; + } + memblock_res = phys_index * memblock_size; + + if (memblock_res >= dev_start && memblock_res <= dev_end) + return true; + + return false; +} + +static int op_for_one_memblock(struct daxctl_memory *mem, char *path, + enum memory_op op) +{ + struct daxctl_dev *dev = daxctl_memory_get_dev(mem); + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + int rc; + + switch (op) { + case MEM_SET_ONLINE: + return online_one_memblock(dev, path); + case MEM_SET_OFFLINE: + return offline_one_memblock(dev, path); + case MEM_IS_ONLINE: + rc = memblock_is_online(dev, path); + if (rc < 0) + return rc; + /* + * Retain the 'normal' semantics for if (memblock_is_online()), + * but since count needs rc == 0, we'll just flip rc for this op + */ + return !rc; + case MEM_COUNT: + return 0; + } + + err(ctx, "%s: BUG: unknown op: %d\n", devname, op); + return -EINVAL; +} + +static int daxctl_memory_op(struct daxctl_memory *mem, enum memory_op op) +{ + struct daxctl_dev *dev = daxctl_memory_get_dev(mem); + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const 
char *node_path; + int rc, count = 0; + struct dirent *de; + DIR *node_dir; + + node_path = daxctl_memory_get_node_path(mem); + if (!node_path) { + err(ctx, "%s: Failed to get node_path\n", devname); + return -ENXIO; + } + + node_dir = opendir(node_path); + if (!node_dir) + return -errno; + + errno = 0; + while ((de = readdir(node_dir)) != NULL) { + char *path = mem->mem_buf; + int len = mem->buf_len; + + if (strncmp(de->d_name, "memory", 6) == 0) { + if (!memblock_in_dev(mem, de->d_name)) + continue; + rc = snprintf(path, len, "%s/%s/state", + node_path, de->d_name); + if (rc < 0) { + rc = -ENOMEM; + goto out_dir; + } + rc = op_for_one_memblock(mem, path, op); + if (rc < 0) + goto out_dir; + if (rc == 0) + count++; + } + errno = 0; + } + if (errno) { + rc = -errno; + goto out_dir; + } + rc = count; + +out_dir: + closedir(node_dir); + return rc; +} + +DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_SET_ONLINE); +} + +DAXCTL_EXPORT int daxctl_memory_offline(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_SET_OFFLINE); +} + +DAXCTL_EXPORT int daxctl_memory_is_online(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_IS_ONLINE); +} + +DAXCTL_EXPORT int daxctl_memory_num_sections(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_COUNT); +} diff --git a/daxctl/lib/libdaxctl.sym b/daxctl/lib/libdaxctl.sym index 1692624..bc18604 100644 --- a/daxctl/lib/libdaxctl.sym +++ b/daxctl/lib/libdaxctl.sym @@ -59,4 +59,13 @@ global: daxctl_dev_enable_devdax; daxctl_dev_enable_ram; daxctl_dev_get_resource; + daxctl_dev_get_target_node; + daxctl_dev_get_memory; + daxctl_memory_get_dev; + daxctl_memory_get_node_path; + daxctl_memory_get_block_size; + daxctl_memory_online; + daxctl_memory_offline; + daxctl_memory_is_online; + daxctl_memory_num_sections; } LIBDAXCTL_5; diff --git a/daxctl/libdaxctl.h b/daxctl/libdaxctl.h index adf55f3..fb6c3b1 100644 --- a/daxctl/libdaxctl.h +++ 
b/daxctl/libdaxctl.h @@ -73,6 +73,17 @@ int daxctl_dev_is_enabled(struct daxctl_dev *dev); int daxctl_dev_disable(struct daxctl_dev *dev); int daxctl_dev_enable_devdax(struct daxctl_dev *dev); int daxctl_dev_enable_ram(struct daxctl_dev *dev); +int daxctl_dev_get_target_node(struct daxctl_dev *dev); + +struct daxctl_memory; +struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev); +struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem); +const char *daxctl_memory_get_node_path(struct daxctl_memory *mem); +unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem); +int daxctl_memory_online(struct daxctl_memory *mem); +int daxctl_memory_offline(struct daxctl_memory *mem); +int daxctl_memory_is_online(struct daxctl_memory *mem); +int daxctl_memory_num_sections(struct daxctl_memory *mem); #define daxctl_dev_foreach(region, dev) \ for (dev = daxctl_dev_get_first(region); \
diff --git a/daxctl/lib/libdaxctl-private.h b/daxctl/lib/libdaxctl-private.h index fee67d1..01091de 100644 --- a/daxctl/lib/libdaxctl-private.h +++ b/daxctl/lib/libdaxctl-private.h @@ -39,6 +39,13 @@ static const char *dax_modules[] = { [DAXCTL_DEV_MODE_RAM] = "kmem", }; +enum memory_op { + MEM_SET_OFFLINE, + MEM_SET_ONLINE, + MEM_IS_ONLINE, + MEM_COUNT, +}; + /** * struct daxctl_region - container for dax_devices */ @@ -70,8 +77,19 @@ struct daxctl_dev { struct kmod_module *module; struct kmod_list *kmod_list; struct daxctl_region *region; + struct daxctl_memory *mem; + int target_node; +}; + +struct daxctl_memory { + struct daxctl_dev *dev; + void *mem_buf; + size_t buf_len; + char *node_path; + unsigned long block_size; }; + static inline int check_kmod(struct kmod_ctx *kmod_ctx) { return kmod_ctx ? 0 : -ENXIO; diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c index aa0d2f2..949c56f 100644 --- a/daxctl/lib/libdaxctl.c +++ b/daxctl/lib/libdaxctl.c @@ -200,6 +200,15 @@ DAXCTL_EXPORT void daxctl_region_get_uuid(struct daxctl_region *region, uuid_t u uuid_copy(uu, region->uuid); } +static void free_mem(struct daxctl_dev *dev) +{ + if (dev && dev->mem) { + free(dev->mem->node_path); + free(dev->mem); + dev->mem = NULL; + } +} + static void free_dev(struct daxctl_dev *dev, struct list_head *head) { if (head) @@ -207,6 +216,7 @@ static void free_dev(struct daxctl_dev *dev, struct list_head *head) kmod_module_unref_list(dev->kmod_list); free(dev->dev_buf); free(dev->dev_path); + free_mem(dev); free(dev); } @@ -380,6 +390,94 @@ static struct kmod_list *to_module_list(struct daxctl_ctx *ctx, return list; } +static int dev_is_system_ram_capable(struct daxctl_dev *dev) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + char *mod_path, *mod_base; + char path[200]; + const int len = sizeof(path); + + if (!device_model_is_dax_bus(dev)) + return false; + + if (!daxctl_dev_is_enabled(dev)) + return false; + + 
if (snprintf(path, len, "%s/driver/module", dev->dev_path) >= len) { + err(ctx, "%s: buffer too small!\n", devname); + return false; + } + + mod_path = realpath(path, NULL); + if (!mod_path) + return false; + + mod_base = basename(mod_path); + if (strcmp(mod_base, dax_modules[DAXCTL_DEV_MODE_RAM]) == 0) { + free(mod_path); + return true; + } + + free(mod_path); + return false; +} + +/* + * This checks for the device to be in system-ram mode, so calling + * daxctl_dev_get_memory() on a devdax mode device will always return NULL. + */ +static struct daxctl_memory *daxctl_dev_alloc_mem(struct daxctl_dev *dev) +{ + const char *size_path = "/sys/devices/system/memory/block_size_bytes"; + const char *node_base = "/sys/devices/system/node/node"; + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + struct daxctl_memory *mem; + char buf[SYSFS_ATTR_SIZE]; + int node_num; + + if (!dev_is_system_ram_capable(dev)) + return NULL; + + mem = calloc(1, sizeof(*mem)); + if (!mem) + return NULL; + + mem->dev = dev; + + if (sysfs_read_attr(ctx, size_path, buf) == 0) { + mem->block_size = strtoul(buf, NULL, 16); + if (mem->block_size == 0 || mem->block_size == ULONG_MAX) { + err(ctx, "%s: Unable to determine memblock size: %s\n", + devname, strerror(errno)); + mem->block_size = 0; + } + } + + node_num = daxctl_dev_get_target_node(dev); + if (node_num >= 0) { + if (asprintf(&mem->node_path, "%s%d", node_base, + node_num) < 0) { + err(ctx, "%s: Unable to set node_path\n", devname); + goto err_mem; + } + } + + mem->mem_buf = calloc(1, strlen(node_base) + 256); + if (!mem->mem_buf) + goto err_node; + mem->buf_len = strlen(node_base) + 256; + + return mem; + +err_node: + free(mem->node_path); +err_mem: + free(mem); + return NULL; +} + static void *add_dax_dev(void *parent, int id, const char *daxdev_base) { const char *devname = devpath_to_devname(daxdev_base); @@ -435,6 +533,12 @@ static void *add_dax_dev(void *parent, int id, const 
char *daxdev_base) if (rc == 0) dev->kmod_list = to_module_list(ctx, buf); + sprintf(path, "%s/target_node", daxdev_base); + if (sysfs_read_attr(ctx, path, buf) == 0) + dev->target_node = strtol(buf, NULL, 0); + else + dev->target_node = -1; + daxctl_dev_foreach(region, dev_dup) if (dev_dup->id == dev->id) { free_dev(dev, NULL); @@ -862,6 +966,9 @@ DAXCTL_EXPORT int daxctl_dev_disable(struct daxctl_dev *dev) if (!daxctl_dev_is_enabled(dev)) return 0; + /* If there is a memory object, first free that */ + free_mem(dev); + daxctl_unbind(ctx, dev->dev_path); if (daxctl_dev_is_enabled(dev)) { @@ -944,3 +1051,280 @@ DAXCTL_EXPORT unsigned long long daxctl_dev_get_size(struct daxctl_dev *dev) { return dev->size; } + +DAXCTL_EXPORT int daxctl_dev_get_target_node(struct daxctl_dev *dev) +{ + return dev->target_node; +} + +DAXCTL_EXPORT struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev) +{ + if (dev->mem) + return dev->mem; + else + return daxctl_dev_alloc_mem(dev); +} + +DAXCTL_EXPORT struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem) +{ + return mem->dev; +} + +DAXCTL_EXPORT const char *daxctl_memory_get_node_path(struct daxctl_memory *mem) +{ + return mem->node_path; +} + +DAXCTL_EXPORT unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem) +{ + return mem->block_size; +} + +static int online_one_memblock(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const char *mode = "online_movable"; + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + /* + * if already online, possibly due to kernel config or a udev rule, + * there is nothing to do and we can skip over the memblock + */ + if (strncmp(buf, "online", 6) == 0) + return 1; + + rc = sysfs_write_attr_quiet(ctx, path, mode); + if (rc) { + /* + 
* While we performed an already-online check above, there + * is still a TOCTOU hole where someone (such as a udev rule) + * may have raced to online the memory. In such a case, + * the sysfs store will fail, however we can check for this + * by simply reading the state again. If it changed to the + * desired state, then we don't have to error out. + */ + if (sysfs_read_attr(ctx, path, buf) == 0) { + if (strncmp(buf, "online", 6) == 0) + return 1; + } + err(ctx, "%s: Failed to online %s: %s\n", + devname, path, strerror(-rc)); + } + return rc; +} + +static int offline_one_memblock(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const char *mode = "offline"; + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + /* if already offline, there is nothing to do */ + if (strncmp(buf, "offline", 7) == 0) + return 1; + + rc = sysfs_write_attr_quiet(ctx, path, mode); + if (rc) { + /* Close the TOCTOU hole like in online_one_memblock() above */ + if (sysfs_read_attr(ctx, path, buf) == 0) { + if (strncmp(buf, "offline", 7) == 0) + return 1; + } + err(ctx, "%s: Failed to offline %s: %s\n", + devname, path, strerror(-rc)); + } + return rc; +} + +static int memblock_is_online(struct daxctl_dev *dev, char *path) +{ + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + char buf[SYSFS_ATTR_SIZE]; + int rc; + + rc = sysfs_read_attr(ctx, path, buf); + if (rc) { + err(ctx, "%s: Failed to read %s: %s\n", + devname, path, strerror(-rc)); + return rc; + } + + if (strncmp(buf, "online", 6) == 0) + return 1; + + /* offline */ + return 0; +} + +static bool memblock_in_dev(struct daxctl_dev *dev, const char *memblock) +{ + struct daxctl_memory *mem = daxctl_dev_get_memory(dev); + const char *mem_base = 
"/sys/devices/system/memory/"; + unsigned long long memblock_res, dev_start, dev_end; + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + unsigned long memblock_size; + int path_len = mem->buf_len; + char buf[SYSFS_ATTR_SIZE]; + unsigned long phys_index; + char *path = mem->mem_buf; + + if (snprintf(path, path_len, "%s/%s/phys_index", + mem_base, memblock) < 0) + return false; + + if (sysfs_read_attr(ctx, path, buf) == 0) { + phys_index = strtoul(buf, NULL, 16); + if (phys_index == 0 || phys_index == ULONG_MAX) { + err(ctx, "%s: %s: Unable to determine phys_index: %s\n", + devname, memblock, strerror(errno)); + return false; + } + } else { + err(ctx, "%s: %s: Unable to determine phys_index: %s\n", + devname, memblock, strerror(errno)); + return false; + } + + dev_start = daxctl_dev_get_resource(dev); + if (!dev_start) { + err(ctx, "%s: Unable to determine resource\n", devname); + return false; + } + dev_end = dev_start + daxctl_dev_get_size(dev); + + memblock_size = daxctl_memory_get_block_size(mem); + if (!memblock_size) { + err(ctx, "%s: Unable to determine memory block size\n", + devname); + return false; + } + memblock_res = phys_index * memblock_size; + + if (memblock_res >= dev_start && memblock_res <= dev_end) + return true; + + return false; +} + +static int op_for_one_memblock(struct daxctl_memory *mem, char *path, + enum memory_op op) +{ + struct daxctl_dev *dev = daxctl_memory_get_dev(mem); + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + int rc; + + switch (op) { + case MEM_SET_ONLINE: + return online_one_memblock(dev, path); + case MEM_SET_OFFLINE: + return offline_one_memblock(dev, path); + case MEM_IS_ONLINE: + rc = memblock_is_online(dev, path); + if (rc < 0) + return rc; + /* + * Retain the 'normal' semantics for if (memblock_is_online()), + * but since count needs rc == 0, we'll just flip rc for this op + */ + return !rc; + case 
MEM_COUNT: + return 0; + } + + err(ctx, "%s: BUG: unknown op: %d\n", devname, op); + return -EINVAL; +} + +static int daxctl_memory_op(struct daxctl_memory *mem, enum memory_op op) +{ + struct daxctl_dev *dev = daxctl_memory_get_dev(mem); + const char *devname = daxctl_dev_get_devname(dev); + struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev); + const char *node_path; + int rc, count = 0; + struct dirent *de; + DIR *node_dir; + + node_path = daxctl_memory_get_node_path(mem); + if (!node_path) { + err(ctx, "%s: Failed to get node_path\n", devname); + return -ENXIO; + } + + node_dir = opendir(node_path); + if (!node_dir) + return -errno; + + errno = 0; + while ((de = readdir(node_dir)) != NULL) { + char *path = mem->mem_buf; + int len = mem->buf_len; + + if (strncmp(de->d_name, "memory", 6) == 0) { + if (!memblock_in_dev(dev, de->d_name)) + continue; + rc = snprintf(path, len, "%s/%s/state", + node_path, de->d_name); + if (rc < 0) { + rc = -ENOMEM; + goto out_dir; + } + rc = op_for_one_memblock(mem, path, op); + if (rc < 0) + goto out_dir; + if (rc == 0) + count++; + } + errno = 0; + } + if (errno) { + rc = -errno; + goto out_dir; + } + rc = count; + +out_dir: + closedir(node_dir); + return rc; +} + +DAXCTL_EXPORT int daxctl_memory_online(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_SET_ONLINE); +} + +DAXCTL_EXPORT int daxctl_memory_offline(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_SET_OFFLINE); +} + +DAXCTL_EXPORT int daxctl_memory_is_online(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_IS_ONLINE); +} + +DAXCTL_EXPORT int daxctl_memory_num_sections(struct daxctl_memory *mem) +{ + return daxctl_memory_op(mem, MEM_COUNT); +} diff --git a/daxctl/lib/libdaxctl.sym b/daxctl/lib/libdaxctl.sym index 1692624..bc18604 100644 --- a/daxctl/lib/libdaxctl.sym +++ b/daxctl/lib/libdaxctl.sym @@ -59,4 +59,13 @@ global: daxctl_dev_enable_devdax; daxctl_dev_enable_ram; daxctl_dev_get_resource; + daxctl_dev_get_target_node; + 
daxctl_dev_get_memory; + daxctl_memory_get_dev; + daxctl_memory_get_node_path; + daxctl_memory_get_block_size; + daxctl_memory_online; + daxctl_memory_offline; + daxctl_memory_is_online; + daxctl_memory_num_sections; } LIBDAXCTL_5; diff --git a/daxctl/libdaxctl.h b/daxctl/libdaxctl.h index adf55f3..fb6c3b1 100644 --- a/daxctl/libdaxctl.h +++ b/daxctl/libdaxctl.h @@ -73,6 +73,17 @@ int daxctl_dev_is_enabled(struct daxctl_dev *dev); int daxctl_dev_disable(struct daxctl_dev *dev); int daxctl_dev_enable_devdax(struct daxctl_dev *dev); int daxctl_dev_enable_ram(struct daxctl_dev *dev); +int daxctl_dev_get_target_node(struct daxctl_dev *dev); + +struct daxctl_memory; +struct daxctl_memory *daxctl_dev_get_memory(struct daxctl_dev *dev); +struct daxctl_dev *daxctl_memory_get_dev(struct daxctl_memory *mem); +const char *daxctl_memory_get_node_path(struct daxctl_memory *mem); +unsigned long daxctl_memory_get_block_size(struct daxctl_memory *mem); +int daxctl_memory_online(struct daxctl_memory *mem); +int daxctl_memory_offline(struct daxctl_memory *mem); +int daxctl_memory_is_online(struct daxctl_memory *mem); +int daxctl_memory_num_sections(struct daxctl_memory *mem); #define daxctl_dev_foreach(region, dev) \ for (dev = daxctl_dev_get_first(region); \