@@ -344,6 +344,25 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
}
EXPORT_SYMBOL_GPL(dax_copy_to_iter);
+int dax_zero_page_range(struct dax_device *dax_dev, u64 offset, size_t len)
+{
+ if (!dax_alive(dax_dev))
+ return -ENXIO;
+
+ if (!dax_dev->ops->zero_page_range)
+ return -EOPNOTSUPP;
+ /*
+ * There are no callers that want to zero across a page boundary as of
+ * now. Once users are there, this check can be removed after the
+ * device mapper code has been updated to split ranges across targets.
+ */
+ if (offset_in_page(offset) + len > PAGE_SIZE)
+ return -EIO;
+
+ return dax_dev->ops->zero_page_range(dax_dev, offset, len);
+}
+EXPORT_SYMBOL_GPL(dax_zero_page_range);
+
#ifdef CONFIG_ARCH_HAS_PMEM_API
void arch_wb_cache_pmem(void *addr, size_t size);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size)
@@ -299,6 +299,20 @@ static const struct block_device_operations pmem_fops = {
.revalidate_disk = nvdimm_revalidate_disk,
};
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, u64 offset,
+ size_t len)
+{
+ int rc;
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+ struct page *page = ZERO_PAGE(0);
+
+ rc = pmem_do_write(pmem, page, 0, offset, len);
+ if (rc > 0)
+ return -EIO;
+
+ return 0;
+}
+
static long pmem_dax_direct_access(struct dax_device *dax_dev,
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
{
@@ -330,6 +344,7 @@ static const struct dax_operations pmem_dax_ops = {
.dax_supported = generic_fsdax_supported,
.copy_from_iter = pmem_copy_from_iter,
.copy_to_iter = pmem_copy_to_iter,
+ .zero_page_range = pmem_dax_zero_page_range,
};
static const struct attribute_group *pmem_attribute_groups[] = {
@@ -34,6 +34,8 @@ struct dax_operations {
/* copy_to_iter: required operation for fs-dax direct-i/o */
size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t,
struct iov_iter *);
+ /* zero_page_range: required operation. Zero range with-in a page */
+ int (*zero_page_range)(struct dax_device *, u64, size_t);
};
extern struct attribute_group dax_attribute_group;
@@ -209,6 +211,7 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i);
+int dax_zero_page_range(struct dax_device *dax_dev, u64 offset, size_t len);
void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
Add a dax operation zero_page_range, to zero a range of memory. This will also clear any poison in the range being zeroed. As of now, zeroing of up to one page is allowed in a single call. There are no callers which are trying to zero more than a page in a single call. Once we grow the callers which zero more than a page in single call, we can add that support. Primary reason for not doing that yet is that this will add little complexity in dm implementation where a range might be spanning multiple underlying targets and one will have to split the range into multiple sub ranges and call zero_page_range() on individual targets. Suggested-by: Christoph Hellwig <hch@infradead.org> Signed-off-by: Vivek Goyal <vgoyal@redhat.com> --- drivers/dax/super.c | 19 +++++++++++++++++++ drivers/nvdimm/pmem.c | 15 +++++++++++++++ include/linux/dax.h | 3 +++ 3 files changed, 37 insertions(+)