@@ -111,6 +111,21 @@ config BLK_CMDLINE_PARSER
See Documentation/block/cmdline-partition.txt for more information.
+config BLK_DEV_DAX
+ bool "Block layer DAX support default"
+ depends on FS_DAX
+ help
+ When DAX support is available (CONFIG_FS_DAX), raw block devices
+ can also support direct userspace access to the storage capacity
+ via mmap(2), similar to a file on a DAX-enabled filesystem.
+ However, the DAX I/O path disables some standard I/O statistics,
+ and the mmap(2) path has some functional differences due to
+ bypassing the page cache. The choice here can be overridden at
+ run time via the BLKDAXSET ioctl. If you are unsure whether the DAX
+ behavior is compatible with your environment, say N. Otherwise,
+ DAX is a significantly faster way to access persistent memory
+ from NVDIMM devices.
+
menu "Partition Types"
source "block/partitions/Kconfig"
@@ -296,7 +296,7 @@ static inline int is_unrecognized_ioctl(int ret)
}
#ifdef CONFIG_FS_DAX
-static bool blkdev_dax_capable(struct block_device *bdev)
+bool blkdev_dax_capable(struct block_device *bdev)
{
struct gendisk *disk = bdev->bd_disk;
@@ -1185,7 +1185,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
bdev->bd_disk = disk;
bdev->bd_queue = disk->queue;
bdev->bd_contains = bdev;
- bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+ bdev->bd_inode->i_flags = S_DAX;
if (!partno) {
ret = -ENXIO;
bdev->bd_part = disk_get_part(disk, partno);
@@ -1212,8 +1213,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret)
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
+ if (!blkdev_dax_capable(bdev))
+ bdev->bd_inode->i_flags &= ~S_DAX;
+ }
/*
* If the device is invalidated, rescan partition
@@ -1227,6 +1231,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
else if (ret == -ENOMEDIUM)
invalidate_partitions(disk, bdev);
}
+
if (ret)
goto out_clear;
} else {
@@ -1247,12 +1252,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
- /*
- * If the partition is not aligned on a page
- * boundary, we can't do dax I/O to it.
- */
- if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
- (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+ if (!blkdev_dax_capable(bdev))
bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
@@ -2282,6 +2282,14 @@ extern struct super_block *freeze_bdev(struct block_device *);
extern void emergency_thaw_all(void);
extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
extern int fsync_bdev(struct block_device *);
+#ifdef CONFIG_FS_DAX
+extern bool blkdev_dax_capable(struct block_device *bdev);
+#else
+static inline bool blkdev_dax_capable(struct block_device *bdev)
+{
+ return false;
+}
+#endif
extern struct super_block *blockdev_superblock;
Now that we have the ability to dynamically enable/disable DAX for a raw block inode, make the default behavior a compile-time decision. DAX does not yet have feature parity with pagecache-backed mappings, and it may disable statistics that an application depends on, so environments should knowingly enable DAX semantics. Note that this does not affect the mmap path for filesystems on top of a DAX-capable block device. They currently open-code a check for the ->direct_access() op in the gendisk. That said, DAX support is already opt-in for filesystems via a mount flag. Cc: Dave Chinner <david@fromorbit.com> [dgc: leave the dax_do_io() path alone, let it honor S_DAX] Signed-off-by: Dan Williams <dan.j.williams@intel.com> --- block/Kconfig | 15 +++++++++++++++ block/ioctl.c | 2 +- fs/block_dev.c | 16 ++++++++-------- include/linux/fs.h | 8 ++++++++ 4 files changed, 32 insertions(+), 9 deletions(-)