@@ -101,6 +101,9 @@ config XFS_LIVE_HOOKS
bool
select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
+# Hidden helper symbol (no prompt): enabled only via "select", e.g. from
+# XFS_ONLINE_SCRUB.  Gates the xfile API and the in-memory buffer target code.
+config XFS_IN_MEMORY_FILE
+ bool
+
config XFS_ONLINE_SCRUB
bool "XFS online metadata check support"
default n
@@ -108,6 +111,7 @@ config XFS_ONLINE_SCRUB
depends on TMPFS && SHMEM
select XFS_LIVE_HOOKS
select XFS_DRAIN_INTENTS
+ select XFS_IN_MEMORY_FILE
help
If you say Y here you will be able to check metadata on a
mounted XFS filesystem. This feature is intended to reduce
@@ -6,6 +6,8 @@
#ifndef __XFS_SCRUB_XFILE_H__
#define __XFS_SCRUB_XFILE_H__
+#ifdef CONFIG_XFS_IN_MEMORY_FILE
+
struct xfile_page {
struct page *page;
void *fsdata;
@@ -76,5 +78,18 @@ int xfile_get_page(struct xfile *xf, loff_t offset, unsigned int len,
int xfile_put_page(struct xfile *xf, struct xfile_page *xbuf);
int xfile_dump(struct xfile *xf);
+#else
+/*
+ * Stubs used when CONFIG_XFS_IN_MEMORY_FILE is not set.  There is no xfile
+ * implementation to call into, so every load or store fails with -EIO.
+ *
+ * Forward-declare the type so that this branch of the header does not rely
+ * on an earlier include having declared struct xfile.
+ */
+struct xfile;
+
+/* Read @count bytes at @offset of @xf into @buf; this stub always fails. */
+static inline int
+xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset)
+{
+	return -EIO;
+}
+
+/* Write @count bytes from @buf at @offset of @xf; this stub always fails. */
+static inline int
+xfile_obj_store(struct xfile *xf, const void *buf, size_t count, loff_t offset)
+{
+	return -EIO;
+}
+#endif /* CONFIG_XFS_IN_MEMORY_FILE */
#endif /* __XFS_SCRUB_XFILE_H__ */
@@ -562,7 +562,10 @@ xfs_iomap_swapfile_activate(
struct file *swap_file,
sector_t *span)
{
- sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+ struct xfs_inode *ip = XFS_I(file_inode(swap_file));
+ struct xfs_buftarg *btp = xfs_inode_buftarg(ip);
+
+ sis->bdev = xfs_buftarg_bdev(btp);
return iomap_swapfile_activate(sis, swap_file, span,
&xfs_read_iomap_ops);
}
@@ -62,10 +62,10 @@ xfs_zero_extent(
xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
sector_t block = XFS_BB_TO_FSBT(mp, sector);
- return blkdev_issue_zeroout(target->bt_bdev,
- block << (mp->m_super->s_blocksize_bits - 9),
- count_fsb << (mp->m_super->s_blocksize_bits - 9),
- GFP_NOFS, 0);
+ return xfs_buftarg_zeroout(target,
+ block << (mp->m_super->s_blocksize_bits - 9),
+ count_fsb << (mp->m_super->s_blocksize_bits - 9),
+ GFP_NOFS, 0);
}
#ifdef CONFIG_XFS_RT
@@ -21,6 +21,7 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_ag.h"
+#include "scrub/xfile.h"
struct kmem_cache *xfs_buf_cache;
@@ -1554,6 +1555,36 @@ xfs_buf_ioapply_map(
}
+/*
+ * Perform the I/O for a buffer whose buftarg is backed by an xfile: writes
+ * are stored into the xfile and everything else is loaded from it, using the
+ * buffer's disk address and length converted to bytes.  Any failure is
+ * recorded in b_io_error (first error wins).
+ */
+static inline void
+xfs_buf_ioapply_in_memory(
+	struct xfs_buf		*bp)
+{
+	struct xfile		*xf = bp->b_target->bt_xfile;
+	loff_t			offset = BBTOB(xfs_buf_daddr(bp));
+	size_t			nbytes = BBTOB(bp->b_length);
+	int			ret;
+
+	/* Count this transfer in b_io_remaining; it is dropped again below. */
+	atomic_inc(&bp->b_io_remaining);
+
+	if (bp->b_map_count <= 1) {
+		if (bp->b_flags & XBF_WRITE)
+			ret = xfile_obj_store(xf, bp->b_addr, nbytes, offset);
+		else
+			ret = xfile_obj_load(xf, bp->b_addr, nbytes, offset);
+	} else {
+		/* Multi-map buffers are neither needed nor supported here. */
+		ASSERT(0);
+		ret = -EIO;
+	}
+
+	/* Only record the error if no earlier one has been stored. */
+	if (ret)
+		cmpxchg(&bp->b_io_error, 0, ret);
+
+	/*
+	 * NOTE(review): this gate reads bp->b_error, whereas the result of
+	 * the transfer above was stored in bp->b_io_error -- confirm that is
+	 * the intended condition.
+	 */
+	if (!bp->b_error &&
+	    xfs_buf_is_vmapped(bp) &&
+	    (bp->b_flags & XBF_READ))
+		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
+
+	/* Run I/O completion if ours was the last outstanding count. */
+	if (atomic_dec_and_test(&bp->b_io_remaining))
+		xfs_buf_ioend(bp);
+}
+
STATIC void
_xfs_buf_ioapply(
struct xfs_buf *bp)
@@ -1611,6 +1642,11 @@ _xfs_buf_ioapply(
/* we only use the buffer cache for meta-data */
op |= REQ_META;
+ if (bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY) {
+ xfs_buf_ioapply_in_memory(bp);
+ return;
+ }
+
/*
* Walk all the vectors issuing IO on them. Set up the initial offset
* into the buffer and the desired IO size before we start -
@@ -1978,9 +2014,11 @@ xfs_free_buftarg(
if (btp->bt_flags & XFS_BUFTARG_SELF_CACHED)
rhashtable_destroy(&btp->bt_bufhash);
- blkdev_issue_flush(btp->bt_bdev);
- invalidate_bdev(btp->bt_bdev);
- fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+ if (!(btp->bt_flags & XFS_BUFTARG_IN_MEMORY)) {
+ blkdev_issue_flush(btp->bt_bdev);
+ invalidate_bdev(btp->bt_bdev);
+ fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+ }
kmem_free(btp);
}
@@ -2024,12 +2062,13 @@ xfs_setsize_buftarg_early(
static struct xfs_buftarg *
__xfs_alloc_buftarg(
struct xfs_mount *mp,
- unsigned int flags)
+ unsigned int flags,
+ xfs_km_flags_t km_flags)
{
struct xfs_buftarg *btp;
int error;
- btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
+ btp = kmem_zalloc(sizeof(*btp), KM_NOFS | km_flags);
if (!btp)
return NULL;
@@ -2090,7 +2129,7 @@ xfs_alloc_buftarg(
ops = &xfs_dax_holder_operations;
#endif
- btp = __xfs_alloc_buftarg(mp, 0);
+ btp = __xfs_alloc_buftarg(mp, 0, 0);
if (!btp)
return NULL;
@@ -2109,6 +2148,35 @@ xfs_alloc_buftarg(
return NULL;
}
+#ifdef CONFIG_XFS_IN_MEMORY_FILE
+/*
+ * Set up a buffer cache target whose backing store is @xfile instead of a
+ * block device.
+ *
+ * Returns 0 and hands the new buftarg back through @btpp, or -ENOMEM if
+ * __xfs_alloc_buftarg() could not provide one.
+ */
+int
+xfs_alloc_memory_buftarg(
+	struct xfs_mount	*mp,
+	struct xfile		*xfile,
+	struct xfs_buftarg	**btpp)
+{
+	const unsigned int	bt_flags = XFS_BUFTARG_SELF_CACHED |
+					   XFS_BUFTARG_IN_MEMORY;
+	struct xfs_buftarg	*target;
+
+	target = __xfs_alloc_buftarg(mp, bt_flags, KM_MAYFAIL);
+	if (!target)
+		return -ENOMEM;
+
+	/* bt_xfile takes the place of bt_bdev; bt_dev gets an all-ones id. */
+	target->bt_dev = (dev_t)-1U;
+	target->bt_xfile = xfile;
+
+	/* Both the logical and the metadata sector size are SECTOR_SIZE. */
+	target->bt_logical_sectorsize = SECTOR_SIZE;
+	target->bt_logical_sectormask = SECTOR_SIZE - 1;
+	target->bt_meta_sectorsize = SECTOR_SIZE;
+	target->bt_meta_sectormask = SECTOR_SIZE - 1;
+
+	*btpp = target;
+	return 0;
+}
+#endif /* CONFIG_XFS_IN_MEMORY_FILE */
+
/*
* Cancel a delayed write list.
*
@@ -21,6 +21,7 @@ extern struct kmem_cache *xfs_buf_cache;
* Base types
*/
struct xfs_buf;
+struct xfile;
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
@@ -99,7 +100,10 @@ typedef unsigned int xfs_buf_flags_t;
*/
typedef struct xfs_buftarg {
dev_t bt_dev;
- struct block_device *bt_bdev;
+ union {
+ struct block_device *bt_bdev;
+ struct xfile *bt_xfile;
+ };
struct dax_device *bt_daxdev;
u64 bt_dax_part_off;
struct xfs_mount *bt_mount;
@@ -124,6 +128,20 @@ typedef struct xfs_buftarg {
/* the xfs_buftarg indexes buffers via bt_buf_hash */
#define XFS_BUFTARG_SELF_CACHED (1U << 0)
+/*
+ * The buftarg is backed by an in-memory file reached through bt_xfile.  The
+ * flag is defined as zero when CONFIG_XFS_IN_MEMORY_FILE is not set, so
+ * tests against it are then always false.
+ */
+#ifdef CONFIG_XFS_IN_MEMORY_FILE
+# define XFS_BUFTARG_IN_MEMORY (1U << 1)
+#else
+# define XFS_BUFTARG_IN_MEMORY (0)
+#endif
+
+/* Report whether @btp has XFS_BUFTARG_IN_MEMORY set in its flags. */
+static inline bool
+xfs_buftarg_in_memory(
+	struct xfs_buftarg	*btp)
+{
+	return (btp->bt_flags & XFS_BUFTARG_IN_MEMORY) != 0;
+}
+
#define XB_PAGES 2
struct xfs_buf_map {
@@ -372,13 +390,60 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
*/
struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
struct block_device *bdev);
+#ifdef CONFIG_XFS_IN_MEMORY_FILE
+int xfs_alloc_memory_buftarg(struct xfs_mount *mp, struct xfile *xfile,
+ struct xfs_buftarg **btpp);
+#endif
extern void xfs_free_buftarg(struct xfs_buftarg *);
extern void xfs_buftarg_wait(struct xfs_buftarg *);
extern void xfs_buftarg_drain(struct xfs_buftarg *);
extern int xfs_setsize_buftarg(struct xfs_buftarg *, unsigned int);
-#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
+/*
+ * Return the block device behind @btp, or NULL for an in-memory buftarg.
+ * bt_bdev shares storage with bt_xfile, so it must not be read as a block
+ * device pointer when the target is in memory.
+ */
+static inline struct block_device *
+xfs_buftarg_bdev(struct xfs_buftarg *btp)
+{
+	if (xfs_buftarg_in_memory(btp))
+		return NULL;
+	return btp->bt_bdev;
+}
+
+/*
+ * Return the block size of @btp: SECTOR_SIZE for an in-memory buftarg,
+ * otherwise whatever block_size() reports for the underlying block device.
+ */
+static inline unsigned int
+xfs_getsize_buftarg(struct xfs_buftarg *btp)
+{
+	if (xfs_buftarg_in_memory(btp))
+		return SECTOR_SIZE;
+	return block_size(btp->bt_bdev);
+}
+
+/*
+ * Report whether @btp is read-only.  In-memory buftargs are always reported
+ * as writable; for the rest the answer comes from bdev_read_only().
+ */
+static inline bool
+xfs_readonly_buftarg(struct xfs_buftarg *btp)
+{
+	if (xfs_buftarg_in_memory(btp))
+		return false;
+	return bdev_read_only(btp->bt_bdev);
+}
+
+/*
+ * Issue a cache flush to the block device behind @btp and return the result
+ * of blkdev_issue_flush().  For an in-memory buftarg there is no block
+ * device to flush, so this returns 0 without doing anything.
+ */
+static inline int
+xfs_buftarg_flush(struct xfs_buftarg *btp)
+{
+	if (xfs_buftarg_in_memory(btp))
+		return 0;
+	return blkdev_issue_flush(btp->bt_bdev);
+}
+
+/*
+ * Zero @nr_sects sectors starting at @sector on the block device behind
+ * @btp by passing all arguments through to blkdev_issue_zeroout().
+ * In-memory buftargs do not support this and get -EOPNOTSUPP.
+ */
+static inline int
+xfs_buftarg_zeroout(
+	struct xfs_buftarg	*btp,
+	sector_t		sector,
+	sector_t		nr_sects,
+	gfp_t			gfp_mask,
+	unsigned		flags)
+{
+	if (xfs_buftarg_in_memory(btp))
+		return -EOPNOTSUPP;
+	return blkdev_issue_zeroout(btp->bt_bdev, sector, nr_sects, gfp_mask,
+			flags);
+}
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
@@ -29,7 +29,7 @@ xfs_trim_extents(
xfs_daddr_t minlen,
uint64_t *blocks_trimmed)
{
- struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
+ struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
struct xfs_agf *agf;
@@ -154,8 +154,8 @@ xfs_ioc_trim(
struct xfs_mount *mp,
struct fstrim_range __user *urange)
{
- unsigned int granularity =
- bdev_discard_granularity(mp->m_ddev_targp->bt_bdev);
+ struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
+ unsigned int granularity = bdev_discard_granularity(bdev);
struct fstrim_range range;
xfs_daddr_t start, end, minlen;
xfs_agnumber_t start_agno, end_agno, agno;
@@ -164,7 +164,7 @@ xfs_ioc_trim(
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!bdev_max_discard_sectors(mp->m_ddev_targp->bt_bdev))
+ if (!bdev_max_discard_sectors(bdev))
return -EOPNOTSUPP;
/*
@@ -164,9 +164,9 @@ xfs_file_fsync(
* inode size in case of an extending write.
*/
if (XFS_IS_REALTIME_INODE(ip))
- error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+ error = xfs_buftarg_flush(mp->m_rtdev_targp);
else if (mp->m_logdev_targp != mp->m_ddev_targp)
- error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+ error = xfs_buftarg_flush(mp->m_ddev_targp);
/*
* Any inode that has dirty modifications in the log is pinned. The
@@ -189,7 +189,7 @@ xfs_file_fsync(
*/
if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
mp->m_logdev_targp == mp->m_ddev_targp) {
- err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+ err2 = xfs_buftarg_flush(mp->m_ddev_targp);
if (err2 && !error)
error = err2;
}
@@ -1762,6 +1762,7 @@ xfs_ioc_setlabel(
char __user *newlabel)
{
struct xfs_sb *sbp = &mp->m_sb;
+ struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
char label[XFSLABEL_MAX + 1];
size_t len;
int error;
@@ -1808,7 +1809,7 @@ xfs_ioc_setlabel(
error = xfs_update_secondary_sbs(mp);
mutex_unlock(&mp->m_growlock);
- invalidate_bdev(mp->m_ddev_targp->bt_bdev);
+ invalidate_bdev(bdev);
out:
mnt_drop_write_file(filp);
@@ -129,7 +129,7 @@ xfs_bmbt_to_iomap(
if (mapping_flags & IOMAP_DAX)
iomap->dax_dev = target->bt_daxdev;
else
- iomap->bdev = target->bt_bdev;
+ iomap->bdev = xfs_buftarg_bdev(target);
iomap->flags = iomap_flags;
if (xfs_ipincount(ip) &&
@@ -154,7 +154,7 @@ xfs_hole_to_iomap(
iomap->type = IOMAP_HOLE;
iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
- iomap->bdev = target->bt_bdev;
+ iomap->bdev = xfs_buftarg_bdev(target);
iomap->dax_dev = target->bt_daxdev;
}
@@ -1938,7 +1938,7 @@ xlog_write_iclog(
* writeback throttle from throttling log writes behind background
* metadata writeback and causing priority inversions.
*/
- bio_init(&iclog->ic_bio, log->l_targ->bt_bdev, iclog->ic_bvec,
+ bio_init(&iclog->ic_bio, xfs_buftarg_bdev(log->l_targ), iclog->ic_bvec,
howmany(count, PAGE_SIZE),
REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE);
iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
@@ -1959,7 +1959,7 @@ xlog_write_iclog(
* avoid shutdown re-entering this path and erroring out again.
*/
if (log->l_targ != log->l_mp->m_ddev_targp &&
- blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
+ xfs_buftarg_flush(log->l_mp->m_ddev_targp)) {
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return;
}
@@ -742,7 +742,8 @@ xlog_discard_busy_extents(
trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
busyp->length);
- error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
+ error = __blkdev_issue_discard(
+ xfs_buftarg_bdev(mp->m_ddev_targp),
XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_NOFS, &bio);
@@ -137,7 +137,8 @@ xlog_do_io(
nbblks = round_up(nbblks, log->l_sectBBsize);
ASSERT(nbblks > 0);
- error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
+ error = xfs_rw_bdev(xfs_buftarg_bdev(log->l_targ),
+ log->l_logBBstart + blk_no,
BBTOB(nbblks), data, op);
if (error && !xlog_is_shutdown(log)) {
xfs_alert(log->l_mp,
@@ -397,13 +397,13 @@ xfs_close_devices(
struct xfs_mount *mp)
{
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
+ struct block_device *logdev = xfs_buftarg_bdev(mp->m_logdev_targp);
xfs_free_buftarg(mp->m_logdev_targp);
xfs_blkdev_put(logdev);
}
if (mp->m_rtdev_targp) {
- struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
+ struct block_device *rtdev = xfs_buftarg_bdev(mp->m_rtdev_targp);
xfs_free_buftarg(mp->m_rtdev_targp);
xfs_blkdev_put(rtdev);