@@ -109,6 +109,7 @@ xfs-y += xfs_log.o \
xfs_inode_item.o \
xfs_refcount_item.o \
xfs_rmap_item.o \
+ xfs_swapext_item.o \
xfs_log_recover.o \
xfs_trans_ail.o \
xfs_trans_buf.o
@@ -117,7 +117,9 @@ struct xfs_unmount_log_format {
#define XLOG_REG_TYPE_CUD_FORMAT 24
#define XLOG_REG_TYPE_BUI_FORMAT 25
#define XLOG_REG_TYPE_BUD_FORMAT 26
-#define XLOG_REG_TYPE_MAX 26
+#define XLOG_REG_TYPE_SXI_FORMAT 27
+#define XLOG_REG_TYPE_SXD_FORMAT 28
+#define XLOG_REG_TYPE_MAX 28
/*
* Flags to log operation header
@@ -240,6 +242,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_CUD 0x1243
#define XFS_LI_BUI 0x1244 /* bmbt update intent */
#define XFS_LI_BUD 0x1245
+#define XFS_LI_SXI 0x1246
+#define XFS_LI_SXD 0x1247
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -255,7 +259,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_CUI, "XFS_LI_CUI" }, \
{ XFS_LI_CUD, "XFS_LI_CUD" }, \
{ XFS_LI_BUI, "XFS_LI_BUI" }, \
- { XFS_LI_BUD, "XFS_LI_BUD" }
+ { XFS_LI_BUD, "XFS_LI_BUD" }, \
+ { XFS_LI_SXI, "XFS_LI_SXI" }, \
+ { XFS_LI_SXD, "XFS_LI_SXD" }
/*
* Inode Log Item Format definitions.
@@ -786,6 +792,51 @@ struct xfs_bud_log_format {
uint64_t bud_bui_id; /* id of corresponding bui */
};
+/*
+ * SXI/SXD (extent swapping) log format definitions
+ */
+
+struct xfs_swap_extent {
+ uint64_t se_inode1;
+ uint64_t se_inode2;
+ uint64_t se_startoff1;
+ uint64_t se_startoff2;
+ uint64_t se_blockcount;
+ uint64_t se_flags;
+ int64_t se_isize1;
+ int64_t se_isize2;
+};
+
+/* Swap extents between extended attribute forks. */
+#define XFS_SWAP_EXTENT_ATTR_FORK (1ULL << 0)
+
+/* Set the file sizes when finished. */
+#define XFS_SWAP_EXTENT_SET_SIZES (1ULL << 1)
+
+#define XFS_SWAP_EXTENT_FLAGS (XFS_SWAP_EXTENT_ATTR_FORK | \
+ XFS_SWAP_EXTENT_SET_SIZES)
+
+/* This is the structure used to lay out an sxi log item in the log. */
+struct xfs_sxi_log_format {
+ uint16_t sxi_type; /* sxi log item type */
+ uint16_t sxi_size; /* size of this item */
+ uint32_t __pad; /* must be zero */
+ uint64_t sxi_id; /* sxi identifier */
+ struct xfs_swap_extent sxi_extent; /* extent to swap */
+};
+
+/*
+ * This is the structure used to lay out an sxd log item in the
+ * log. The sxd_extents array is a variable size array whose
+ * size is given by sxd_nextents;
+ */
+struct xfs_sxd_log_format {
+ uint16_t sxd_type; /* sxd log item type */
+ uint16_t sxd_size; /* size of this item */
+ uint32_t __pad;
+ uint64_t sxd_sxi_id; /* id of corresponding bui */
+};
+
/*
* Dquot Log format definitions.
*
@@ -169,6 +169,7 @@ extern const struct xlog_recover_intent_type xlog_recover_extfree_type;
extern const struct xlog_recover_intent_type xlog_recover_rmap_type;
extern const struct xlog_recover_intent_type xlog_recover_refcount_type;
extern const struct xlog_recover_intent_type xlog_recover_bmap_type;
+extern const struct xlog_recover_intent_type xlog_recover_swapext_type;
typedef bool (*xlog_recover_release_intent_fn)(struct xlog *log,
struct xfs_log_item *item, uint64_t intent_id);
@@ -1975,6 +1975,8 @@ xlog_print_tic_res(
REG_TYPE_STR(CUD_FORMAT, "cud_format"),
REG_TYPE_STR(BUI_FORMAT, "bui_format"),
REG_TYPE_STR(BUD_FORMAT, "bud_format"),
+ REG_TYPE_STR(SXI_FORMAT, "sxi_format"),
+ REG_TYPE_STR(SXD_FORMAT, "sxd_format"),
};
BUILD_BUG_ON(ARRAY_SIZE(res_type_str) != XLOG_REG_TYPE_MAX + 1);
#undef REG_TYPE_STR
@@ -1851,6 +1851,9 @@ xlog_intent_for_type(
case XFS_LI_BUI:
case XFS_LI_BUD:
return &xlog_recover_bmap_type;
+ case XFS_LI_SXI:
+ case XFS_LI_SXD:
+ return &xlog_recover_swapext_type;
default:
return NULL;
}
@@ -1865,6 +1868,7 @@ xlog_is_intent_done_item(
case XFS_LI_RUD:
case XFS_LI_CUD:
case XFS_LI_BUD:
+ case XFS_LI_SXD:
return true;
default:
return false;
@@ -1917,6 +1921,8 @@ xlog_item_for_type(
case XFS_LI_CUD:
case XFS_LI_BUI:
case XFS_LI_BUD:
+ case XFS_LI_SXI:
+ case XFS_LI_SXD:
return &xlog_intent_item_type;
case XFS_LI_INODE:
return &xlog_inode_item_type;
@@ -35,6 +35,7 @@
#include "xfs_refcount_item.h"
#include "xfs_bmap_item.h"
#include "xfs_reflink.h"
+#include "xfs_swapext_item.h"
#include <linux/magic.h>
#include <linux/fs_context.h>
@@ -2075,8 +2076,24 @@ xfs_init_zones(void)
if (!xfs_bui_zone)
goto out_destroy_bud_zone;
+ xfs_sxd_zone = kmem_cache_create("xfs_sxd_item",
+ sizeof(struct xfs_sxd_log_item),
+ 0, 0, NULL);
+ if (!xfs_sxd_zone)
+ goto out_destroy_bui_zone;
+
+ xfs_sxi_zone = kmem_cache_create("xfs_sxi_item",
+ sizeof(struct xfs_sxi_log_item),
+ 0, 0, NULL);
+ if (!xfs_sxi_zone)
+ goto out_destroy_sxd_zone;
+
return 0;
+ out_destroy_sxd_zone:
+ kmem_cache_destroy(xfs_sxd_zone);
+ out_destroy_bui_zone:
+ kmem_cache_destroy(xfs_bui_zone);
out_destroy_bud_zone:
kmem_cache_destroy(xfs_bud_zone);
out_destroy_cui_zone:
new file mode 100644
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_shared.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_swapext_item.h"
+#include "xfs_log.h"
+#include "xfs_bmap.h"
+#include "xfs_icache.h"
+#include "xfs_trans_space.h"
+#include "xfs_error.h"
+#include "xfs_log_priv.h"
+#include "xfs_log_recover.h"
+
+kmem_zone_t *xfs_sxi_zone;
+kmem_zone_t *xfs_sxd_zone;
+
+static inline struct xfs_sxi_log_item *SXI_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_sxi_log_item, sxi_item);
+}
+
+STATIC void
+xfs_sxi_item_free(
+ struct xfs_sxi_log_item *ilip)
+{
+ kmem_cache_free(xfs_sxi_zone, ilip);
+}
+
+/*
+ * Freeing the SXI requires that we remove it from the AIL if it has already
+ * been placed there. However, the SXI may not yet have been placed in the AIL
+ * when called by xfs_sxi_release() from SXD processing due to the ordering of
+ * committed vs unpin operations in bulk insert operations. Hence the reference
+ * count to ensure only the last caller frees the SXI.
+ */
+STATIC void
+xfs_sxi_release(
+ struct xfs_sxi_log_item *ilip)
+{
+ ASSERT(atomic_read(&ilip->sxi_refcount) > 0);
+ if (atomic_dec_and_test(&ilip->sxi_refcount)) {
+ xfs_trans_ail_remove(&ilip->sxi_item, SHUTDOWN_LOG_IO_ERROR);
+ xfs_sxi_item_free(ilip);
+ }
+}
+
+
+STATIC void
+xfs_sxi_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_sxi_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given sxi log item. We use only 1 iovec, and we point that
+ * at the sxi_log_format structure embedded in the sxi item.
+ * It is at this point that we assert that all of the extent
+ * slots in the sxi item have been filled.
+ */
+STATIC void
+xfs_sxi_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_sxi_log_item *ilip = SXI_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ ilip->sxi_format.sxi_type = XFS_LI_SXI;
+ ilip->sxi_format.sxi_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_SXI_FORMAT, &ilip->sxi_format,
+ sizeof(struct xfs_sxi_log_format));
+}
+
+/*
+ * The unpin operation is the last place an SXI is manipulated in the log. It is
+ * either inserted in the AIL or aborted in the event of a log I/O error. In
+ * either case, the SXI transaction has been successfully committed to make it
+ * this far. Therefore, we expect whoever committed the SXI to either construct
+ * and commit the SXD or drop the SXD's reference in the event of error. Simply
+ * drop the log's SXI reference now that the log is done with it.
+ */
+STATIC void
+xfs_sxi_item_unpin(
+ struct xfs_log_item *lip,
+ int remove)
+{
+ struct xfs_sxi_log_item *ilip = SXI_ITEM(lip);
+
+ xfs_sxi_release(ilip);
+}
+
+/*
+ * The SXI has been either committed or aborted if the transaction has been
+ * cancelled. If the transaction was cancelled, an SXD isn't going to be
+ * constructed and thus we free the SXI here directly.
+ */
+STATIC void
+xfs_sxi_item_release(
+ struct xfs_log_item *lip)
+{
+ xfs_sxi_release(SXI_ITEM(lip));
+}
+
+static const struct xfs_item_ops xfs_sxi_item_ops = {
+ .iop_size = xfs_sxi_item_size,
+ .iop_format = xfs_sxi_item_format,
+ .iop_unpin = xfs_sxi_item_unpin,
+ .iop_release = xfs_sxi_item_release,
+};
+
+/*
+ * Allocate and initialize an sxi item with the given number of extents.
+ */
+STATIC struct xfs_sxi_log_item *
+xfs_sxi_init(
+ struct xfs_mount *mp)
+
+{
+ struct xfs_sxi_log_item *ilip;
+
+ ilip = kmem_zone_zalloc(xfs_sxi_zone, 0);
+
+ xfs_log_item_init(mp, &ilip->sxi_item, XFS_LI_SXI, &xfs_sxi_item_ops);
+ ilip->sxi_format.sxi_id = (uintptr_t)(void *)ilip;
+ atomic_set(&ilip->sxi_refcount, 2);
+
+ return ilip;
+}
+
+static inline struct xfs_sxd_log_item *SXD_ITEM(struct xfs_log_item *lip)
+{
+ return container_of(lip, struct xfs_sxd_log_item, sxd_item);
+}
+
+STATIC void
+xfs_sxd_item_size(
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
+{
+ *nvecs += 1;
+ *nbytes += sizeof(struct xfs_sxd_log_format);
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given sxd log item. We use only 1 iovec, and we point that
+ * at the sxd_log_format structure embedded in the sxd item.
+ * It is at this point that we assert that all of the extent
+ * slots in the sxd item have been filled.
+ */
+STATIC void
+xfs_sxd_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_vec *lv)
+{
+ struct xfs_sxd_log_item *dlip = SXD_ITEM(lip);
+ struct xfs_log_iovec *vecp = NULL;
+
+ dlip->sxd_format.sxd_type = XFS_LI_SXD;
+ dlip->sxd_format.sxd_size = 1;
+
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_SXD_FORMAT, &dlip->sxd_format,
+ sizeof(struct xfs_sxd_log_format));
+}
+
+/*
+ * The SXD is either committed or aborted if the transaction is cancelled. If
+ * the transaction is cancelled, drop our reference to the SXI and free the
+ * SXD.
+ */
+STATIC void
+xfs_sxd_item_release(
+ struct xfs_log_item *lip)
+{
+ struct xfs_sxd_log_item *dlip = SXD_ITEM(lip);
+
+ xfs_sxi_release(dlip->sxd_intent_log_item);
+ kmem_cache_free(xfs_sxd_zone, dlip);
+}
+
+static const struct xfs_item_ops xfs_sxd_item_ops = {
+ .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
+ .iop_size = xfs_sxd_item_size,
+ .iop_format = xfs_sxd_item_format,
+ .iop_release = xfs_sxd_item_release,
+};
+
+/*
+ * Process a swapext update intent item that was recovered from the log.
+ * We need to update some inode's bmbt.
+ */
+STATIC int
+xfs_sxi_recover(
+ struct xfs_mount *mp,
+ struct xfs_defer_freezer **dffp,
+ struct xfs_sxi_log_item *ilip)
+{
+ return -EFSCORRUPTED;
+}
+
+/*
+ * Copy an SXI format buffer from the given buf, and into the destination
+ * SXI format structure. The SXI/SXD items were designed not to need any
+ * special alignment handling.
+ */
+static int
+xfs_sxi_copy_format(
+ struct xfs_log_iovec *buf,
+ struct xfs_sxi_log_format *dst_sxi_fmt)
+{
+ struct xfs_sxi_log_format *src_sxi_fmt;
+ size_t len;
+
+ src_sxi_fmt = buf->i_addr;
+ len = sizeof(struct xfs_sxi_log_format);
+
+ if (buf->i_len == len) {
+ memcpy(dst_sxi_fmt, src_sxi_fmt, len);
+ return 0;
+ }
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
+ return -EFSCORRUPTED;
+}
+
+/*
+ * This routine is called to create an in-core extent swapext update
+ * item from the sxi format structure which was logged on disk.
+ * It allocates an in-core sxi, copies the extents from the format
+ * structure into it, and adds the sxi to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_sxi(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_sxi_log_item *ilip;
+ struct xfs_sxi_log_format *sxi_formatp;
+
+ sxi_formatp = item->ri_buf[0].i_addr;
+
+ if (sxi_formatp->__pad != 0) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+ ilip = xfs_sxi_init(mp);
+ error = xfs_sxi_copy_format(&item->ri_buf[0], &ilip->sxi_format);
+ if (error) {
+ xfs_sxi_item_free(ilip);
+ return error;
+ }
+ xlog_recover_insert_ail(log, &ilip->sxi_item, lsn);
+ xfs_sxi_release(ilip);
+ return 0;
+}
+
+STATIC bool
+xlog_release_sxi(
+ struct xlog *log,
+ struct xfs_log_item *lip,
+ uint64_t intent_id)
+{
+ struct xfs_sxi_log_item *ilip = SXI_ITEM(lip);
+ struct xfs_ail *ailp = log->l_ailp;
+
+ if (ilip->sxi_format.sxi_id == intent_id) {
+ /*
+ * Drop the SXD reference to the SXI. This
+ * removes the SXI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->ail_lock);
+ xfs_sxi_release(ilip);
+ spin_lock(&ailp->ail_lock);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * This routine is called when an SXD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding SXI if it
+ * was still in the log. To do this it searches the AIL for the SXI with an id
+ * equal to that in the SXD format structure. If we find it we drop the SXD
+ * reference, which removes the SXI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_sxd(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_sxd_log_format *sxd_formatp;
+
+ sxd_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_sxd_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_SXI, sxd_formatp->sxd_sxi_id,
+ xlog_release_sxi);
+ return 0;
+}
+
+/* Recover the SXI if necessary. */
+STATIC int
+xlog_recover_process_sxi(
+ struct xlog *log,
+ struct xfs_defer_freezer **dffp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_ail *ailp = log->l_ailp;
+ struct xfs_sxi_log_item *ilip = SXI_ITEM(lip);
+ int error;
+
+ /*
+ * Skip SXIs that we've already processed.
+ */
+ if (test_bit(XFS_SXI_RECOVERED, &ilip->sxi_flags))
+ return 0;
+
+ spin_unlock(&ailp->ail_lock);
+ error = xfs_sxi_recover(log->l_mp, dffp, ilip);
+ spin_lock(&ailp->ail_lock);
+
+ return error;
+}
+
+/* Release the SXI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_sxi(
+ struct xfs_log_item *lip)
+{
+ xfs_sxi_release(SXI_ITEM(lip));
+}
+
+const struct xlog_recover_intent_type xlog_recover_swapext_type = {
+ .recover_intent = xlog_recover_sxi,
+ .recover_done = xlog_recover_sxd,
+ .process_intent = xlog_recover_process_sxi,
+ .cancel_intent = xlog_recover_cancel_sxi,
+};
new file mode 100644
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#ifndef __XFS_SWAPEXT_ITEM_H__
+#define __XFS_SWAPEXT_ITEM_H__
+
+/*
+ * The extent swapping intent item help us perform atomic extent swaps between
+ * two inode forks. It does this by tracking the range of logical offsets that
+ * still need to be swapped, and relogs as progress happens.
+ *
+ * *I items should be recorded in the *first* of a series of rolled
+ * transactions, and the *D items should be recorded in the same transaction
+ * that records the associated bmbt updates.
+ *
+ * Should the system crash after the commit of the first transaction but
+ * before the commit of the final transaction in a series, log recovery will
+ * use the redo information recorded by the intent items to replay the
+ * rest of the extent swaps.
+ */
+
+/* kernel only SXI/SXD definitions */
+
+struct xfs_mount;
+struct kmem_zone;
+
+/*
+ * Max number of extents in fast allocation path.
+ */
+#define XFS_SXI_MAX_FAST_EXTENTS 1
+
+/*
+ * Define SXI flag bits. Manipulated by set/clear/test_bit operators.
+ */
+#define XFS_SXI_RECOVERED 1
+
+/*
+ * This is the "swapext update intent" log item. It is used to log the fact
+ * that we are swapping extents between two files. It is used in conjunction
+ * with the "swapext update done" log item described below.
+ *
+ * These log items follow the same rules as struct xfs_efi_log_item; see the
+ * comments about that structure (in xfs_extfree_item.h) for more details.
+ */
+struct xfs_sxi_log_item {
+ struct xfs_log_item sxi_item;
+ atomic_t sxi_refcount;
+ unsigned long sxi_flags;
+ struct xfs_sxi_log_format sxi_format;
+};
+
+/*
+ * This is the "swapext update done" log item. It is used to log the fact that
+ * some extent swapping mentioned in an earlier sxi item have been performed.
+ */
+struct xfs_sxd_log_item {
+ struct xfs_log_item sxd_item;
+ struct xfs_sxi_log_item *sxd_intent_log_item;
+ struct xfs_sxd_log_format sxd_format;
+};
+
+extern struct kmem_zone *xfs_sxi_zone;
+extern struct kmem_zone *xfs_sxd_zone;
+
+#endif /* __XFS_SWAPEXT_ITEM_H__ */