@@ -327,6 +327,13 @@ struct xfs_inode_log_format_32 {
*/
#define XFS_ILOG_TIMESTAMP 0x4000
+/*
+ * Likewise never copied into ilf_fields: the inode version was bumped.
+ * Set alone, or with just XFS_ILOG_TIMESTAMP, it does not require
+ * fdatasync to block on a log force.
+ */
+#define XFS_ILOG_VERSION 0x8000
+
#define XFS_ILOG_NONCORE (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
@@ -343,8 +350,8 @@ struct xfs_inode_log_format_32 {
XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
XFS_ILOG_DEV | XFS_ILOG_ADATA | \
XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
- XFS_ILOG_TIMESTAMP | XFS_ILOG_DOWNER | \
- XFS_ILOG_AOWNER)
+ XFS_ILOG_DOWNER | XFS_ILOG_AOWNER | \
+ XFS_ILOG_TIMESTAMP | XFS_ILOG_VERSION)
static inline int xfs_ilog_fbroot(int w)
{
@@ -165,27 +165,19 @@ xfs_file_fsync(
* All metadata updates are logged, which means that we just have to
* flush the log up to the latest LSN that touched the inode. If we have
* concurrent fsync/fdatasync() calls, we need them to all block on the
- * log force before we clear the ili_fsync_fields field. This ensures
- * that we don't get a racing sync operation that does not wait for the
- * metadata to hit the journal before returning. If we race with
- * clearing the ili_fsync_fields, then all that will happen is the log
- * force will do nothing as the lsn will already be on disk. We can't
- * race with setting ili_fsync_fields because that is done under
- * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
- * until after the ili_fsync_fields is cleared.
+ * log force before returning.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
- if (!datasync ||
- (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
+ if (datasync)
+ lsn = ip->i_itemp->ili_datasync_lsn;
+ else
lsn = ip->i_itemp->ili_last_lsn;
}
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (lsn) {
+ if (lsn)
error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
- ip->i_itemp->ili_fsync_fields = 0;
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
/*
* If we only have a single device, and the log force about was
@@ -2371,7 +2371,6 @@ xfs_ifree_cluster(
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
@@ -3606,7 +3605,6 @@ xfs_iflush_int(
*/
iip->ili_last_fields = iip->ili_fields;
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
iip->ili_logged = 1;
xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
@@ -441,7 +441,8 @@ xfs_inode_item_format(
}
/* update the format with the exact fields we actually logged */
- ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP);
+ ilf->ilf_fields |=
+ (iip->ili_fields & ~(XFS_ILOG_TIMESTAMP | XFS_ILOG_VERSION));
}
/*
@@ -626,6 +627,9 @@ xfs_inode_item_committed(
struct xfs_inode_log_item *iip = INODE_ITEM(lip);
struct xfs_inode *ip = iip->ili_inode;
+ iip->ili_last_lsn = 0;
+ iip->ili_datasync_lsn = 0;
+
if (xfs_iflags_test(ip, XFS_ISTALE)) {
xfs_inode_item_unpin(lip, 0);
return -1;
@@ -638,7 +642,11 @@ xfs_inode_item_committing(
struct xfs_log_item *lip,
xfs_lsn_t lsn)
{
- INODE_ITEM(lip)->ili_last_lsn = lsn;
+ struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+
+ iip->ili_last_lsn = lsn;
+ if (iip->ili_fields & ~(XFS_ILOG_TIMESTAMP | XFS_ILOG_VERSION))
+ iip->ili_datasync_lsn = lsn;
}
/*
@@ -835,7 +843,6 @@ xfs_iflush_abort(
* attempted.
*/
iip->ili_fields = 0;
- iip->ili_fsync_fields = 0;
}
/*
* Release the inode's flush lock since we're done with it.
@@ -30,11 +30,11 @@ typedef struct xfs_inode_log_item {
struct xfs_inode *ili_inode; /* inode ptr */
xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
+ xfs_lsn_t ili_datasync_lsn; /* lsn fdatasync has to force */
unsigned short ili_lock_flags; /* lock flags */
unsigned short ili_logged; /* flushed logged data */
unsigned int ili_last_fields; /* fields when flushed */
unsigned int ili_fields; /* fields to be logged */
- unsigned int ili_fsync_fields; /* logged since last fsync */
} xfs_inode_log_item_t;
static inline int xfs_inode_clean(xfs_inode_t *ip)
@@ -1090,8 +1090,7 @@ xfs_file_iomap_begin(
trace_xfs_iomap_found(ip, offset, length, 0, &imap);
}
- if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields
- & ~XFS_ILOG_TIMESTAMP))
+ if (xfs_ipincount(ip) && ip->i_itemp->ili_datasync_lsn)
iomap->flags |= IOMAP_F_DIRTY;
xfs_bmbt_to_iomap(ip, iomap, &imap);
@@ -99,15 +99,6 @@ xfs_trans_log_inode(
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- /*
- * Record the specific change for fdatasync optimisation. This
- * allows fdatasync to skip log forces for inodes that are only
- * timestamp dirty. We do this before the change count so that
- * the core being logged in this case does not impact on fdatasync
- * behaviour.
- */
- ip->i_itemp->ili_fsync_fields |= flags;
-
/*
* First time we log the inode in a transaction, bump the inode change
* counter if it is configured for this to occur. We don't use
@@ -118,7 +109,7 @@ xfs_trans_log_inode(
if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
IS_I_VERSION(VFS_I(ip))) {
VFS_I(ip)->i_version++;
- flags |= XFS_ILOG_CORE;
+ flags |= XFS_ILOG_VERSION;
}
tp->t_flags |= XFS_TRANS_DIRTY;