@@ -373,6 +373,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_ZONED (1ULL << 29) /* zoned RT device */
/* Mount features */
+#define XFS_FEAT_NOLIFETIME (1ULL << 47) /* disable lifetime hints */
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
#define XFS_FEAT_NOALIGN (1ULL << 49) /* ignore alignment */
#define XFS_FEAT_ALLOCSIZE (1ULL << 50) /* user specified allocation size */
@@ -428,6 +429,7 @@ __XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE)
__XFS_HAS_FEAT(metadir, METADIR)
__XFS_HAS_FEAT(zoned, ZONED)
+__XFS_HAS_FEAT(nolifetime, NOLIFETIME)
static inline bool xfs_has_rtgroups(const struct xfs_mount *mp)
{
@@ -111,6 +111,7 @@ enum {
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
Opt_discard, Opt_nodiscard, Opt_dax, Opt_dax_enum, Opt_max_open_zones,
+ Opt_lifetime, Opt_nolifetime,
};
static const struct fs_parameter_spec xfs_fs_parameters[] = {
@@ -156,6 +157,8 @@ static const struct fs_parameter_spec xfs_fs_parameters[] = {
fsparam_flag("dax", Opt_dax),
fsparam_enum("dax", Opt_dax_enum, dax_param_enums),
fsparam_u32("max_open_zones", Opt_max_open_zones),
+ fsparam_flag("lifetime", Opt_lifetime),
+ fsparam_flag("nolifetime", Opt_nolifetime),
{}
};
@@ -184,6 +187,7 @@ xfs_fs_show_options(
{ XFS_FEAT_LARGE_IOSIZE, ",largeio" },
{ XFS_FEAT_DAX_ALWAYS, ",dax=always" },
{ XFS_FEAT_DAX_NEVER, ",dax=never" },
+ { XFS_FEAT_NOLIFETIME, ",nolifetime" },
{ 0, NULL }
};
struct xfs_mount *mp = XFS_M(root->d_sb);
@@ -1091,6 +1095,11 @@ xfs_finish_flags(
"max_open_zones mount option only supported on zoned file systems.");
return -EINVAL;
}
+ if (mp->m_features & XFS_FEAT_NOLIFETIME) {
+ xfs_warn(mp,
+"nolifetime mount option only supported on zoned file systems.");
+ return -EINVAL;
+ }
}
return 0;
@@ -1478,6 +1487,12 @@ xfs_fs_parse_param(
case Opt_max_open_zones:
parsing_mp->m_max_open_zones = result.uint_32;
return 0;
+ case Opt_lifetime:
+ parsing_mp->m_features &= ~XFS_FEAT_NOLIFETIME;
+ return 0;
+ case Opt_nolifetime:
+ parsing_mp->m_features |= XFS_FEAT_NOLIFETIME;
+ return 0;
default:
xfs_warn(parsing_mp, "unknown mount option [%s].", param->key);
return -EINVAL;
@@ -424,6 +424,7 @@ static struct xfs_open_zone *
xfs_init_open_zone(
struct xfs_rtgroup *rtg,
xfs_rgblock_t write_pointer,
+ enum rw_hint write_hint,
bool is_gc)
{
struct xfs_open_zone *oz;
@@ -434,6 +435,7 @@ xfs_init_open_zone(
oz->oz_rtg = rtg;
oz->oz_write_pointer = write_pointer;
oz->oz_written = write_pointer;
+ oz->oz_write_hint = write_hint;
oz->oz_is_gc = is_gc;
/*
@@ -453,6 +455,7 @@ xfs_init_open_zone(
struct xfs_open_zone *
xfs_open_zone(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool is_gc)
{
struct xfs_zone_info *zi = mp->m_zone_info;
@@ -465,12 +468,13 @@ xfs_open_zone(
return NULL;
set_current_state(TASK_RUNNING);
- return xfs_init_open_zone(to_rtg(xg), 0, is_gc);
+ return xfs_init_open_zone(to_rtg(xg), 0, write_hint, is_gc);
}
static struct xfs_open_zone *
xfs_try_open_zone(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ enum rw_hint write_hint)
{
struct xfs_zone_info *zi = mp->m_zone_info;
struct xfs_open_zone *oz;
@@ -487,7 +491,7 @@ xfs_try_open_zone(
*/
zi->zi_nr_open_zones++;
spin_unlock(&zi->zi_open_zones_lock);
- oz = xfs_open_zone(mp, false);
+ oz = xfs_open_zone(mp, write_hint, false);
spin_lock(&zi->zi_open_zones_lock);
if (!oz) {
zi->zi_nr_open_zones--;
@@ -510,16 +514,78 @@ xfs_try_open_zone(
return oz;
}
+/*
+ * Decide whether data with the given write hint should be colocated eagerly,
+ * i.e. placed into an already open zone with a matching temperature.  This
+ * applies to data hinted as medium or short lived and to WRITE_LIFE_NONE;
+ * every other hint value returns false.
+ */
+static bool
+xfs_colocate_eagerly(
+	enum rw_hint		file_hint)
+{
+	return file_hint == WRITE_LIFE_NONE ||
+	       file_hint == WRITE_LIFE_SHORT ||
+	       file_hint == WRITE_LIFE_MEDIUM;
+}
+
+/*
+ * Check whether data carrying @file_hint fits the temperature class already
+ * assigned to the open zone @oz.  Three classes are distinguished:
+ *
+ *  - long and extreme zones accept long and extreme data
+ *  - medium zones accept medium data only
+ *  - short, none and not-set zones accept any hint up to WRITE_LIFE_SHORT
+ */
+static bool
+xfs_good_hint_match(
+	struct xfs_open_zone	*oz,
+	enum rw_hint		file_hint)
+{
+	enum rw_hint		zone_hint = oz->oz_write_hint;
+
+	if (zone_hint == WRITE_LIFE_LONG || zone_hint == WRITE_LIFE_EXTREME)
+		return file_hint == WRITE_LIFE_LONG ||
+		       file_hint == WRITE_LIFE_EXTREME;
+	if (zone_hint == WRITE_LIFE_MEDIUM)
+		return file_hint == WRITE_LIFE_MEDIUM;
+	if (zone_hint == WRITE_LIFE_SHORT || zone_hint == WRITE_LIFE_NONE ||
+	    zone_hint == WRITE_LIFE_NOT_SET)
+		return file_hint <= WRITE_LIFE_SHORT;
+
+	/* any other zone hint value never matches */
+	return false;
+}
+
static bool
xfs_try_use_zone(
struct xfs_zone_info *zi,
- struct xfs_open_zone *oz)
+ enum rw_hint file_hint,
+ struct xfs_open_zone *oz,
+ bool lowspace)
{
if (oz->oz_write_pointer == rtg_blocks(oz->oz_rtg))
return false;
+ if (!lowspace && !xfs_good_hint_match(oz, file_hint))
+ return false;
if (!atomic_inc_not_zero(&oz->oz_ref))
return false;
+ /*
+ * If we have a hint set for the data, use that for the zone even if
+ * some data was written already without any hint set, but don't change
+ * the temperature after that as that would make little sense without
+ * tracking per-temperature class written block counts, which is
+ * probably overkill anyway.
+ */
+ if (file_hint != WRITE_LIFE_NOT_SET &&
+ oz->oz_write_hint == WRITE_LIFE_NOT_SET)
+ oz->oz_write_hint = file_hint;
+
/*
* If we couldn't match by inode or life time we just pick the first
* zone with enough space above. For that we want the least busy zone
@@ -534,14 +600,16 @@ xfs_try_use_zone(
static struct xfs_open_zone *
xfs_select_open_zone_lru(
- struct xfs_zone_info *zi)
+ struct xfs_zone_info *zi,
+ enum rw_hint file_hint,
+ bool lowspace)
{
struct xfs_open_zone *oz;
lockdep_assert_held(&zi->zi_open_zones_lock);
list_for_each_entry(oz, &zi->zi_open_zones, oz_entry)
- if (xfs_try_use_zone(zi, oz))
+ if (xfs_try_use_zone(zi, file_hint, oz, lowspace))
return oz;
cond_resched_lock(&zi->zi_open_zones_lock);
@@ -550,20 +618,28 @@ xfs_select_open_zone_lru(
static struct xfs_open_zone *
xfs_select_open_zone_mru(
- struct xfs_zone_info *zi)
+ struct xfs_zone_info *zi,
+ enum rw_hint file_hint) /* write hint of the data to place */
{
struct xfs_open_zone *oz;
lockdep_assert_held(&zi->zi_open_zones_lock);
list_for_each_entry_reverse(oz, &zi->zi_open_zones, oz_entry)
- if (xfs_try_use_zone(zi, oz))
+ if (xfs_try_use_zone(zi, file_hint, oz, false)) /* lowspace == false: the zone's hint must match */
return oz;
cond_resched_lock(&zi->zi_open_zones_lock);
return NULL;
}
+/*
+ * Return the write lifetime hint to use when placing data for @ip: the hint
+ * stored in the VFS inode, or WRITE_LIFE_NOT_SET when the nolifetime mount
+ * feature is enabled.
+ */
+static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip)
+{
+	return xfs_has_nolifetime(ip->i_mount) ?
+		WRITE_LIFE_NOT_SET : VFS_I(ip)->i_write_hint;
+}
+
/*
* Try to pack inodes that are written back after they were closed tight instead
* of trying to open new zones for them or spread them to the least recently
@@ -587,6 +663,7 @@ static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
static struct xfs_open_zone *
xfs_select_zone_nowait(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool pack_tight)
{
struct xfs_zone_info *zi = mp->m_zone_info;
@@ -595,20 +672,38 @@ xfs_select_zone_nowait(
if (xfs_is_shutdown(mp))
return NULL;
+ /*
+ * Try to fill up open zones with matching temperature if available. It
+ * is better to try to co-locate data when this is favorable, so we can
+ * activate empty zones when it is statistically better to separate
+ * data.
+ */
spin_lock(&zi->zi_open_zones_lock);
- if (pack_tight)
- oz = xfs_select_open_zone_mru(zi);
+ if (xfs_colocate_eagerly(write_hint))
+ oz = xfs_select_open_zone_lru(zi, write_hint, false);
+ else if (pack_tight)
+ oz = xfs_select_open_zone_mru(zi, write_hint);
if (oz)
goto out_unlock;
/*
* See if we can open a new zone and use that.
*/
- oz = xfs_try_open_zone(mp);
+ oz = xfs_try_open_zone(mp, write_hint);
if (oz)
goto out_unlock;
- oz = xfs_select_open_zone_lru(zi);
+ /*
+ * Try to colocate cold data with other cold data if we failed to open a
+ * new zone for it.
+ */
+ if (write_hint != WRITE_LIFE_NOT_SET &&
+ !xfs_colocate_eagerly(write_hint))
+ oz = xfs_select_open_zone_lru(zi, write_hint, false);
+ if (!oz)
+ oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, false);
+ if (!oz)
+ oz = xfs_select_open_zone_lru(zi, WRITE_LIFE_NOT_SET, true);
out_unlock:
spin_unlock(&zi->zi_open_zones_lock);
return oz;
@@ -617,19 +712,20 @@ xfs_select_zone_nowait(
static struct xfs_open_zone *
xfs_select_zone(
struct xfs_mount *mp,
+ enum rw_hint write_hint,
bool pack_tight)
{
struct xfs_zone_info *zi = mp->m_zone_info;
DEFINE_WAIT (wait);
struct xfs_open_zone *oz;
- oz = xfs_select_zone_nowait(mp, pack_tight);
+ oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
if (oz)
return oz;
for (;;) {
prepare_to_wait(&zi->zi_zone_wait, &wait, TASK_UNINTERRUPTIBLE);
- oz = xfs_select_zone_nowait(mp, pack_tight);
+ oz = xfs_select_zone_nowait(mp, write_hint, pack_tight);
if (oz)
break;
schedule();
@@ -707,6 +803,7 @@ xfs_zone_alloc_and_submit(
{
struct xfs_inode *ip = XFS_I(ioend->io_inode);
struct xfs_mount *mp = ip->i_mount;
+ enum rw_hint write_hint = xfs_inode_write_hint(ip);
bool pack_tight = xfs_zoned_pack_tight(ip);
unsigned int alloc_len;
struct iomap_ioend *split;
@@ -724,7 +821,7 @@ xfs_zone_alloc_and_submit(
*oz = xfs_last_used_zone(ioend);
if (!*oz) {
select_zone:
- *oz = xfs_select_zone(mp, pack_tight);
+ *oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
}
@@ -862,7 +959,8 @@ xfs_init_zone(
struct xfs_open_zone *oz;
atomic_inc(&rtg_group(rtg)->xg_active_ref);
- oz = xfs_init_open_zone(rtg, write_pointer, false);
+ oz = xfs_init_open_zone(rtg, write_pointer, WRITE_LIFE_NOT_SET,
+ false);
list_add_tail(&oz->oz_entry, &zi->zi_open_zones);
zi->zi_nr_open_zones++;
@@ -547,7 +547,7 @@ xfs_zone_gc_select_target(
ASSERT(zi->zi_nr_open_zones <=
mp->m_max_open_zones - XFS_OPEN_GC_ZONES);
- oz = xfs_open_zone(mp, true);
+ oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
if (oz)
trace_xfs_zone_gc_target_opened(oz->oz_rtg);
spin_lock(&zi->zi_open_zones_lock);
@@ -1117,7 +1117,7 @@ xfs_zone_gc_mount(
zi->zi_nr_open_zones == mp->m_max_open_zones)
oz = xfs_zone_gc_steal_open(zi);
else
- oz = xfs_open_zone(mp, true);
+ oz = xfs_open_zone(mp, WRITE_LIFE_NOT_SET, true);
if (!oz) {
xfs_warn(mp, "unable to allocate a zone for gc");
error = -EIO;
@@ -26,6 +26,12 @@ struct xfs_open_zone {
*/
xfs_rgblock_t oz_written;
+ /*
+ * Write hint (data temperature) assigned to this zone, or
+ * WRITE_LIFE_NOT_SET if none was set.
+ */
+ enum rw_hint oz_write_hint;
+
/*
* Is this open zone used for garbage collection? There can only be a
* single open GC zone, which is pointed to by zi_open_gc_zone in
@@ -100,7 +106,8 @@ struct xfs_zone_info {
};
-struct xfs_open_zone *xfs_open_zone(struct xfs_mount *mp, bool is_gc);
+struct xfs_open_zone *xfs_open_zone(struct xfs_mount *mp,
+ enum rw_hint write_hint, bool is_gc);
int xfs_zone_gc_reset_sync(struct xfs_rtgroup *rtg);
bool xfs_zoned_need_gc(struct xfs_mount *mp);