@@ -1537,11 +1537,33 @@ struct lu_tgt_descs {
void lu_tgt_descs_fini(struct lu_tgt_descs *ltd);
int ltd_add_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt);
-bool ltd_qos_is_usable(struct lu_tgt_descs *ltd);
int ltd_qos_penalties_calc(struct lu_tgt_descs *ltd);
int ltd_qos_update(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt,
u64 *total_wt);
+/**
+ * Whether MDT inode and space usages are balanced.
+ *
+ * Returns true only when the LQ_DIRTY bit is clear and the LQ_SAME_SPACE
+ * bit is set in ltd->ltd_qos.lq_flags; returns false otherwise.
+ */
+static inline bool ltd_qos_is_balanced(struct lu_tgt_descs *ltd)
+{
+	/* test_bit() takes the address of the flags word */
+	return !test_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags) &&
+	       test_bit(LQ_SAME_SPACE, &ltd->ltd_qos.lq_flags);
+}
+
+/**
+ * Whether QoS data is up-to-date and QoS can be applied.
+ *
+ * QoS is not usable when usages are already balanced, or when fewer than
+ * two targets are active.
+ */
+static inline bool ltd_qos_is_usable(struct lu_tgt_descs *ltd)
+{
+	bool balanced = ltd_qos_is_balanced(ltd);
+
+	return !balanced && ltd->ltd_lov_desc.ld_active_tgt_count >= 2;
+}
+
static inline struct lu_tgt_desc *ltd_first_tgt(struct lu_tgt_descs *ltd)
{
int index;
@@ -46,6 +46,8 @@ struct lmv_stripe_md {
u32 lsm_md_stripe_count;
u32 lsm_md_master_mdt_index;
u32 lsm_md_hash_type;
+ u8 lsm_md_max_inherit;
+ u8 lsm_md_max_inherit_rr;
u32 lsm_md_layout_version;
u32 lsm_md_migrate_offset;
u32 lsm_md_migrate_hash;
@@ -119,11 +121,11 @@ static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
* terminated string so only print LOV_MAXPOOLNAME bytes.
*/
CDEBUG(mask,
- "magic %#x stripe count %d master mdt %d hash type %#x version %d migrate offset %d migrate hash %#x pool %.*s\n",
+ "magic %#x stripe count %d master mdt %d hash type %#x max inherit %hhu version %d migrate offset %d migrate hash %#x pool %.*s\n",
lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
- lsm->lsm_md_migrate_hash,
+ lsm->lsm_md_max_inherit, lsm->lsm_md_layout_version,
+ lsm->lsm_md_migrate_offset, lsm->lsm_md_migrate_hash,
LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
if (!lmv_dir_striped(lsm))
@@ -1451,6 +1451,10 @@ static int ll_new_node(struct inode *dir, struct dentry *dentry,
md.default_lmv->lsm_md_master_mdt_index =
lum->lum_stripe_offset;
md.default_lmv->lsm_md_hash_type = lum->lum_hash_type;
+ md.default_lmv->lsm_md_max_inherit =
+ lum->lum_max_inherit;
+ md.default_lmv->lsm_md_max_inherit_rr =
+ lum->lum_max_inherit_rr;
err = ll_update_inode(dir, &md);
md_free_lustre_md(sbi->ll_md_exp, &md);
@@ -1695,6 +1695,22 @@ int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data)
return rc;
}
+/* User-requested QoS mkdir ('lfs mkdir -i -1'): CLI_SET_MEA is set, a user
+ * LMV with magic LMV_USER_MAGIC is supplied, and its stripe offset is
+ * LMV_OFFSET_DEFAULT.
+ */
+static inline bool lmv_op_user_qos_mkdir(const struct md_op_data *op_data)
+{
+	const struct lmv_user_md *user_md = op_data->op_data;
+
+	if (!(op_data->op_cli_flags & CLI_SET_MEA))
+		return false;
+	if (!user_md)
+		return false;
+	if (le32_to_cpu(user_md->lum_magic) != LMV_USER_MAGIC)
+		return false;
+
+	return le32_to_cpu(user_md->lum_stripe_offset) == LMV_OFFSET_DEFAULT;
+}
+
+/* True when a default LMV is present and its master MDT index is
+ * LMV_OFFSET_DEFAULT.
+ */
+static inline bool lmv_op_default_qos_mkdir(const struct md_op_data *op_data)
+{
+	const struct lmv_stripe_md *def_lsm = op_data->op_default_mea1;
+
+	if (!def_lsm)
+		return false;
+
+	return def_lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT;
+}
+
/* mkdir by QoS in two cases:
* 1. 'lfs mkdir -i -1'
* 2. parent default LMV master_mdt_index is -1
@@ -1704,27 +1720,38 @@ int lmv_old_layout_lookup(struct lmv_obd *lmv, struct md_op_data *op_data)
*/
static inline bool lmv_op_qos_mkdir(const struct md_op_data *op_data)
{
- const struct lmv_stripe_md *lsm = op_data->op_default_mea1;
- const struct lmv_user_md *lum = op_data->op_data;
-
if (op_data->op_code != LUSTRE_OPC_MKDIR)
return false;
if (lmv_dir_striped(op_data->op_mea1))
return false;
- if (op_data->op_cli_flags & CLI_SET_MEA && lum &&
- (le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC ||
- le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC_SPECIFIC) &&
- le32_to_cpu(lum->lum_stripe_offset) == LMV_OFFSET_DEFAULT)
+ if (lmv_op_user_qos_mkdir(op_data)) /* case 1: 'lfs mkdir -i -1' */
return true;
- if (lsm && lsm->lsm_md_master_mdt_index == LMV_OFFSET_DEFAULT)
+ if (lmv_op_default_qos_mkdir(op_data)) /* case 2: default LMV index -1 */
return true;
return false;
}
+/* mkdir round-robin when default LMV is set with index LMV_OFFSET_DEFAULT,
+ * and either:
+ * 1. its max_inherit_rr is not LMV_INHERIT_RR_NONE, or
+ * 2. the parent is ROOT.
+ * NB, the servers must also be balanced; that is checked by the caller.
+ */
+static inline bool lmv_op_default_rr_mkdir(const struct md_op_data *op_data)
+{
+	const struct lmv_stripe_md *def_lsm = op_data->op_default_mea1;
+
+	if (lmv_op_default_qos_mkdir(op_data)) {
+		if (def_lsm->lsm_md_max_inherit_rr != LMV_INHERIT_RR_NONE)
+			return true;
+		return fid_is_root(&op_data->op_fid1);
+	}
+
+	return false;
+}
+
/* 'lfs mkdir -i <specific_MDT>' */
static inline bool lmv_op_user_specific_mkdir(const struct md_op_data *op_data)
{
@@ -1746,6 +1773,7 @@ static inline bool lmv_op_user_specific_mkdir(const struct md_op_data *op_data)
op_data->op_default_mea1->lsm_md_master_mdt_index !=
LMV_OFFSET_DEFAULT;
}
+
int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
const void *data, size_t datalen, umode_t mode, uid_t uid,
gid_t gid, kernel_cap_t cap_effective, u64 rdev,
@@ -1793,11 +1821,23 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
if (!tgt)
return -ENODEV;
} else if (lmv_op_qos_mkdir(op_data)) {
+ struct lmv_tgt_desc *tmp = tgt;
+
tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
- if (tgt == ERR_PTR(-EAGAIN))
- tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
+ if (tgt == ERR_PTR(-EAGAIN)) {
+ if (ltd_qos_is_balanced(&lmv->lmv_mdt_descs) &&
+ !lmv_op_default_rr_mkdir(op_data) &&
+ !lmv_op_user_qos_mkdir(op_data))
+ /* if it's not necessary, don't create remote
+ * directory.
+ */
+ tgt = tmp;
+ else
+ tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
+ }
if (IS_ERR(tgt))
return PTR_ERR(tgt);
+
/*
* only update statfs after QoS mkdir, this means the cached
* statfs may be stale, and current mkdir may not follow QoS
@@ -3110,6 +3150,8 @@ static inline int lmv_unpack_user_md(struct obd_export *exp,
lsm->lsm_md_stripe_count = le32_to_cpu(lmu->lum_stripe_count);
lsm->lsm_md_master_mdt_index = le32_to_cpu(lmu->lum_stripe_offset);
lsm->lsm_md_hash_type = le32_to_cpu(lmu->lum_hash_type);
+ lsm->lsm_md_max_inherit = lmu->lum_max_inherit;
+ lsm->lsm_md_max_inherit_rr = lmu->lum_max_inherit_rr;
lsm->lsm_md_pool_name[LOV_MAXPOOLNAME] = 0;
return 0;
@@ -403,22 +403,6 @@ void ltd_del_tgt(struct lu_tgt_descs *ltd, struct lu_tgt_desc *tgt)
EXPORT_SYMBOL(ltd_del_tgt);
/**
- * Whether QoS data is up-to-date and QoS can be applied.
- */
-bool ltd_qos_is_usable(struct lu_tgt_descs *ltd)
-{
- if (!test_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags) &&
- test_bit(LQ_SAME_SPACE, &ltd->ltd_qos.lq_flags))
- return false;
-
- if (ltd->ltd_lov_desc.ld_active_tgt_count < 2)
- return false;
-
- return true;
-}
-EXPORT_SYMBOL(ltd_qos_is_usable);
-
-/**
* Calculate penalties per-tgt and per-server
*
* Re-calculate penalties when the configuration changes, active targets
@@ -2067,7 +2067,10 @@ void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
__swab32s(&lum->lum_stripe_offset);
__swab32s(&lum->lum_hash_type);
__swab32s(&lum->lum_type);
- BUILD_BUG_ON(!offsetof(typeof(*lum), lum_padding1));
+ /* lum_max_inherit and lum_max_inherit_rr do not need to be swabbed */
+ BUILD_BUG_ON(offsetof(typeof(*lum), lum_padding1) == 0);
+ BUILD_BUG_ON(offsetof(typeof(*lum), lum_padding2) == 0);
+ BUILD_BUG_ON(offsetof(typeof(*lum), lum_padding3) == 0);
switch (lum->lum_magic) {
case LMV_USER_MAGIC_SPECIFIC:
count = lum->lum_stripe_count;
@@ -789,7 +789,11 @@ struct lmv_user_md_v1 {
__u32 lum_stripe_offset; /* MDT idx for default dirstripe */
__u32 lum_hash_type; /* Dir stripe policy */
__u32 lum_type; /* LMV type: default */
- __u32 lum_padding1;
+ __u8 lum_max_inherit; /* inherit depth of default LMV */
+ __u8 lum_max_inherit_rr; /* inherit depth of default LMV to
+ * round-robin mkdir
+ */
+ __u16 lum_padding1;
__u32 lum_padding2;
__u32 lum_padding3;
char lum_pool_name[LOV_MAXPOOLNAME + 1];
@@ -815,6 +819,37 @@ enum lmv_type {
LMV_TYPE_DEFAULT = 0x0000,
};
+/* Values of lum_max_inherit. The value is decreased by 1 after each
+ * inheritance, unless it is LMV_INHERIT_UNLIMITED or > LMV_INHERIT_MAX.
+ */
+enum {
+	/* for historical reason, 0 means unlimited inheritance */
+	LMV_INHERIT_UNLIMITED	= 0,
+	/* lum_max_inherit is unlimited by default */
+	LMV_INHERIT_DEFAULT	= LMV_INHERIT_UNLIMITED,
+	/* no further inheritance */
+	LMV_INHERIT_END		= 1,
+	/* largest inherit depth */
+	LMV_INHERIT_MAX		= 250,
+	/* [251, 254] are reserved */
+	/* not set, or inherit depth has gone beyond end */
+	LMV_INHERIT_NONE	= 255,
+};
+
+enum {
+	/* not set, or inherit_rr depth has gone beyond end */
+	LMV_INHERIT_RR_NONE		= 0,
+	/* lum_max_inherit_rr is disabled by default */
+	LMV_INHERIT_RR_DEFAULT		= LMV_INHERIT_RR_NONE,
+	/* no further inheritance */
+	LMV_INHERIT_RR_END		= 1,
+	/* largest inherit depth */
+	LMV_INHERIT_RR_MAX		= 250,
+	/* [251, 254] are reserved */
+	/* unlimited inheritance */
+	LMV_INHERIT_RR_UNLIMITED	= 255,
+};
+
static inline int lmv_user_md_size(int stripes, int lmm_magic)
{
int size = sizeof(struct lmv_user_md);