@@ -68,7 +68,7 @@ static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
{
return lmv_dir_striped(lsm) &&
- (lsm->lsm_md_hash_type & LMV_HASH_FLAG_LAYOUT_CHANGE);
+ lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
}
static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
@@ -275,6 +275,15 @@ static inline u32 crush_hash(u32 a, u32 b)
return idx;
}
+/* directory layout may change in three ways:
+ * 1. directory migration: in its LMV, source stripes are appended after
+ *    target stripes; @migrate_hash is the source hash type, and
+ *    @migrate_offset is the target stripe count.
+ * 2. directory split: @migrate_hash is the hash type before the split,
+ *    and @migrate_offset is the stripe count before the split.
+ * 3. directory merge: @migrate_hash is the hash type after the merge,
+ *    and @migrate_offset is the stripe count after the merge.
+ */
static inline int
__lmv_name_to_stripe_index(u32 hash_type, u32 stripe_count,
u32 migrate_hash, u32 migrate_offset,
@@ -287,7 +296,17 @@ static inline u32 crush_hash(u32 a, u32 b)
LASSERT(namelen > 0);
LASSERT(stripe_count > 0);
- if (hash_type & LMV_HASH_FLAG_MIGRATION) {
+ if (lmv_hash_is_splitting(hash_type)) {
+ if (!new_layout) {
+ hash_type = migrate_hash;
+ stripe_count = migrate_offset;
+ }
+ } else if (lmv_hash_is_merging(hash_type)) {
+ if (new_layout) {
+ hash_type = migrate_hash;
+ stripe_count = migrate_offset;
+ }
+ } else if (lmv_hash_is_migrating(hash_type)) {
if (new_layout) {
stripe_count = migrate_offset;
} else {
@@ -317,12 +336,12 @@ static inline u32 crush_hash(u32 a, u32 b)
LASSERT(stripe_index < stripe_count);
- if ((saved_hash & LMV_HASH_FLAG_MIGRATION) && !new_layout)
+ if (!new_layout && lmv_hash_is_migrating(saved_hash))
stripe_index += migrate_offset;
LASSERT(stripe_index < saved_count);
- CDEBUG(D_INFO, "name %.*s hash %#x/%#x idx %d/%u/%u under %s layout\n",
+ CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
namelen, name, saved_hash, migrate_hash, stripe_index,
saved_count, migrate_offset, new_layout ? "new" : "old");
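
To make the three layout-change cases concrete, here is a hedged usage
sketch (not part of the patch; demo_split_lookup() and the chosen hash
types are hypothetical, and the elided trailing parameters of
__lmv_name_to_stripe_index() are assumed to be const char *name,
int namelen, bool new_layout): a directory being split from 2 FNV-1a
stripes to 4 CRUSH stripes, so @migrate_hash carries the pre-split hash
type and @migrate_offset the pre-split stripe count.

/* hypothetical caller, for illustration only */
static int demo_split_lookup(const char *name, int namelen,
			     bool new_layout)
{
	/* hash type of the new layout plus the split-in-progress flags */
	u32 hash_type = LMV_HASH_TYPE_CRUSH |
			LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION;

	/* with !new_layout the lookup falls back to @migrate_hash
	 * (LMV_HASH_TYPE_FNV_1A_64) and @migrate_offset (2) stripes
	 */
	return __lmv_name_to_stripe_index(hash_type, 4,
					  LMV_HASH_TYPE_FNV_1A_64, 2,
					  name, namelen, new_layout);
}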
@@ -382,21 +401,25 @@ static inline bool lmv_user_magic_supported(u32 lum_magic)
#define LMV_DEBUG(mask, lmv, msg) \
	CDEBUG(mask, \
-	       "%s LMV: magic %#x count %u index %u hash %#x version %u migrate offset %u migrate hash %u.\n",\
+	       "%s LMV: magic=%#x count=%u index=%u hash=%#x version=%u migrate offset=%u migrate hash=%#x\n",\
msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count, \
(lmv)->lmv_master_mdt_index, (lmv)->lmv_hash_type, \
(lmv)->lmv_layout_version, (lmv)->lmv_migrate_offset, \
(lmv)->lmv_migrate_hash)
+/* check that a master LMV is sane */
static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
{
+ if (!lmv)
+ return false;
+
if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
goto insane;
if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
goto insane;
- if (!lmv_is_known_hash_type(lmv->lmv_hash_type))
+ if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
goto insane;
return true;
@@ -405,4 +428,59 @@ static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
return false;
}
+/* LMV can be either master or stripe LMV */
+static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv)
+ return false;
+
+ if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
+ le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
+ goto insane;
+
+ if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+ goto insane;
+
+ if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+ goto insane;
+
+ return true;
+insane:
+ LMV_DEBUG(D_ERROR, lmv, "insane");
+ return false;
+}
+
+static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
+{
+ LASSERT(lmv_is_sane2(lmv));
+	return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
+{
+ LASSERT(lmv_is_sane2(lmv));
+	return lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
+{
+ LASSERT(lmv_is_sane(lmv));
+	return lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
+{
+ LASSERT(lmv_is_sane2(lmv));
+	return lmv_hash_is_restriping(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
+{
+ LASSERT(lmv_is_sane2(lmv));
+	return lmv_hash_is_layout_changing(le32_to_cpu(lmv->lmv_hash_type));
+}
+
#endif
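
The split between lmv_is_sane() and lmv_is_sane2() mirrors the LASSERTs
above: the split/merge flags can appear on a stripe LMV
(LMV_MAGIC_STRIPE) as well as on the master, while lmv_is_migrating()
asserts lmv_is_sane(), suggesting the bare migration flag is only
expected on a master LMV. A minimal sketch of a hypothetical caller
(demo_lmv_layout_in_flux() is not part of the patch; the on-disk fields
are little-endian, hence le32_to_cpu()):

/* hypothetical: is the directory layout currently in flux? */
static bool demo_lmv_layout_in_flux(const struct lmv_mds_md_v1 *lmv)
{
	if (!lmv_is_sane2(lmv))
		return false;

	/* split/merge are visible on both master and stripe LMVs */
	if (lmv_is_restriping(lmv))
		return true;

	/* check migration only on a master LMV (see the LASSERT above) */
	return le32_to_cpu(lmv->lmv_magic) == LMV_MAGIC_V1 &&
	       lmv_is_migrating(lmv);
}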
@@ -1743,6 +1743,7 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
struct obd_device *obd = exp->exp_obd;
struct lmv_obd *lmv = &obd->u.lmv;
struct lmv_tgt_desc *tgt;
+ struct mdt_body *repbody;
int rc;
if (!lmv->lmv_mdt_descs.ltd_lmv_desc.ld_active_tgt_count)
@@ -1767,19 +1768,7 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
if (IS_ERR(tgt))
return PTR_ERR(tgt);
- if (lmv_op_qos_mkdir(op_data)) {
- tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
- if (tgt == ERR_PTR(-EAGAIN))
- tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
- /*
- * only update statfs after QoS mkdir, this means the cached
- * statfs may be stale, and current mkdir may not follow QoS
- * accurately, but it's not serious, and avoids periodic statfs
- * when client doesn't mkdir by QoS.
- */
- if (!IS_ERR(tgt))
- lmv_statfs_check_update(obd, tgt);
- } else if (lmv_op_user_specific_mkdir(op_data)) {
+ if (lmv_op_user_specific_mkdir(op_data)) {
struct lmv_user_md *lum = op_data->op_data;
op_data->op_mds = le32_to_cpu(lum->lum_stripe_offset);
@@ -1792,11 +1781,22 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
tgt = lmv_tgt(lmv, op_data->op_mds);
if (!tgt)
return -ENODEV;
+ } else if (lmv_op_qos_mkdir(op_data)) {
+ tgt = lmv_locate_tgt_qos(lmv, &op_data->op_mds);
+ if (tgt == ERR_PTR(-EAGAIN))
+ tgt = lmv_locate_tgt_rr(lmv, &op_data->op_mds);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+		/*
+		 * Only update statfs after a QoS mkdir: the cached statfs
+		 * may be stale, so this mkdir may not follow QoS accurately,
+		 * but that is harmless and avoids periodic statfs when the
+		 * client does not mkdir by QoS.
+		 */
+ lmv_statfs_check_update(obd, tgt);
}
- if (IS_ERR(tgt))
- return PTR_ERR(tgt);
-
+retry:
rc = lmv_fid_alloc(NULL, exp, &op_data->op_fid2, op_data);
if (rc)
return rc;
@@ -1816,7 +1816,30 @@ int lmv_create(struct obd_export *exp, struct md_op_data *op_data,
CDEBUG(D_INODE, "Created - " DFID "\n",
PFID(&op_data->op_fid2));
}
- return rc;
+
+	/* dir restripe needs the create sent to the MDT where dir is located */
+ if (rc != -EREMOTE ||
+ !(exp_connect_flags2(exp) & OBD_CONNECT2_CRUSH))
+ return rc;
+
+ repbody = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
+ if (!repbody)
+ return -EPROTO;
+
+ /* Not cross-ref case, just get out of here. */
+ if (likely(!(repbody->mbo_valid & OBD_MD_MDS)))
+ return rc;
+
+ op_data->op_fid2 = repbody->mbo_fid1;
+ ptlrpc_req_finished(*request);
+ *request = NULL;
+
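+	/* the FID in mbo_fid1 identifies the MDT the directory is located
+	 * on; find that target, then retry, which allocates a fresh FID
+	 * from it
+	 */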
+ tgt = lmv_fid2tgt(lmv, &op_data->op_fid2);
+ if (IS_ERR(tgt))
+ return PTR_ERR(tgt);
+
+ op_data->op_mds = tgt->ltd_index;
+ goto retry;
}
static int
@@ -1760,6 +1760,10 @@ void lustre_assert_wire_constants(void)
BUILD_BUG_ON(LMV_MAGIC_V1 != 0x0CD20CD0);
BUILD_BUG_ON(LMV_MAGIC_STRIPE != 0x0CD40CD0);
BUILD_BUG_ON(LMV_HASH_TYPE_MASK != 0x0000ffff);
+ BUILD_BUG_ON(LMV_HASH_FLAG_MERGE != 0x04000000);
+ BUILD_BUG_ON(LMV_HASH_FLAG_SPLIT != 0x08000000);
+ BUILD_BUG_ON(LMV_HASH_FLAG_LOST_LMV != 0x10000000);
+ BUILD_BUG_ON(LMV_HASH_FLAG_BAD_TYPE != 0x20000000);
BUILD_BUG_ON(LMV_HASH_FLAG_MIGRATION != 0x80000000);
BUILD_BUG_ON(LMV_CRUSH_PG_COUNT != 4096);
@@ -2075,6 +2079,8 @@ void lustre_assert_wire_constants(void)
(unsigned int)MDS_OWNEROVERRIDE);
LASSERTF(MDS_HSM_RELEASE == 0x00001000UL, "found 0x%.8xUL\n",
(unsigned int)MDS_HSM_RELEASE);
+ LASSERTF(MDS_CLOSE_MIGRATE == 0x00002000UL, "found 0x%.8xUL\n",
+ (unsigned int)MDS_CLOSE_MIGRATE);
LASSERTF(MDS_CLOSE_LAYOUT_SWAP == 0x00004000UL, "found 0x%.8xUL\n",
(unsigned int)MDS_CLOSE_LAYOUT_SWAP);
LASSERTF(MDS_CLOSE_LAYOUT_MERGE == 0x00008000UL, "found 0x%.8xUL\n",
@@ -706,6 +706,9 @@ static inline bool lmv_is_known_hash_type(__u32 type)
(type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_CRUSH;
}
+#define LMV_HASH_FLAG_MERGE 0x04000000
+#define LMV_HASH_FLAG_SPLIT 0x08000000
+
/* The striped directory has ever lost its master LMV EA, then LFSCK
* re-generated it. This flag is used to indicate such case. It is an
* on-disk flag.
@@ -715,7 +718,39 @@ static inline bool lmv_is_known_hash_type(__u32 type)
#define LMV_HASH_FLAG_BAD_TYPE 0x20000000
#define LMV_HASH_FLAG_MIGRATION 0x80000000
-#define LMV_HASH_FLAG_LAYOUT_CHANGE LMV_HASH_FLAG_MIGRATION
+#define LMV_HASH_FLAG_LAYOUT_CHANGE \
+ (LMV_HASH_FLAG_MIGRATION | LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MERGE)
+
+/* both SPLIT and MIGRATION are set for directory split */
+static inline bool lmv_hash_is_splitting(__u32 hash)
+{
+ return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) ==
+ (LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION);
+}
+
+/* both MERGE and MIGRATION are set for directory merge */
+static inline bool lmv_hash_is_merging(__u32 hash)
+{
+ return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) ==
+ (LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION);
+}
+
+/* only MIGRATION is set for directory migration */
+static inline bool lmv_hash_is_migrating(__u32 hash)
+{
+ return (hash & LMV_HASH_FLAG_LAYOUT_CHANGE) == LMV_HASH_FLAG_MIGRATION;
+}
+
+static inline bool lmv_hash_is_restriping(__u32 hash)
+{
+ return lmv_hash_is_splitting(hash) || lmv_hash_is_merging(hash);
+}
+
+static inline bool lmv_hash_is_layout_changing(__u32 hash)
+{
+ return lmv_hash_is_splitting(hash) || lmv_hash_is_merging(hash) ||
+ lmv_hash_is_migrating(hash);
+}
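
As a quick cross-check of the flag algebra (illustrative only, not part
of the patch; LMV_HASH_TYPE_FNV_1A_64 is an arbitrary base hash type,
since only the flag bits decide the state):

/* illustration: which predicates fire for each flag combination */
static inline void lmv_hash_flags_demo(void)
{
	__u32 split   = LMV_HASH_TYPE_FNV_1A_64 |
			LMV_HASH_FLAG_SPLIT | LMV_HASH_FLAG_MIGRATION;
	__u32 merge   = LMV_HASH_TYPE_FNV_1A_64 |
			LMV_HASH_FLAG_MERGE | LMV_HASH_FLAG_MIGRATION;
	__u32 migrate = LMV_HASH_TYPE_FNV_1A_64 | LMV_HASH_FLAG_MIGRATION;

	LASSERT(lmv_hash_is_splitting(split));
	LASSERT(!lmv_hash_is_migrating(split));	/* split, not bare migration */
	LASSERT(lmv_hash_is_merging(merge));
	LASSERT(lmv_hash_is_migrating(migrate));
	LASSERT(lmv_hash_is_restriping(split) &&
		!lmv_hash_is_restriping(migrate));
	LASSERT(lmv_hash_is_layout_changing(migrate));
}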
struct lustre_foreign_type {
__u32 lft_type;