@@ -1457,6 +1457,12 @@ struct lu_tgt_qos {
};
+/* Default QoS tunables, expressed in percent.  lu_tgt_descs_init()
+ * rescales them with "pct * 256 / 100" before storing them in
+ * lq_threshold_rr and lq_prio_free.  The LMV_* values are used when the
+ * descriptor table is initialised with is_mdt set, the LOV_* values
+ * otherwise.
+ */
+#define LOV_QOS_DEF_THRESHOLD_RR_PCT 17
+#define LMV_QOS_DEF_THRESHOLD_RR_PCT 5
+
+#define LOV_QOS_DEF_PRIO_FREE 90
+#define LMV_QOS_DEF_PRIO_FREE 90
+
/* target descriptor */
struct lu_tgt_desc {
union {
struct dt_device *ltd_tgt;
@@ -718,11 +718,11 @@ enum md_cli_flags {
};
enum md_op_code {
- LUSTRE_OPC_MKDIR = 0,
- LUSTRE_OPC_SYMLINK = 1,
- LUSTRE_OPC_MKNOD = 2,
- LUSTRE_OPC_CREATE = 3,
- LUSTRE_OPC_ANY = 5,
+ LUSTRE_OPC_MKDIR = 1, /* numbering now starts at 1; no enumerator has value 0 */
+ LUSTRE_OPC_SYMLINK, /* 2 (was 1) */
+ LUSTRE_OPC_MKNOD, /* 3 (was 2) */
+ LUSTRE_OPC_CREATE, /* 4 (was 3) */
+ LUSTRE_OPC_ANY, /* 5, the same value it had when it was set explicitly */
};
/**
@@ -1429,9 +1429,10 @@ static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, u32 *mdt)
{
- struct lu_tgt_desc *tgt;
+ struct lu_tgt_desc *tgt, *cur = NULL;
u64 total_weight = 0;
u64 cur_weight = 0;
+ int total_usable = 0;
u64 rand;
int rc;
@@ -1452,15 +1453,30 @@ static struct lu_tgt_desc *lmv_locate_tgt_qos(struct lmv_obd *lmv, u32 *mdt)
}
lmv_foreach_tgt(lmv, tgt) {
- tgt->ltd_qos.ltq_usable = 0;
- if (!tgt->ltd_exp || !tgt->ltd_active)
+ if (!tgt->ltd_exp || !tgt->ltd_active) {
+ tgt->ltd_qos.ltq_usable = 0;
continue;
+ }
tgt->ltd_qos.ltq_usable = 1;
lu_tgt_qos_weight_calc(tgt);
+ if (tgt->ltd_index == *mdt) {
+ cur = tgt;
+ cur_weight = tgt->ltd_qos.ltq_weight;
+ }
total_weight += tgt->ltd_qos.ltq_weight;
+ total_usable++;
+ }
+
+	/* If the current MDT has at least the average weight of the usable
+	 * targets, stay on it.  "cur" is NULL when no usable target has
+	 * index *mdt; in that case fall through to the random selection
+	 * below rather than jumping to unlock with tgt == NULL and rc == 0.
+	 * The total_usable test avoids a division by zero when the loop
+	 * above found no usable target.
+	 */
+	rand = total_usable ? total_weight / total_usable : 0;
+	if (cur && cur_weight >= rand) {
+		tgt = cur;
+		rc = 0;
+		goto unlock;
+	}
+ cur_weight = 0;
rand = lu_prandom_u64_max(total_weight);
lmv_foreach_connected_tgt(lmv, tgt) {
@@ -265,13 +265,21 @@ int lu_tgt_descs_init(struct lu_tgt_descs *ltd, bool is_mdt)
init_rwsem(&ltd->ltd_qos.lq_rw_sem);
set_bit(LQ_DIRTY, &ltd->ltd_qos.lq_flags);
set_bit(LQ_RESET, &ltd->ltd_qos.lq_flags);
- /* Default priority is toward free space balance */
- ltd->ltd_qos.lq_prio_free = 232;
- /* Default threshold for rr (roughly 17%) */
- ltd->ltd_qos.lq_threshold_rr = 43;
ltd->ltd_is_mdt = is_mdt;
- if (is_mdt)
+ /* MDT imbalance threshold is low to balance across MDTs
+ * relatively quickly, because each directory may result
+ * in a large number of files/subdirs created therein.
+ */
+ if (is_mdt) {
ltd->ltd_lmv_desc.ld_pattern = LMV_HASH_TYPE_DEFAULT;
+ ltd->ltd_qos.lq_prio_free = LMV_QOS_DEF_PRIO_FREE * 256 / 100;
+ ltd->ltd_qos.lq_threshold_rr =
+ LMV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+ } else {
+ ltd->ltd_qos.lq_prio_free = LOV_QOS_DEF_PRIO_FREE * 256 / 100;
+ ltd->ltd_qos.lq_threshold_rr =
+ LOV_QOS_DEF_THRESHOLD_RR_PCT * 256 / 100;
+ }
return 0;
}