Message ID | 20191204115739.53303-1-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: add possible_max_rank and make the code more readable | expand |
On 2019/12/4 19:57, xiubli@redhat.com wrote: [...] > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c > index a77e0ecb9a6b..889627817e52 100644 > --- a/fs/ceph/mdsmap.c > +++ b/fs/ceph/mdsmap.c > @@ -14,22 +14,15 @@ > #include "super.h" > > #define CEPH_MDS_IS_READY(i, ignore_laggy) \ > - (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) > + (m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy) > > static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) > { > int n = 0; > int i, j; > > - /* > - * special case for one mds, no matter it is laggy or > - * not we have no choice > - */ > - if (1 == m->m_num_mds && m->m_info[0].state > 0) > - return 0; > - I removed this because when possible_max_mds != m_num_mds and the validate MDS rank possible be 1 or a larger number. This bug existed even without my previous mdsmap series. All the other code are for enhancement. BRs [...]
On Wed, 2019-12-04 at 06:57 -0500, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > The m_num_mds here is actually the number for MDSs which are in > up:active status, and it will be duplicated to m_num_active_mds, > so remove it. > > Add possible_max_rank to the mdsmap struct and this will be > the correctly possible largest rank boundary. > > Remove the special case for one mds in __mdsmap_get_random_mds(), > because the validate mds rank may not always be 0. > > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/debugfs.c | 2 +- > fs/ceph/mds_client.c | 10 ++++---- > fs/ceph/mdsmap.c | 49 +++++++++++++++---------------------- > include/linux/ceph/mdsmap.h | 10 ++++---- > 4 files changed, 31 insertions(+), 40 deletions(-) > > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c > index facb387c2735..0b1591e76077 100644 > --- a/fs/ceph/debugfs.c > +++ b/fs/ceph/debugfs.c > @@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p) > seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds); > seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout); > seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose); > - for (i = 0; i < mdsmap->m_num_mds; i++) { > + for (i = 0; i < mdsmap->possible_max_rank; i++) { > struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; > int state = mdsmap->m_info[i].state; > seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 39f4d8501df5..036d388ddb10 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -597,7 +597,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, > { > struct ceph_mds_session *s; > > - if (mds >= mdsc->mdsmap->m_num_mds) > + if (mds >= mdsc->mdsmap->possible_max_rank) > return ERR_PTR(-EINVAL); > > s = kzalloc(sizeof(*s), GFP_NOFS); > @@ -1222,7 +1222,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, > struct ceph_mds_session *ts; > int i, mds = session->s_mds; > > - if (mds >= mdsc->mdsmap->m_num_mds) > + if (mds >= mdsc->mdsmap->possible_max_rank) > return; > > mi = &mdsc->mdsmap->m_info[mds]; > @@ -3762,7 +3762,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, > dout("check_new_map new %u old %u\n", > newmap->m_epoch, oldmap->m_epoch); > > - for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { > + for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { > if (!mdsc->sessions[i]) > continue; > s = mdsc->sessions[i]; > @@ -3776,7 +3776,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, > ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", > ceph_session_state_name(s->s_state)); > > - if (i >= newmap->m_num_mds) { > + if (i >= newmap->possible_max_rank) { > /* force close session for stopped mds */ > get_session(s); > __unregister_session(mdsc, s); > @@ -3833,7 +3833,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, > } > } > > - for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) { > + for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { > s = mdsc->sessions[i]; > if (!s) > continue; > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c > index a77e0ecb9a6b..889627817e52 100644 > --- a/fs/ceph/mdsmap.c > +++ b/fs/ceph/mdsmap.c > @@ -14,22 +14,15 @@ > #include "super.h" > > #define CEPH_MDS_IS_READY(i, ignore_laggy) \ > - (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) > + (m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy) > > static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) > { > int n = 0; > int i, j; > > - /* > - * special case for one mds, no matter it is laggy or > - * not we have no choice > - */ > - if (1 == m->m_num_mds && m->m_info[0].state > 0) > - return 0; > - > /* count */ > - for (i = 0; i < m->m_num_mds; i++) > + for (i = 0; i < m->possible_max_rank; i++) > if (CEPH_MDS_IS_READY(i, ignore_laggy)) > n++; > if (n == 0) > @@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) > > /* pick */ > n = prandom_u32() % n; > - for (j = 0, i = 0; i < m->m_num_mds; i++) { > + for (j = 0, i = 0; i < m->possible_max_rank; i++) { > if (CEPH_MDS_IS_READY(i, ignore_laggy)) > j++; > if (j > n) > @@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) > int mds; > > mds = __mdsmap_get_random_mds(m, false); > - if (mds == m->m_num_mds || mds == -1) > + if (mds == m->possible_max_rank || mds == -1) > mds = __mdsmap_get_random_mds(m, true); > > - return mds == m->m_num_mds ? -1 : mds; > + return mds == m->possible_max_rank ? -1 : mds; > } > > #define __decode_and_drop_type(p, end, type, bad) \ > @@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) > int err; > u8 mdsmap_v, mdsmap_cv; > u16 mdsmap_ev; > - u32 possible_max_rank; > > m = kzalloc(sizeof(*m), GFP_NOFS); > if (!m) > @@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) > m->m_max_mds = ceph_decode_32(p); > > /* > - * pick out the active nodes as the m_num_mds, the m_num_mds > - * maybe larger than m_max_mds when decreasing the max_mds in > - * cluster side, in other case it should less than or equal > - * to m_max_mds. > + * pick out the active nodes as the m_num_active_mds, the > + * m_num_active_mds maybe larger than m_max_mds when decreasing > + * the max_mds in cluster side, in other case it should less > + * than or equal to m_max_mds. > */ > - m->m_num_mds = n = ceph_decode_32(p); > - m->m_num_active_mds = m->m_num_mds; > + m->m_num_active_mds = n = ceph_decode_32(p); > > /* > - * the possible max rank, it maybe larger than the m->m_num_mds, > + * the possible max rank, it maybe larger than the m_num_active_mds, > * for example if the mds_max == 2 in the cluster, when the MDS(0) > * was laggy and being replaced by a new MDS, we will temporarily > * receive a new mds map with n_num_mds == 1 and the active MDS(1), > - * and the mds rank >= m->m_num_mds. > + * and the mds rank >= m_num_active_mds. > */ > - possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds); > + m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds); > > - m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); > + m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS); > if (!m->m_info) > goto nomem; > > @@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) > ceph_mds_state_name(state), > laggy ? "(laggy)" : ""); > > - if (mds < 0 || mds >= possible_max_rank) { > + if (mds < 0 || mds >= m->possible_max_rank) { > pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); > continue; > } > @@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) > > for (i = 0; i < n; i++) { > s32 mds = ceph_decode_32(p); > - if (mds >= 0 && mds < m->m_num_mds) { > + if (mds >= 0 && mds < m->possible_max_rank) { > if (m->m_info[mds].laggy) > num_laggy++; > } > } > m->m_num_laggy = num_laggy; > > - if (n > m->m_num_mds) { > + if (n > m->possible_max_rank) { > void *new_m_info = krealloc(m->m_info, > n * sizeof(*m->m_info), > GFP_NOFS | __GFP_ZERO); > @@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) > goto nomem; > m->m_info = new_m_info; > } > - m->m_num_mds = n; > + m->possible_max_rank = n; > } > > /* inc */ > @@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) > { > int i; > > - for (i = 0; i < m->m_num_mds; i++) > + for (i = 0; i < m->possible_max_rank; i++) > kfree(m->m_info[i].export_targets); > kfree(m->m_info); > kfree(m->m_data_pg_pools); > @@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) > return false; > if (m->m_num_laggy == m->m_num_active_mds) > return false; > - for (i = 0; i < m->m_num_mds; i++) { > + for (i = 0; i < m->possible_max_rank; i++) { > if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) > nr_active++; > } > diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h > index 3a66f4f926ce..35d385296fbb 100644 > --- a/include/linux/ceph/mdsmap.h > +++ b/include/linux/ceph/mdsmap.h > @@ -26,8 +26,8 @@ struct ceph_mdsmap { > u32 m_session_autoclose; /* seconds */ > u64 m_max_file_size; > u32 m_max_mds; /* expected up:active mds number */ > - int m_num_active_mds; /* actual up:active mds number */ > - int m_num_mds; /* size of m_info array */ > + u32 m_num_active_mds; /* actual up:active mds number */ > + u32 possible_max_rank; /* possible max rank index */ > struct ceph_mds_info *m_info; > > /* which object pools file data can be stored in */ > @@ -43,7 +43,7 @@ struct ceph_mdsmap { > static inline struct ceph_entity_addr * > ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) > { > - if (w >= m->m_num_mds) > + if (w >= m->possible_max_rank) > return NULL; > return &m->m_info[w].addr; > } > @@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) > static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) > { > BUG_ON(w < 0); > - if (w >= m->m_num_mds) > + if (w >= m->possible_max_rank) > return CEPH_MDS_STATE_DNE; > return m->m_info[w].state; > } > > static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) > { > - if (w >= 0 && w < m->m_num_mds) > + if (w >= 0 && w < m->possible_max_rank) > return m->m_info[w].laggy; > return false; > } Looks reasonable -- merged.
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index facb387c2735..0b1591e76077 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -33,7 +33,7 @@ static int mdsmap_show(struct seq_file *s, void *p) seq_printf(s, "max_mds %d\n", mdsmap->m_max_mds); seq_printf(s, "session_timeout %d\n", mdsmap->m_session_timeout); seq_printf(s, "session_autoclose %d\n", mdsmap->m_session_autoclose); - for (i = 0; i < mdsmap->m_num_mds; i++) { + for (i = 0; i < mdsmap->possible_max_rank; i++) { struct ceph_entity_addr *addr = &mdsmap->m_info[i].addr; int state = mdsmap->m_info[i].state; seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 39f4d8501df5..036d388ddb10 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -597,7 +597,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; - if (mds >= mdsc->mdsmap->m_num_mds) + if (mds >= mdsc->mdsmap->possible_max_rank) return ERR_PTR(-EINVAL); s = kzalloc(sizeof(*s), GFP_NOFS); @@ -1222,7 +1222,7 @@ static void __open_export_target_sessions(struct ceph_mds_client *mdsc, struct ceph_mds_session *ts; int i, mds = session->s_mds; - if (mds >= mdsc->mdsmap->m_num_mds) + if (mds >= mdsc->mdsmap->possible_max_rank) return; mi = &mdsc->mdsmap->m_info[mds]; @@ -3762,7 +3762,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, dout("check_new_map new %u old %u\n", newmap->m_epoch, oldmap->m_epoch); - for (i = 0; i < oldmap->m_num_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { if (!mdsc->sessions[i]) continue; s = mdsc->sessions[i]; @@ -3776,7 +3776,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "", ceph_session_state_name(s->s_state)); - if (i >= newmap->m_num_mds) { + if (i >= newmap->possible_max_rank) { /* force close session for stopped mds */ get_session(s); __unregister_session(mdsc, s); @@ -3833,7 +3833,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, } } - for (i = 0; i < newmap->m_num_mds && i < mdsc->max_sessions; i++) { + for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { s = mdsc->sessions[i]; if (!s) continue; diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index a77e0ecb9a6b..889627817e52 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -14,22 +14,15 @@ #include "super.h" #define CEPH_MDS_IS_READY(i, ignore_laggy) \ - (m->m_info[i].state > 0 && (ignore_laggy ? true : !m->m_info[i].laggy)) + (m->m_info[i].state > 0 && ignore_laggy ? true : !m->m_info[i].laggy) static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) { int n = 0; int i, j; - /* - * special case for one mds, no matter it is laggy or - * not we have no choice - */ - if (1 == m->m_num_mds && m->m_info[0].state > 0) - return 0; - /* count */ - for (i = 0; i < m->m_num_mds; i++) + for (i = 0; i < m->possible_max_rank; i++) if (CEPH_MDS_IS_READY(i, ignore_laggy)) n++; if (n == 0) @@ -37,7 +30,7 @@ static int __mdsmap_get_random_mds(struct ceph_mdsmap *m, bool ignore_laggy) /* pick */ n = prandom_u32() % n; - for (j = 0, i = 0; i < m->m_num_mds; i++) { + for (j = 0, i = 0; i < m->possible_max_rank; i++) { if (CEPH_MDS_IS_READY(i, ignore_laggy)) j++; if (j > n) @@ -55,10 +48,10 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) int mds; mds = __mdsmap_get_random_mds(m, false); - if (mds == m->m_num_mds || mds == -1) + if (mds == m->possible_max_rank || mds == -1) mds = __mdsmap_get_random_mds(m, true); - return mds == m->m_num_mds ? -1 : mds; + return mds == m->possible_max_rank ? -1 : mds; } #define __decode_and_drop_type(p, end, type, bad) \ @@ -129,7 +122,6 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) int err; u8 mdsmap_v, mdsmap_cv; u16 mdsmap_ev; - u32 possible_max_rank; m = kzalloc(sizeof(*m), GFP_NOFS); if (!m) @@ -157,24 +149,23 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_max_mds = ceph_decode_32(p); /* - * pick out the active nodes as the m_num_mds, the m_num_mds - * maybe larger than m_max_mds when decreasing the max_mds in - * cluster side, in other case it should less than or equal - * to m_max_mds. + * pick out the active nodes as the m_num_active_mds, the + * m_num_active_mds maybe larger than m_max_mds when decreasing + * the max_mds in cluster side, in other case it should less + * than or equal to m_max_mds. */ - m->m_num_mds = n = ceph_decode_32(p); - m->m_num_active_mds = m->m_num_mds; + m->m_num_active_mds = n = ceph_decode_32(p); /* - * the possible max rank, it maybe larger than the m->m_num_mds, + * the possible max rank, it maybe larger than the m_num_active_mds, * for example if the mds_max == 2 in the cluster, when the MDS(0) * was laggy and being replaced by a new MDS, we will temporarily * receive a new mds map with n_num_mds == 1 and the active MDS(1), - * and the mds rank >= m->m_num_mds. + * and the mds rank >= m_num_active_mds. */ - possible_max_rank = max((u32)m->m_num_mds, m->m_max_mds); + m->possible_max_rank = max(m->m_num_active_mds, m->m_max_mds); - m->m_info = kcalloc(m->m_num_mds, sizeof(*m->m_info), GFP_NOFS); + m->m_info = kcalloc(m->possible_max_rank, sizeof(*m->m_info), GFP_NOFS); if (!m->m_info) goto nomem; @@ -248,7 +239,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ceph_mds_state_name(state), laggy ? "(laggy)" : ""); - if (mds < 0 || mds >= possible_max_rank) { + if (mds < 0 || mds >= m->possible_max_rank) { pr_warn("mdsmap_decode got incorrect mds(%d)\n", mds); continue; } @@ -318,14 +309,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) for (i = 0; i < n; i++) { s32 mds = ceph_decode_32(p); - if (mds >= 0 && mds < m->m_num_mds) { + if (mds >= 0 && mds < m->possible_max_rank) { if (m->m_info[mds].laggy) num_laggy++; } } m->m_num_laggy = num_laggy; - if (n > m->m_num_mds) { + if (n > m->possible_max_rank) { void *new_m_info = krealloc(m->m_info, n * sizeof(*m->m_info), GFP_NOFS | __GFP_ZERO); @@ -333,7 +324,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) goto nomem; m->m_info = new_m_info; } - m->m_num_mds = n; + m->possible_max_rank = n; } /* inc */ @@ -404,7 +395,7 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) { int i; - for (i = 0; i < m->m_num_mds; i++) + for (i = 0; i < m->possible_max_rank; i++) kfree(m->m_info[i].export_targets); kfree(m->m_info); kfree(m->m_data_pg_pools); @@ -420,7 +411,7 @@ bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m) return false; if (m->m_num_laggy == m->m_num_active_mds) return false; - for (i = 0; i < m->m_num_mds; i++) { + for (i = 0; i < m->possible_max_rank; i++) { if (m->m_info[i].state == CEPH_MDS_STATE_ACTIVE) nr_active++; } diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 3a66f4f926ce..35d385296fbb 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -26,8 +26,8 @@ struct ceph_mdsmap { u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; u32 m_max_mds; /* expected up:active mds number */ - int m_num_active_mds; /* actual up:active mds number */ - int m_num_mds; /* size of m_info array */ + u32 m_num_active_mds; /* actual up:active mds number */ + u32 possible_max_rank; /* possible max rank index */ struct ceph_mds_info *m_info; /* which object pools file data can be stored in */ @@ -43,7 +43,7 @@ struct ceph_mdsmap { static inline struct ceph_entity_addr * ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) { - if (w >= m->m_num_mds) + if (w >= m->possible_max_rank) return NULL; return &m->m_info[w].addr; } @@ -51,14 +51,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) { BUG_ON(w < 0); - if (w >= m->m_num_mds) + if (w >= m->possible_max_rank) return CEPH_MDS_STATE_DNE; return m->m_info[w].state; } static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) { - if (w >= 0 && w < m->m_num_mds) + if (w >= 0 && w < m->possible_max_rank) return m->m_info[w].laggy; return false; }