Message ID | 20191120082902.38666-4-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | mdsmap: fix mds choosing | expand |
On 11/20/19 4:29 PM, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > Even the MDS is in up:active state, but it also maybe laggy. Here > will skip the laggy MDSs. > > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/mds_client.c | 6 ++++-- > fs/ceph/mdsmap.c | 13 +++++++++---- > 2 files changed, 13 insertions(+), 6 deletions(-) > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 82a929084671..a4e7026aaec9 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, > frag.frag, mds, > (int)r, frag.ndist); > if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= > - CEPH_MDS_STATE_ACTIVE) > + CEPH_MDS_STATE_ACTIVE && > + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) > goto out; > } > > @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, > "frag %u mds%d (auth)\n", > inode, ceph_vinop(inode), frag.frag, mds); > if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= > - CEPH_MDS_STATE_ACTIVE) > + CEPH_MDS_STATE_ACTIVE && > + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) > goto out; > } > } For the USE_AUTH_MDS case, the request can only be handled by the auth MDS. The client should send the request to the auth MDS even if it seems laggy. > diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c > index 8b4f93e5b468..098669e6f1e4 100644 > --- a/fs/ceph/mdsmap.c > +++ b/fs/ceph/mdsmap.c > @@ -13,6 +13,7 @@ > > #include "super.h" > > +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && !m->m_info[i].laggy) > > /* > * choose a random mds that is "up" (i.e. has a state > 0), or -1. 
> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) > int i, j; > > /* special case for one mds */ > - if (1 == m->m_num_mds && m->m_info[0].state > 0) > - return 0; > + if (1 == m->m_num_mds && m->m_info[0].state > 0) { > + if (m->m_info[0].laggy) > + return -1; > + else > + return 0; > + } > > /* count */ > for (i = 0; i < m->m_num_mds; i++) > - if (m->m_info[i].state > 0) > + if (CEPH_MDS_IS_READY(i)) > n++; > if (n == 0) > return -1; > @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) > /* pick */ > n = prandom_u32() % n; > for (j = 0, i = 0; i < m->m_num_mds; i++) { > - if (m->m_info[i].state > 0) > + if (CEPH_MDS_IS_READY(i)) > j++; > if (j > n) > break; >
On 2019/11/21 10:46, Yan, Zheng wrote: > On 11/20/19 4:29 PM, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> Even the MDS is in up:active state, but it also maybe laggy. Here >> will skip the laggy MDSs. >> >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> fs/ceph/mds_client.c | 6 ++++-- >> fs/ceph/mdsmap.c | 13 +++++++++---- >> 2 files changed, 13 insertions(+), 6 deletions(-) >> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 82a929084671..a4e7026aaec9 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client >> *mdsc, >> frag.frag, mds, >> (int)r, frag.ndist); >> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >> - CEPH_MDS_STATE_ACTIVE) >> + CEPH_MDS_STATE_ACTIVE && >> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >> goto out; >> } >> @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client >> *mdsc, >> "frag %u mds%d (auth)\n", >> inode, ceph_vinop(inode), frag.frag, mds); >> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >> - CEPH_MDS_STATE_ACTIVE) >> + CEPH_MDS_STATE_ACTIVE && >> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >> goto out; >> } >> } > for use USE_AUTH_MDS case, request can only be handled by auth mds. > client should send request to auth mds even it seems laggy. > BTW, what if the corresponding auth mds is down, will it be allowed to choose another mds? From the current code it seems it might. Or, as long as the corresponding auth mds is in the up:active state, can the requests only be handled by it? Thanks. > >> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c >> index 8b4f93e5b468..098669e6f1e4 100644 >> --- a/fs/ceph/mdsmap.c >> +++ b/fs/ceph/mdsmap.c >> @@ -13,6 +13,7 @@ >> #include "super.h" >> +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && >> !m->m_info[i].laggy) >> /* >> * choose a random mds that is "up" (i.e. has a state > 0), or -1. 
>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap >> *m) >> int i, j; >> /* special case for one mds */ >> - if (1 == m->m_num_mds && m->m_info[0].state > 0) >> - return 0; >> + if (1 == m->m_num_mds && m->m_info[0].state > 0) { >> + if (m->m_info[0].laggy) >> + return -1; >> + else >> + return 0; >> + } >> /* count */ >> for (i = 0; i < m->m_num_mds; i++) >> - if (m->m_info[i].state > 0) >> + if (CEPH_MDS_IS_READY(i)) >> n++; >> if (n == 0) >> return -1; >> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) >> /* pick */ >> n = prandom_u32() % n; >> for (j = 0, i = 0; i < m->m_num_mds; i++) { >> - if (m->m_info[i].state > 0) >> + if (CEPH_MDS_IS_READY(i)) >> j++; >> if (j > n) >> break; >> >
On 11/21/19 1:24 PM, Xiubo Li wrote: > On 2019/11/21 10:46, Yan, Zheng wrote: >> On 11/20/19 4:29 PM, xiubli@redhat.com wrote: >>> From: Xiubo Li <xiubli@redhat.com> >>> >>> Even the MDS is in up:active state, but it also maybe laggy. Here >>> will skip the laggy MDSs. >>> >>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>> --- >>> fs/ceph/mds_client.c | 6 ++++-- >>> fs/ceph/mdsmap.c | 13 +++++++++---- >>> 2 files changed, 13 insertions(+), 6 deletions(-) >>> >>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>> index 82a929084671..a4e7026aaec9 100644 >>> --- a/fs/ceph/mds_client.c >>> +++ b/fs/ceph/mds_client.c >>> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client >>> *mdsc, >>> frag.frag, mds, >>> (int)r, frag.ndist); >>> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >>> - CEPH_MDS_STATE_ACTIVE) >>> + CEPH_MDS_STATE_ACTIVE && >>> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >>> goto out; >>> } >>> @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client >>> *mdsc, >>> "frag %u mds%d (auth)\n", >>> inode, ceph_vinop(inode), frag.frag, mds); >>> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >>> - CEPH_MDS_STATE_ACTIVE) >>> + CEPH_MDS_STATE_ACTIVE && >>> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >>> goto out; >>> } >>> } >> for use USE_AUTH_MDS case, request can only be handled by auth mds. >> client should send request to auth mds even it seems laggy. >> > BTW, what if the coreesponding auth mds was down, will it allow to > choose other mds ? From the current code it seems might. Or as long as > when the corresponding auth mds is in up:active state will the requests > only could to be handled by it ? > Some requests can only be handled by a given MDS. Choosing another MDS just wastes resources. > Thanks. 
> > >> >>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c >>> index 8b4f93e5b468..098669e6f1e4 100644 >>> --- a/fs/ceph/mdsmap.c >>> +++ b/fs/ceph/mdsmap.c >>> @@ -13,6 +13,7 @@ >>> #include "super.h" >>> +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && >>> !m->m_info[i].laggy) >>> /* >>> * choose a random mds that is "up" (i.e. has a state > 0), or -1. >>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap >>> *m) >>> int i, j; >>> /* special case for one mds */ >>> - if (1 == m->m_num_mds && m->m_info[0].state > 0) >>> - return 0; >>> + if (1 == m->m_num_mds && m->m_info[0].state > 0) { >>> + if (m->m_info[0].laggy) >>> + return -1; >>> + else >>> + return 0; >>> + } >>> /* count */ >>> for (i = 0; i < m->m_num_mds; i++) >>> - if (m->m_info[i].state > 0) >>> + if (CEPH_MDS_IS_READY(i)) >>> n++; >>> if (n == 0) >>> return -1; >>> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) >>> /* pick */ >>> n = prandom_u32() % n; >>> for (j = 0, i = 0; i < m->m_num_mds; i++) { >>> - if (m->m_info[i].state > 0) >>> + if (CEPH_MDS_IS_READY(i)) >>> j++; >>> if (j > n) >>> break; >>> >> >
On 2019/11/21 16:19, Yan, Zheng wrote: > On 11/21/19 1:24 PM, Xiubo Li wrote: >> On 2019/11/21 10:46, Yan, Zheng wrote: >>> On 11/20/19 4:29 PM, xiubli@redhat.com wrote: >>>> From: Xiubo Li <xiubli@redhat.com> >>>> >>>> Even the MDS is in up:active state, but it also maybe laggy. Here >>>> will skip the laggy MDSs. >>>> >>>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>>> --- >>>> fs/ceph/mds_client.c | 6 ++++-- >>>> fs/ceph/mdsmap.c | 13 +++++++++---- >>>> 2 files changed, 13 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>>> index 82a929084671..a4e7026aaec9 100644 >>>> --- a/fs/ceph/mds_client.c >>>> +++ b/fs/ceph/mds_client.c >>>> @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client >>>> *mdsc, >>>> frag.frag, mds, >>>> (int)r, frag.ndist); >>>> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >>>> - CEPH_MDS_STATE_ACTIVE) >>>> + CEPH_MDS_STATE_ACTIVE && >>>> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >>>> goto out; >>>> } >>>> @@ -987,7 +988,8 @@ static int __choose_mds(struct >>>> ceph_mds_client *mdsc, >>>> "frag %u mds%d (auth)\n", >>>> inode, ceph_vinop(inode), frag.frag, mds); >>>> if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= >>>> - CEPH_MDS_STATE_ACTIVE) >>>> + CEPH_MDS_STATE_ACTIVE && >>>> + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) >>>> goto out; >>>> } >>>> } >>> for use USE_AUTH_MDS case, request can only be handled by auth mds. >>> client should send request to auth mds even it seems laggy. >>> >> BTW, what if the coreesponding auth mds was down, will it allow to >> choose other mds ? From the current code it seems might. Or as long >> as when the corresponding auth mds is in up:active state will the >> requests only could to be handled by it ? >> > > Some requests can only be handled by given MDS. Choosing other mds > just wastes resource. > > Okay, will check it again. Thanks Yan. BRs > >> Thanks. 
>> >> >>> >>>> diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c >>>> index 8b4f93e5b468..098669e6f1e4 100644 >>>> --- a/fs/ceph/mdsmap.c >>>> +++ b/fs/ceph/mdsmap.c >>>> @@ -13,6 +13,7 @@ >>>> #include "super.h" >>>> +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && >>>> !m->m_info[i].laggy) >>>> /* >>>> * choose a random mds that is "up" (i.e. has a state > 0), or -1. >>>> @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct >>>> ceph_mdsmap *m) >>>> int i, j; >>>> /* special case for one mds */ >>>> - if (1 == m->m_num_mds && m->m_info[0].state > 0) >>>> - return 0; >>>> + if (1 == m->m_num_mds && m->m_info[0].state > 0) { >>>> + if (m->m_info[0].laggy) >>>> + return -1; >>>> + else >>>> + return 0; >>>> + } >>>> /* count */ >>>> for (i = 0; i < m->m_num_mds; i++) >>>> - if (m->m_info[i].state > 0) >>>> + if (CEPH_MDS_IS_READY(i)) >>>> n++; >>>> if (n == 0) >>>> return -1; >>>> @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap >>>> *m) >>>> /* pick */ >>>> n = prandom_u32() % n; >>>> for (j = 0, i = 0; i < m->m_num_mds; i++) { >>>> - if (m->m_info[i].state > 0) >>>> + if (CEPH_MDS_IS_READY(i)) >>>> j++; >>>> if (j > n) >>>> break; >>>> >>> >> >
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 82a929084671..a4e7026aaec9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -972,7 +972,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, frag.frag, mds, (int)r, frag.ndist); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= - CEPH_MDS_STATE_ACTIVE) + CEPH_MDS_STATE_ACTIVE && + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) goto out; } @@ -987,7 +988,8 @@ static int __choose_mds(struct ceph_mds_client *mdsc, "frag %u mds%d (auth)\n", inode, ceph_vinop(inode), frag.frag, mds); if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >= - CEPH_MDS_STATE_ACTIVE) + CEPH_MDS_STATE_ACTIVE && + !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds)) goto out; } } diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 8b4f93e5b468..098669e6f1e4 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -13,6 +13,7 @@ #include "super.h" +#define CEPH_MDS_IS_READY(i) (m->m_info[i].state > 0 && !m->m_info[i].laggy) /* * choose a random mds that is "up" (i.e. has a state > 0), or -1. @@ -23,12 +24,16 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) int i, j; /* special case for one mds */ - if (1 == m->m_num_mds && m->m_info[0].state > 0) - return 0; + if (1 == m->m_num_mds && m->m_info[0].state > 0) { + if (m->m_info[0].laggy) + return -1; + else + return 0; + } /* count */ for (i = 0; i < m->m_num_mds; i++) - if (m->m_info[i].state > 0) + if (CEPH_MDS_IS_READY(i)) n++; if (n == 0) return -1; @@ -36,7 +41,7 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) /* pick */ n = prandom_u32() % n; for (j = 0, i = 0; i < m->m_num_mds; i++) { - if (m->m_info[i].state > 0) + if (CEPH_MDS_IS_READY(i)) j++; if (j > n) break;