Message ID | 20200221070556.18922-6-xiubli@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ceph: add perf metrics support | expand |
On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: > From: Xiubo Li <xiubli@redhat.com> > > It will calculate the latency for the metedata requests, which only > include the time cousumed by network and the ceph. > "and the ceph MDS" ? > item total sum_lat(us) avg_lat(us) > ----------------------------------------------------- > metadata 113 220000 1946 > > URL: https://tracker.ceph.com/issues/43215 > Signed-off-by: Xiubo Li <xiubli@redhat.com> > --- > fs/ceph/debugfs.c | 6 ++++++ > fs/ceph/mds_client.c | 20 ++++++++++++++++++++ > fs/ceph/metric.h | 13 +++++++++++++ > 3 files changed, 39 insertions(+) > > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c > index 464bfbdb970d..60f3e307fca1 100644 > --- a/fs/ceph/debugfs.c > +++ b/fs/ceph/debugfs.c > @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) > avg = total ? sum / total : 0; > seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); > > + total = percpu_counter_sum(&mdsc->metric.total_metadatas); > + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); > + sum = jiffies_to_usecs(sum); > + avg = total ? sum / total : 0; > + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); > + > seq_printf(s, "\n"); > seq_printf(s, "item total miss hit\n"); > seq_printf(s, "-------------------------------------------------\n"); > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > index 0a3447966b26..3e792eca6af7 100644 > --- a/fs/ceph/mds_client.c > +++ b/fs/ceph/mds_client.c > @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) > > /* kick calling process */ > complete_request(mdsc, req); > + > + if (!result || result == -ENOENT) { > + s64 latency = jiffies - req->r_started; > + > + ceph_update_metadata_latency(&mdsc->metric, latency); > + } Should we add an r_end_stamp field to the mds request struct and use that to calculate this? Many jiffies may have passed between the reply coming in and this point. If you really want to measure the latency that would be more accurate, I think. > out: > ceph_mdsc_put_request(req); > return; > @@ -4196,8 +4202,20 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) > if (ret) > goto err_write_latency_sum; > > + ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL); > + if (ret) > + goto err_total_metadatas; > + > + ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL); > + if (ret) > + goto err_metadata_latency_sum; > + > return 0; > > +err_metadata_latency_sum: > + percpu_counter_destroy(&metric->total_metadatas); > +err_total_metadatas: > + percpu_counter_destroy(&metric->write_latency_sum); > err_write_latency_sum: > percpu_counter_destroy(&metric->total_writes); > err_total_writes: > @@ -4553,6 +4571,8 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) > > ceph_mdsc_stop(mdsc); > > + percpu_counter_destroy(&mdsc->metric.metadata_latency_sum); > + percpu_counter_destroy(&mdsc->metric.total_metadatas); > percpu_counter_destroy(&mdsc->metric.write_latency_sum); > percpu_counter_destroy(&mdsc->metric.total_writes); > percpu_counter_destroy(&mdsc->metric.read_latency_sum); > diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h > index a3d342f258e6..4c14b4ac089d 100644 > --- a/fs/ceph/metric.h > +++ b/fs/ceph/metric.h > @@ -18,6 +18,9 @@ struct ceph_client_metric { > > struct percpu_counter total_writes; > struct percpu_counter write_latency_sum; > + > + struct percpu_counter total_metadatas; > + struct percpu_counter metadata_latency_sum; > }; > > static inline void ceph_update_cap_hit(struct ceph_client_metric *m) > @@ -65,4 +68,14 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m, > percpu_counter_add(&m->write_latency_sum, latency); > } > } > + > +static inline void ceph_update_metadata_latency(struct ceph_client_metric *m, > + s64 latency) > +{ > + if (!m) > + return; > + > + percpu_counter_inc(&m->total_metadatas); > + percpu_counter_add(&m->metadata_latency_sum, latency); > +} > #endif /* _FS_CEPH_MDS_METRIC_H */
On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote: > > On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: > > From: Xiubo Li <xiubli@redhat.com> > > > > It will calculate the latency for the metedata requests, which only > > include the time cousumed by network and the ceph. > > > > "and the ceph MDS" ? > > > item total sum_lat(us) avg_lat(us) > > ----------------------------------------------------- > > metadata 113 220000 1946 > > > > URL: https://tracker.ceph.com/issues/43215 > > Signed-off-by: Xiubo Li <xiubli@redhat.com> > > --- > > fs/ceph/debugfs.c | 6 ++++++ > > fs/ceph/mds_client.c | 20 ++++++++++++++++++++ > > fs/ceph/metric.h | 13 +++++++++++++ > > 3 files changed, 39 insertions(+) > > > > diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c > > index 464bfbdb970d..60f3e307fca1 100644 > > --- a/fs/ceph/debugfs.c > > +++ b/fs/ceph/debugfs.c > > @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) > > avg = total ? sum / total : 0; > > seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); > > > > + total = percpu_counter_sum(&mdsc->metric.total_metadatas); > > + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); > > + sum = jiffies_to_usecs(sum); > > + avg = total ? sum / total : 0; > > + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); > > + > > seq_printf(s, "\n"); > > seq_printf(s, "item total miss hit\n"); > > seq_printf(s, "-------------------------------------------------\n"); > > diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > > index 0a3447966b26..3e792eca6af7 100644 > > --- a/fs/ceph/mds_client.c > > +++ b/fs/ceph/mds_client.c > > @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) > > > > /* kick calling process */ > > complete_request(mdsc, req); > > + > > + if (!result || result == -ENOENT) { > > + s64 latency = jiffies - req->r_started; > > + > > + ceph_update_metadata_latency(&mdsc->metric, latency); > > + } > > Should we add an r_end_stamp field to the mds request struct and use > that to calculate this? Many jiffies may have passed between the reply > coming in and this point. If you really want to measure the latency that > would be more accurate, I think. Yes, capturing it after invoking the callback is inconsistent with what is done for OSD requests (the new r_end_stamp is set in finish_request()). It looks like this is the only place where MDS r_end_stamp would be needed, so perhaps just move this before complete_request() call? Thanks, Ilya
On 2020/2/21 20:03, Jeff Layton wrote: > On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: >> From: Xiubo Li <xiubli@redhat.com> >> >> It will calculate the latency for the metedata requests, which only >> include the time cousumed by network and the ceph. >> > "and the ceph MDS" ? Yeah, will fix the commit comment. > >> item total sum_lat(us) avg_lat(us) >> ----------------------------------------------------- >> metadata 113 220000 1946 >> >> URL: https://tracker.ceph.com/issues/43215 >> Signed-off-by: Xiubo Li <xiubli@redhat.com> >> --- >> fs/ceph/debugfs.c | 6 ++++++ >> fs/ceph/mds_client.c | 20 ++++++++++++++++++++ >> fs/ceph/metric.h | 13 +++++++++++++ >> 3 files changed, 39 insertions(+) >> >> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c >> index 464bfbdb970d..60f3e307fca1 100644 >> --- a/fs/ceph/debugfs.c >> +++ b/fs/ceph/debugfs.c >> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) >> avg = total ? sum / total : 0; >> seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); >> >> + total = percpu_counter_sum(&mdsc->metric.total_metadatas); >> + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); >> + sum = jiffies_to_usecs(sum); >> + avg = total ? sum / total : 0; >> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); >> + >> seq_printf(s, "\n"); >> seq_printf(s, "item total miss hit\n"); >> seq_printf(s, "-------------------------------------------------\n"); >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 0a3447966b26..3e792eca6af7 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) >> >> /* kick calling process */ >> complete_request(mdsc, req); >> + >> + if (!result || result == -ENOENT) { >> + s64 latency = jiffies - req->r_started; >> + >> + ceph_update_metadata_latency(&mdsc->metric, latency); >> + } > Should we add an r_end_stamp field to the mds request struct and use > that to calculate this? Many jiffies may have passed between the reply > coming in and this point. If you really want to measure the latency that > would be more accurate, I think. Okay, will add it. Thanks. BRs >> out: >> ceph_mdsc_put_request(req); >> return; >> @@ -4196,8 +4202,20 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) >> if (ret) >> goto err_write_latency_sum; >> >> + ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL); >> + if (ret) >> + goto err_total_metadatas; >> + >> + ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL); >> + if (ret) >> + goto err_metadata_latency_sum; >> + >> return 0; >> >> +err_metadata_latency_sum: >> + percpu_counter_destroy(&metric->total_metadatas); >> +err_total_metadatas: >> + percpu_counter_destroy(&metric->write_latency_sum); >> err_write_latency_sum: >> percpu_counter_destroy(&metric->total_writes); >> err_total_writes: >> @@ -4553,6 +4571,8 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) >> >> ceph_mdsc_stop(mdsc); >> >> + percpu_counter_destroy(&mdsc->metric.metadata_latency_sum); >> + percpu_counter_destroy(&mdsc->metric.total_metadatas); >> percpu_counter_destroy(&mdsc->metric.write_latency_sum); >> percpu_counter_destroy(&mdsc->metric.total_writes); >> percpu_counter_destroy(&mdsc->metric.read_latency_sum); >> diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h >> index a3d342f258e6..4c14b4ac089d 100644 >> --- a/fs/ceph/metric.h >> +++ b/fs/ceph/metric.h >> @@ -18,6 +18,9 @@ struct ceph_client_metric { >> >> struct percpu_counter total_writes; >> struct percpu_counter write_latency_sum; >> + >> + struct percpu_counter total_metadatas; >> + struct percpu_counter metadata_latency_sum; >> }; >> >> static inline void ceph_update_cap_hit(struct ceph_client_metric *m) >> @@ -65,4 +68,14 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m, >> percpu_counter_add(&m->write_latency_sum, latency); >> } >> } >> + >> +static inline void ceph_update_metadata_latency(struct ceph_client_metric *m, >> + s64 latency) >> +{ >> + if (!m) >> + return; >> + >> + percpu_counter_inc(&m->total_metadatas); >> + percpu_counter_add(&m->metadata_latency_sum, latency); >> +} >> #endif /* _FS_CEPH_MDS_METRIC_H */
On 2020/2/21 22:56, Ilya Dryomov wrote: > On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote: >> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: >>> From: Xiubo Li <xiubli@redhat.com> >>> >>> It will calculate the latency for the metedata requests, which only >>> include the time cousumed by network and the ceph. >>> >> "and the ceph MDS" ? >> >>> item total sum_lat(us) avg_lat(us) >>> ----------------------------------------------------- >>> metadata 113 220000 1946 >>> >>> URL: https://tracker.ceph.com/issues/43215 >>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>> --- >>> fs/ceph/debugfs.c | 6 ++++++ >>> fs/ceph/mds_client.c | 20 ++++++++++++++++++++ >>> fs/ceph/metric.h | 13 +++++++++++++ >>> 3 files changed, 39 insertions(+) >>> >>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c >>> index 464bfbdb970d..60f3e307fca1 100644 >>> --- a/fs/ceph/debugfs.c >>> +++ b/fs/ceph/debugfs.c >>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) >>> avg = total ? sum / total : 0; >>> seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); >>> >>> + total = percpu_counter_sum(&mdsc->metric.total_metadatas); >>> + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); >>> + sum = jiffies_to_usecs(sum); >>> + avg = total ? sum / total : 0; >>> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); >>> + >>> seq_printf(s, "\n"); >>> seq_printf(s, "item total miss hit\n"); >>> seq_printf(s, "-------------------------------------------------\n"); >>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>> index 0a3447966b26..3e792eca6af7 100644 >>> --- a/fs/ceph/mds_client.c >>> +++ b/fs/ceph/mds_client.c >>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) >>> >>> /* kick calling process */ >>> complete_request(mdsc, req); >>> + >>> + if (!result || result == -ENOENT) { >>> + s64 latency = jiffies - req->r_started; >>> + >>> + ceph_update_metadata_latency(&mdsc->metric, latency); >>> + } >> Should we add an r_end_stamp field to the mds request struct and use >> that to calculate this? Many jiffies may have passed between the reply >> coming in and this point. If you really want to measure the latency that >> would be more accurate, I think. > Yes, capturing it after invoking the callback is inconsistent > with what is done for OSD requests (the new r_end_stamp is set in > finish_request()). > > It looks like this is the only place where MDS r_end_stamp would be > needed, so perhaps just move this before complete_request() call? Currently for the OSD requests, they are almost in the same place where at the end of the handle_reply. If we don't want to calculate the consumption by the most of handle_reply code, we may set the r_end_stamp in the begin of it for both OSD/MDS requests ? I'm thinking since in the handle_reply, it may also wait for the mutex locks and then sleeps, so move the r_end_stamp to the beginning should make sense... Thanks BRs > Thanks, > > Ilya >
On 2020/2/22 9:20, Xiubo Li wrote: > On 2020/2/21 22:56, Ilya Dryomov wrote: >> On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote: >>> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: >>>> From: Xiubo Li <xiubli@redhat.com> >>>> >>>> It will calculate the latency for the metedata requests, which only >>>> include the time cousumed by network and the ceph. >>>> >>> "and the ceph MDS" ? >>> >>>> item total sum_lat(us) avg_lat(us) >>>> ----------------------------------------------------- >>>> metadata 113 220000 1946 >>>> >>>> URL: https://tracker.ceph.com/issues/43215 >>>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>>> --- >>>> fs/ceph/debugfs.c | 6 ++++++ >>>> fs/ceph/mds_client.c | 20 ++++++++++++++++++++ >>>> fs/ceph/metric.h | 13 +++++++++++++ >>>> 3 files changed, 39 insertions(+) >>>> >>>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c >>>> index 464bfbdb970d..60f3e307fca1 100644 >>>> --- a/fs/ceph/debugfs.c >>>> +++ b/fs/ceph/debugfs.c >>>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, >>>> void *p) >>>> avg = total ? sum / total : 0; >>>> seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, >>>> sum, avg); >>>> >>>> + total = percpu_counter_sum(&mdsc->metric.total_metadatas); >>>> + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); >>>> + sum = jiffies_to_usecs(sum); >>>> + avg = total ? sum / total : 0; >>>> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, >>>> sum, avg); >>>> + >>>> seq_printf(s, "\n"); >>>> seq_printf(s, "item total miss hit\n"); >>>> seq_printf(s, >>>> "-------------------------------------------------\n"); >>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>>> index 0a3447966b26..3e792eca6af7 100644 >>>> --- a/fs/ceph/mds_client.c >>>> +++ b/fs/ceph/mds_client.c >>>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct >>>> ceph_mds_session *session, struct ceph_msg *msg) >>>> >>>> /* kick calling process */ >>>> complete_request(mdsc, req); >>>> + >>>> + if (!result || result == -ENOENT) { >>>> + s64 latency = jiffies - req->r_started; >>>> + >>>> + ceph_update_metadata_latency(&mdsc->metric, latency); >>>> + } >>> Should we add an r_end_stamp field to the mds request struct and use >>> that to calculate this? Many jiffies may have passed between the reply >>> coming in and this point. If you really want to measure the latency >>> that >>> would be more accurate, I think. >> Yes, capturing it after invoking the callback is inconsistent >> with what is done for OSD requests (the new r_end_stamp is set in >> finish_request()). >> >> It looks like this is the only place where MDS r_end_stamp would be >> needed, so perhaps just move this before complete_request() call? > > Currently for the OSD requests, they are almost in the same place > where at the end of the handle_reply. > > If we don't want to calculate the consumption by the most of > handle_reply code, we may set the r_end_stamp in the begin of it for > both OSD/MDS requests ? > > I'm thinking since in the handle_reply, it may also wait for the mutex > locks and then sleeps, so move the r_end_stamp to the beginning should > make sense... > Currently case: we are calculating the time took from OSD/MDS requests creating until the handle_reply finishes. Another case mentioned above: we should move the r_end_stamp to the handle_reply beginning, move r_start_stamp just before the requesting submitted. Which one should be better ? Thanks. BRs > Thanks > BRs > > >> Thanks, >> >> Ilya >> >
On Sat, Feb 22, 2020 at 2:21 AM Xiubo Li <xiubli@redhat.com> wrote: > > On 2020/2/21 22:56, Ilya Dryomov wrote: > > On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote: > >> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: > >>> From: Xiubo Li <xiubli@redhat.com> > >>> > >>> It will calculate the latency for the metedata requests, which only > >>> include the time cousumed by network and the ceph. > >>> > >> "and the ceph MDS" ? > >> > >>> item total sum_lat(us) avg_lat(us) > >>> ----------------------------------------------------- > >>> metadata 113 220000 1946 > >>> > >>> URL: https://tracker.ceph.com/issues/43215 > >>> Signed-off-by: Xiubo Li <xiubli@redhat.com> > >>> --- > >>> fs/ceph/debugfs.c | 6 ++++++ > >>> fs/ceph/mds_client.c | 20 ++++++++++++++++++++ > >>> fs/ceph/metric.h | 13 +++++++++++++ > >>> 3 files changed, 39 insertions(+) > >>> > >>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c > >>> index 464bfbdb970d..60f3e307fca1 100644 > >>> --- a/fs/ceph/debugfs.c > >>> +++ b/fs/ceph/debugfs.c > >>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) > >>> avg = total ? sum / total : 0; > >>> seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); > >>> > >>> + total = percpu_counter_sum(&mdsc->metric.total_metadatas); > >>> + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); > >>> + sum = jiffies_to_usecs(sum); > >>> + avg = total ? sum / total : 0; > >>> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); > >>> + > >>> seq_printf(s, "\n"); > >>> seq_printf(s, "item total miss hit\n"); > >>> seq_printf(s, "-------------------------------------------------\n"); > >>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c > >>> index 0a3447966b26..3e792eca6af7 100644 > >>> --- a/fs/ceph/mds_client.c > >>> +++ b/fs/ceph/mds_client.c > >>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) > >>> > >>> /* kick calling process */ > >>> complete_request(mdsc, req); > >>> + > >>> + if (!result || result == -ENOENT) { > >>> + s64 latency = jiffies - req->r_started; > >>> + > >>> + ceph_update_metadata_latency(&mdsc->metric, latency); > >>> + } > >> Should we add an r_end_stamp field to the mds request struct and use > >> that to calculate this? Many jiffies may have passed between the reply > >> coming in and this point. If you really want to measure the latency that > >> would be more accurate, I think. > > Yes, capturing it after invoking the callback is inconsistent > > with what is done for OSD requests (the new r_end_stamp is set in > > finish_request()). > > > > It looks like this is the only place where MDS r_end_stamp would be > > needed, so perhaps just move this before complete_request() call? > > Currently for the OSD requests, they are almost in the same place where > at the end of the handle_reply. For OSD requests, r_end_request is captured _before_ the supplied callback is invoked. I suggest to do the same for MDS requests. > > If we don't want to calculate the consumption by the most of > handle_reply code, we may set the r_end_stamp in the begin of it for > both OSD/MDS requests ? > > I'm thinking since in the handle_reply, it may also wait for the mutex > locks and then sleeps, so move the r_end_stamp to the beginning should > make sense... No, the time spent in handle_reply() must be included, just like the time spent in submit_request() is included. If you look at the code, you will see that there is a case where handle_reply() drops the reply on the floor and resubmits the OSD request. Further, on many errors, handle_reply() isn't even called, so finish_request() is the only place where r_end_stamp can be captured in a consistent manner. Thanks, Ilya
On 2020/2/24 17:56, Ilya Dryomov wrote: > On Sat, Feb 22, 2020 at 2:21 AM Xiubo Li <xiubli@redhat.com> wrote: >> On 2020/2/21 22:56, Ilya Dryomov wrote: >>> On Fri, Feb 21, 2020 at 1:03 PM Jeff Layton <jlayton@kernel.org> wrote: >>>> On Fri, 2020-02-21 at 02:05 -0500, xiubli@redhat.com wrote: >>>>> From: Xiubo Li <xiubli@redhat.com> >>>>> >>>>> It will calculate the latency for the metedata requests, which only >>>>> include the time cousumed by network and the ceph. >>>>> >>>> "and the ceph MDS" ? >>>> >>>>> item total sum_lat(us) avg_lat(us) >>>>> ----------------------------------------------------- >>>>> metadata 113 220000 1946 >>>>> >>>>> URL: https://tracker.ceph.com/issues/43215 >>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com> >>>>> --- >>>>> fs/ceph/debugfs.c | 6 ++++++ >>>>> fs/ceph/mds_client.c | 20 ++++++++++++++++++++ >>>>> fs/ceph/metric.h | 13 +++++++++++++ >>>>> 3 files changed, 39 insertions(+) >>>>> >>>>> diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c >>>>> index 464bfbdb970d..60f3e307fca1 100644 >>>>> --- a/fs/ceph/debugfs.c >>>>> +++ b/fs/ceph/debugfs.c >>>>> @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) >>>>> avg = total ? sum / total : 0; >>>>> seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); >>>>> >>>>> + total = percpu_counter_sum(&mdsc->metric.total_metadatas); >>>>> + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); >>>>> + sum = jiffies_to_usecs(sum); >>>>> + avg = total ? sum / total : 0; >>>>> + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); >>>>> + >>>>> seq_printf(s, "\n"); >>>>> seq_printf(s, "item total miss hit\n"); >>>>> seq_printf(s, "-------------------------------------------------\n"); >>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >>>>> index 0a3447966b26..3e792eca6af7 100644 >>>>> --- a/fs/ceph/mds_client.c >>>>> +++ b/fs/ceph/mds_client.c >>>>> @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) >>>>> >>>>> /* kick calling process */ >>>>> complete_request(mdsc, req); >>>>> + >>>>> + if (!result || result == -ENOENT) { >>>>> + s64 latency = jiffies - req->r_started; >>>>> + >>>>> + ceph_update_metadata_latency(&mdsc->metric, latency); >>>>> + } >>>> Should we add an r_end_stamp field to the mds request struct and use >>>> that to calculate this? Many jiffies may have passed between the reply >>>> coming in and this point. If you really want to measure the latency that >>>> would be more accurate, I think. >>> Yes, capturing it after invoking the callback is inconsistent >>> with what is done for OSD requests (the new r_end_stamp is set in >>> finish_request()). >>> >>> It looks like this is the only place where MDS r_end_stamp would be >>> needed, so perhaps just move this before complete_request() call? >> Currently for the OSD requests, they are almost in the same place where >> at the end of the handle_reply. > For OSD requests, r_end_request is captured _before_ the supplied > callback is invoked. I suggest to do the same for MDS requests. Will do it in complete_request() for MDS case, there is no finish_request(). >> If we don't want to calculate the consumption by the most of >> handle_reply code, we may set the r_end_stamp in the begin of it for >> both OSD/MDS requests ? >> >> I'm thinking since in the handle_reply, it may also wait for the mutex >> locks and then sleeps, so move the r_end_stamp to the beginning should >> make sense... > No, the time spent in handle_reply() must be included, just like the > time spent in submit_request() is included. If you look at the code, > you will see that there is a case where handle_reply() drops the reply > on the floor and resubmits the OSD request. Further, on many errors, > handle_reply() isn't even called, so finish_request() is the only place > where r_end_stamp can be captured in a consistent manner. Okay, get it. Thanks Ilya. BRs > Thanks, > > Ilya >
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 464bfbdb970d..60f3e307fca1 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -146,6 +146,12 @@ static int metric_show(struct seq_file *s, void *p) avg = total ? sum / total : 0; seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "write", total, sum, avg); + total = percpu_counter_sum(&mdsc->metric.total_metadatas); + sum = percpu_counter_sum(&mdsc->metric.metadata_latency_sum); + sum = jiffies_to_usecs(sum); + avg = total ? sum / total : 0; + seq_printf(s, "%-14s%-12lld%-16lld%lld\n", "metadata", total, sum, avg); + seq_printf(s, "\n"); seq_printf(s, "item total miss hit\n"); seq_printf(s, "-------------------------------------------------\n"); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0a3447966b26..3e792eca6af7 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3017,6 +3017,12 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) /* kick calling process */ complete_request(mdsc, req); + + if (!result || result == -ENOENT) { + s64 latency = jiffies - req->r_started; + + ceph_update_metadata_latency(&mdsc->metric, latency); + } out: ceph_mdsc_put_request(req); return; @@ -4196,8 +4202,20 @@ static int ceph_mdsc_metric_init(struct ceph_client_metric *metric) if (ret) goto err_write_latency_sum; + ret = percpu_counter_init(&metric->total_metadatas, 0, GFP_KERNEL); + if (ret) + goto err_total_metadatas; + + ret = percpu_counter_init(&metric->metadata_latency_sum, 0, GFP_KERNEL); + if (ret) + goto err_metadata_latency_sum; + return 0; +err_metadata_latency_sum: + percpu_counter_destroy(&metric->total_metadatas); +err_total_metadatas: + percpu_counter_destroy(&metric->write_latency_sum); err_write_latency_sum: percpu_counter_destroy(&metric->total_writes); err_total_writes: @@ -4553,6 +4571,8 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc) ceph_mdsc_stop(mdsc); + percpu_counter_destroy(&mdsc->metric.metadata_latency_sum); + percpu_counter_destroy(&mdsc->metric.total_metadatas); percpu_counter_destroy(&mdsc->metric.write_latency_sum); percpu_counter_destroy(&mdsc->metric.total_writes); percpu_counter_destroy(&mdsc->metric.read_latency_sum); diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h index a3d342f258e6..4c14b4ac089d 100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -18,6 +18,9 @@ struct ceph_client_metric { struct percpu_counter total_writes; struct percpu_counter write_latency_sum; + + struct percpu_counter total_metadatas; + struct percpu_counter metadata_latency_sum; }; static inline void ceph_update_cap_hit(struct ceph_client_metric *m) @@ -65,4 +68,14 @@ static inline void ceph_update_write_latency(struct ceph_client_metric *m, percpu_counter_add(&m->write_latency_sum, latency); } } + +static inline void ceph_update_metadata_latency(struct ceph_client_metric *m, + s64 latency) +{ + if (!m) + return; + + percpu_counter_inc(&m->total_metadatas); + percpu_counter_add(&m->metadata_latency_sum, latency); +} #endif /* _FS_CEPH_MDS_METRIC_H */