@@ -216,6 +216,9 @@ static int ceph_sync_readpages(struct ceph_fs_client *fsc,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+
ceph_osdc_put_request(req);
dout("readpages result %d\n", rc);
return rc;
@@ -299,6 +302,7 @@ static int ceph_readpage(struct file *filp, struct page *page)
static void finish_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_data *osd_data;
int rc = req->r_result <= 0 ? req->r_result : 0;
int bytes = req->r_result >= 0 ? req->r_result : 0;
@@ -336,6 +340,10 @@ static void finish_read(struct ceph_osd_request *req)
put_page(page);
bytes -= PAGE_SIZE;
}
+
+ ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+
kfree(osd_data->pages);
}
@@ -643,6 +651,9 @@ static int ceph_sync_writepages(struct ceph_fs_client *fsc,
if (!rc)
rc = ceph_osdc_wait_request(osdc, req);
+ ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+
ceph_osdc_put_request(req);
if (rc == 0)
rc = len;
@@ -794,6 +805,9 @@ static void writepages_finish(struct ceph_osd_request *req)
ceph_clear_error_write(ci);
}
+ ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+
/*
* We lost the cache cap, need to truncate the page before
* it is unlocked, otherwise we'd truncate it later in the
@@ -1852,6 +1866,10 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, err);
+
out_put:
ceph_osdc_put_request(req);
if (err == -ECANCELED)
@@ -7,6 +7,7 @@
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/math64.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/mon_client.h>
@@ -124,13 +125,71 @@ static int mdsc_show(struct seq_file *s, void *p)
return 0;
}
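+/*
+ * Snapshot the op count and latency sum under the metric lock and
+ * return the rounded average latency, still in jiffies.
+ */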
+static s64 get_avg(atomic64_t *totalp, atomic64_t *sump, spinlock_t *lockp,
+ s64 *total)
+{
+ s64 t, sum, avg = 0;
+
+ spin_lock(lockp);
+ t = atomic64_read(totalp);
+ sum = atomic64_read(sump);
+ spin_unlock(lockp);
+
+ if (likely(t))
+ avg = DIV64_U64_ROUND_CLOSEST(sum, t);
+
+ *total = t;
+ return avg;
+}
+
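+/*
+ * Print one latency line: op count, average/min/max latency and the
+ * sample standard deviation, converted from jiffies to microseconds.
+ */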
+#define CEPH_METRIC_SHOW(name, total, avg, min, max, sq) { \
+ s64 _total, _avg, _min, _max, _sq, _st; \
+ _avg = jiffies_to_usecs(avg); \
+ _min = jiffies_to_usecs(min == S64_MAX ? 0 : min); \
+ _max = jiffies_to_usecs(max); \
+ _total = total - 1; \
+ /* sample variance, still in jiffies^2 */ \
+ _sq = _total > 0 ? DIV64_U64_ROUND_CLOSEST(sq, _total) : 0; \
+ /* take the square root in jiffies first, then convert to usecs */ \
+ _st = int_sqrt64(_sq); \
+ _st = jiffies_to_usecs(_st); \
+ seq_printf(s, "%-14s%-12lld%-16lld%-16lld%-16lld%lld\n", \
+ name, total, _avg, _min, _max, _st); \
+}
+
static int metric_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_client_metric *m = &mdsc->metric;
int i, nr_caps = 0;
-
+ s64 total, avg, min, max, sq;
+
+ seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n");
+ seq_printf(s, "-----------------------------------------------------------------------------------\n");
+
+ avg = get_avg(&m->total_reads,
+ &m->read_latency_sum,
+ &m->read_latency_lock,
+ &total);
+ min = atomic64_read(&m->read_latency_min);
+ max = atomic64_read(&m->read_latency_max);
+ sq = percpu_counter_sum(&m->read_latency_sq_sum);
+ CEPH_METRIC_SHOW("read", total, avg, min, max, sq);
+
+ avg = get_avg(&m->total_writes,
+ &m->write_latency_sum,
+ &m->write_latency_lock,
+ &total);
+ min = atomic64_read(&m->write_latency_min);
+ max = atomic64_read(&m->write_latency_max);
+ sq = percpu_counter_sum(&m->write_latency_sq_sum);
+ CEPH_METRIC_SHOW("write", total, avg, min, max, sq);
+
+ seq_printf(s, "\n");
seq_printf(s, "item total miss hit\n");
seq_printf(s, "-------------------------------------------------\n");
@@ -906,6 +906,10 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
ret = ceph_osdc_start_request(osdc, req, false);
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
+
+ ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, ret);
+
ceph_osdc_put_request(req);
i_size = i_size_read(inode);
@@ -1044,6 +1048,8 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
BUG_ON(!osd_data->num_bvecs);
@@ -1051,6 +1057,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
dout("ceph_aio_complete_req %p rc %d bytes %u\n",
inode, rc, osd_data->bvec_pos.iter.bi_size);
+ /* r_start_stamp == 0 means the request was not submitted */
+ if (req->r_start_stamp) {
+ if (aio_req->write)
+ ceph_update_write_latency(metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+ else
+ ceph_update_read_latency(metric, req->r_start_stamp,
+ req->r_end_stamp, rc);
+ }
+
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
BUG_ON(!aio_req->write);
@@ -1179,6 +1195,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_client_metric *metric = &fsc->mdsc->metric;
struct ceph_vino vino;
struct ceph_osd_request *req;
struct bio_vec *bvecs;
@@ -1295,6 +1312,13 @@ static void ceph_aio_retry_work(struct work_struct *work)
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ if (write)
+ ceph_update_write_latency(metric, req->r_start_stamp,
+ req->r_end_stamp, ret);
+ else
+ ceph_update_read_latency(metric, req->r_start_stamp,
+ req->r_end_stamp, ret);
+
size = i_size_read(inode);
if (!write) {
if (ret == -ENOENT)
@@ -1466,6 +1490,8 @@ static void ceph_aio_retry_work(struct work_struct *work)
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_stamp,
+ req->r_end_stamp, ret);
out:
ceph_osdc_put_request(req);
if (ret != 0) {
@@ -2,6 +2,7 @@
#include <linux/types.h>
#include <linux/percpu_counter.h>
+#include <linux/math64.h>
#include "metric.h"
@@ -29,8 +30,32 @@ int ceph_metric_init(struct ceph_client_metric *m)
if (ret)
goto err_i_caps_mis;
+ ret = percpu_counter_init(&m->read_latency_sq_sum, 0, GFP_KERNEL);
+ if (ret)
+ goto err_read_latency_sq_sum;
+
+ spin_lock_init(&m->read_latency_lock);
+ atomic64_set(&m->total_reads, 0);
+ atomic64_set(&m->read_latency_sum, 0);
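+ /* start at S64_MAX so the first recorded latency always becomes the min */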
+ atomic64_set(&m->read_latency_min, S64_MAX);
+ atomic64_set(&m->read_latency_max, 0);
+
+ ret = percpu_counter_init(&m->write_latency_sq_sum, 0, GFP_KERNEL);
+ if (ret)
+ goto err_write_latency_sq_sum;
+
+ spin_lock_init(&m->write_latency_lock);
+ atomic64_set(&m->total_writes, 0);
+ atomic64_set(&m->write_latency_sum, 0);
+ atomic64_set(&m->write_latency_min, S64_MAX);
+ atomic64_set(&m->write_latency_max, 0);
+
return 0;
+err_write_latency_sq_sum:
+ percpu_counter_destroy(&m->read_latency_sq_sum);
+err_read_latency_sq_sum:
+ percpu_counter_destroy(&m->i_caps_mis);
err_i_caps_mis:
percpu_counter_destroy(&m->i_caps_hit);
err_i_caps_hit:
@@ -46,8 +71,92 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
if (!m)
return;
+ percpu_counter_destroy(&m->write_latency_sq_sum);
+ percpu_counter_destroy(&m->read_latency_sq_sum);
percpu_counter_destroy(&m->i_caps_mis);
percpu_counter_destroy(&m->i_caps_hit);
percpu_counter_destroy(&m->d_lease_mis);
percpu_counter_destroy(&m->d_lease_hit);
}
+
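+/*
+ * Lockless min/max update: retry the cmpxchg until the stored value is
+ * already at least as good as @lat or the swap succeeds.
+ */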
+static inline void __update_min_latency(atomic64_t *min, unsigned long lat)
+{
+ s64 cur, old;
+
+ cur = atomic64_read(min);
+ do {
+ old = cur;
+ if (likely(lat >= old))
+ break;
+ } while (unlikely((cur = atomic64_cmpxchg(min, old, lat)) != old));
+}
+
+static inline void __update_max_latency(atomic64_t *max, unsigned long lat)
+{
+ s64 cur, old;
+
+ cur = atomic64_read(max);
+ do {
+ old = cur;
+ if (likely(lat <= old))
+ break;
+ } while (unlikely((cur = atomic64_cmpxchg(max, old, lat)) != old));
+}
+
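+/*
+ * Welford-style running variance: accumulate
+ * (lat - old_avg) * (lat - new_avg) so that the sample variance can be
+ * recovered later as sq_sum / (total - 1).
+ */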
+static inline void __update_avg_and_sq(atomic64_t *totalp, atomic64_t *lat_sump,
+ struct percpu_counter *sq_sump,
+ spinlock_t *lockp, unsigned long lat)
+{
+ s64 total, avg, sq, lsum;
+
+ spin_lock(lockp);
+ total = atomic64_inc_return(totalp);
+ lsum = atomic64_add_return(lat, lat_sump);
+ spin_unlock(lockp);
+
+ if (unlikely(total == 1))
+ return;
+
+ /* the sq is (lat - old_avg) * (lat - new_avg) */
+ avg = DIV64_U64_ROUND_CLOSEST((lsum - lat), (total - 1));
+ sq = lat - avg;
+ avg = DIV64_U64_ROUND_CLOSEST(lsum, total);
+ sq = sq * (lat - avg);
+ percpu_counter_add(sq_sump, sq);
+}
+
+void ceph_update_read_latency(struct ceph_client_metric *m,
+ unsigned long r_start,
+ unsigned long r_end,
+ int rc)
+{
+ unsigned long lat = r_end - r_start;
+
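+ /* count successful reads plus -ENOENT and timed-out requests */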
+ if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
+ return;
+
+ __update_min_latency(&m->read_latency_min, lat);
+ __update_max_latency(&m->read_latency_max, lat);
+ __update_avg_and_sq(&m->total_reads, &m->read_latency_sum,
+ &m->read_latency_sq_sum,
+ &m->read_latency_lock,
+ lat);
+}
+
+void ceph_update_write_latency(struct ceph_client_metric *m,
+ unsigned long r_start,
+ unsigned long r_end,
+ int rc)
+{
+ unsigned long lat = r_end - r_start;
+
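+ /* only successful or timed-out writes are counted */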
+ if (unlikely(rc && rc != -ETIMEDOUT))
+ return;
+
+ __update_min_latency(&m->write_latency_min, lat);
+ __update_max_latency(&m->write_latency_max, lat);
+ __update_avg_and_sq(&m->total_writes, &m->write_latency_sum,
+ &m->write_latency_sq_sum,
+ &m->write_latency_lock,
+ lat);
+}
@@ -13,6 +13,20 @@ struct ceph_client_metric {
struct percpu_counter i_caps_hit;
struct percpu_counter i_caps_mis;
+
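+ /*
+  * Read/write latency tracking, in jiffies; *_latency_sq_sum holds the
+  * running sum of squared deviations used for the stdev in debugfs.
+  */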
+ struct percpu_counter read_latency_sq_sum;
+ spinlock_t read_latency_lock;
+ atomic64_t total_reads;
+ atomic64_t read_latency_sum;
+ atomic64_t read_latency_min;
+ atomic64_t read_latency_max;
+
+ struct percpu_counter write_latency_sq_sum;
+ spinlock_t write_latency_lock;
+ atomic64_t total_writes;
+ atomic64_t write_latency_sum;
+ atomic64_t write_latency_min;
+ atomic64_t write_latency_max;
};
extern int ceph_metric_init(struct ceph_client_metric *m);
@@ -27,4 +41,13 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
{
percpu_counter_inc(&m->i_caps_mis);
}
+
+extern void ceph_update_read_latency(struct ceph_client_metric *m,
+ unsigned long r_start,
+ unsigned long r_end,
+ int rc);
+extern void ceph_update_write_latency(struct ceph_client_metric *m,
+ unsigned long r_start,
+ unsigned long r_end,
+ int rc);
#endif /* _FS_CEPH_MDS_METRIC_H */
@@ -213,6 +213,7 @@ struct ceph_osd_request {
/* internal */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
+ unsigned long r_end_stamp; /* jiffies */
int r_attempts;
u32 r_map_dne_bound;
@@ -2389,6 +2389,8 @@ static void finish_request(struct ceph_osd_request *req)
WARN_ON(lookup_request_mc(&osdc->map_checks, req->r_tid));
dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
+ req->r_end_stamp = jiffies;
+
if (req->r_osd)
unlink_request(req->r_osd, req);
atomic_dec(&osdc->num_requests);