@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
+#include <linux/rdma_dim.h>
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
@@ -26,6 +27,47 @@
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
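+/*
+ * Apply a DIM profile level to the hardware: program the CQ with the
+ * completion count and delay (usecs) of the selected moderation profile.
+ */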
+static int ib_cq_dim_modify_cq(struct ib_cq *cq, unsigned short level)
+{
+ u16 usec = rdma_dim_prof[level].usec;
+ u16 comps = rdma_dim_prof[level].comps;
+
+ return cq->device->ops.modify_cq(cq, comps, usec);
+}
+
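+/* Act on a DIM decision: restart the measurement window and re-moderate. */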
+static void update_cq_moderation(struct dim *dim, struct ib_cq *cq)
+{
+ dim->state = DIM_START_MEASURE;
+
+ ib_cq_dim_modify_cq(cq, dim->profile_ix);
+}
+
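+/* DIM work handler for workqueue-polled CQs: recover the owning CQ. */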
+static void ib_cq_rdma_dim_workqueue_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct ib_cq *cq = container_of(dim, struct ib_cq, workqueue_poll.dim);
+
+ update_cq_moderation(dim, cq);
+}
+
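+/* DIM work handler for softirq-polled CQs, via the embedded irq_poll. */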
+static void ib_cq_rdma_dim_irqpoll_work(struct work_struct *w)
+{
+ struct dim *dim = container_of(w, struct dim, work);
+ struct irq_poll *iop = container_of(dim, struct irq_poll, dim);
+ struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
+
+ update_cq_moderation(dim, cq);
+}
+
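+/*
+ * Start a DIM instance at the default profile with a fresh measurement
+ * window; @func runs whenever the algorithm picks a new moderation level.
+ */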
+static void rdma_dim_init(struct dim *dim, work_func_t func)
+{
+ memset(dim, 0, sizeof(*dim));
+ dim->state = DIM_START_MEASURE;
+ dim->tune_state = DIM_GOING_RIGHT;
+ dim->profile_ix = RDMA_DIM_START_PROFILE;
+ INIT_WORK(&dim->work, func);
+}
+
static int __ib_process_cq(struct ib_cq *cq, int budget, struct ib_wc *wcs,
int batch)
{
@@ -105,19 +147,30 @@ static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
static void ib_cq_poll_work(struct work_struct *work)
{
- struct ib_cq *cq = container_of(work, struct ib_cq, work);
+ struct ib_cq *cq = container_of(work, struct ib_cq,
+ workqueue_poll.work);
int completed;
+ struct dim_sample e_sample;
+ struct dim_sample *m_sample = &cq->workqueue_poll.dim.measuring_sample;
completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE, cq->wc,
IB_POLL_BATCH);
+
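+ /*
+ * Fold this poll round into the running DIM sample: one more
+ * completion event plus the completions just reaped.
+ */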
+ if (cq->workqueue_poll.dim_used)
+ dim_create_sample(m_sample->event_ctr + 1, m_sample->pkt_ctr,
+ m_sample->byte_ctr,
+ m_sample->comp_ctr + completed, &e_sample);
+
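+ /*
+ * Keep polling while more work may remain; otherwise let DIM
+ * evaluate the finished sample and re-moderate the CQ if needed.
+ */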
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
- queue_work(cq->comp_wq, &cq->work);
+ queue_work(cq->comp_wq, &cq->workqueue_poll.work);
+ else if (cq->workqueue_poll.dim_used)
+ rdma_dim(&cq->workqueue_poll.dim, &e_sample);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
- queue_work(cq->comp_wq, &cq->work);
+ queue_work(cq->comp_wq, &cq->workqueue_poll.work);
}
/**
@@ -129,6 +182,7 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
* @udata: Valid user data or NULL for kernel object
+ * @use_dim: use dynamic interrupt moderation (DIM), if the device supports it
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
@@ -138,7 +192,8 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata)
+ const char *caller, struct ib_udata *udata,
+ bool use_dim)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -174,12 +229,22 @@ struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
cq->comp_handler = ib_cq_completion_softirq;
irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
+ if (cq->device->ops.modify_cq && use_dim) {
+ rdma_dim_init(&cq->iop.dim, ib_cq_rdma_dim_irqpoll_work);
+ cq->iop.dim_used = true;
+ }
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
case IB_POLL_WORKQUEUE:
case IB_POLL_UNBOUND_WORKQUEUE:
cq->comp_handler = ib_cq_completion_workqueue;
- INIT_WORK(&cq->work, ib_cq_poll_work);
+ INIT_WORK(&cq->workqueue_poll.work, ib_cq_poll_work);
+ if (cq->device->ops.modify_cq && use_dim) {
+ rdma_dim_init(&cq->workqueue_poll.dim, ib_cq_rdma_dim_workqueue_work);
+ cq->workqueue_poll.dim_used = true;
+ }
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
cq->comp_wq = (cq->poll_ctx == IB_POLL_WORKQUEUE) ?
ib_comp_wq : ib_comp_unbound_wq;
@@ -220,7 +285,9 @@ void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
break;
case IB_POLL_WORKQUEUE:
case IB_POLL_UNBOUND_WORKQUEUE:
- cancel_work_sync(&cq->work);
+ cancel_work_sync(&cq->workqueue_poll.work);
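+ /* A moderation decision may still be in flight; wait for it. */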
+ if (cq->workqueue_poll.dim_used)
+ flush_work(&cq->workqueue_poll.dim.work);
break;
default:
WARN_ON_ONCE(1);
@@ -4385,7 +4385,7 @@ static void handle_drain_completion(struct ib_cq *cq,
irq_poll_enable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- cancel_work_sync(&cq->work);
+ cancel_work_sync(&cq->workqueue_poll.work);
break;
default:
WARN_ON_ONCE(1);
@@ -6267,7 +6267,7 @@ static void handle_drain_completion(struct ib_cq *cq,
irq_poll_enable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
- cancel_work_sync(&cq->work);
+ cancel_work_sync(&cq->workqueue_poll.work);
break;
default:
WARN_ON_ONCE(1);
@@ -2,6 +2,8 @@
#ifndef IRQ_POLL_H
#define IRQ_POLL_H
+#include <linux/rdma_dim.h>
+
struct irq_poll;
typedef int (irq_poll_fn)(struct irq_poll *, int);
@@ -10,6 +12,9 @@ struct irq_poll {
unsigned long state;
int weight;
irq_poll_fn *poll;
+
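+ /* dynamic interrupt moderation state; only valid when dim_used is set */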
+ bool dim_used;
+ struct dim dim;
};
enum {
@@ -1587,6 +1587,12 @@ enum ib_poll_context {
IB_POLL_UNBOUND_WORKQUEUE, /* poll from unbound workqueue */
};
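+/*
+ * Polling state of a workqueue-driven CQ; @dim drives dynamic interrupt
+ * moderation of the CQ when @dim_used is set.
+ */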
+struct ib_cq_workqueue_poll {
+ struct dim dim;
+ struct work_struct work;
+ bool dim_used;
+};
+
struct ib_cq {
struct ib_device *device;
struct ib_uobject *uobject;
@@ -1598,8 +1604,8 @@ struct ib_cq {
enum ib_poll_context poll_ctx;
struct ib_wc *wc;
union {
- struct irq_poll iop;
- struct work_struct work;
+ struct irq_poll iop;
+ struct ib_cq_workqueue_poll workqueue_poll;
};
struct workqueue_struct *comp_wq;
/*
@@ -3628,7 +3634,8 @@ static inline int ib_post_recv(struct ib_qp *qp,
struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx,
- const char *caller, struct ib_udata *udata);
+ const char *caller, struct ib_udata *udata,
+ bool use_dim);
/**
* ib_alloc_cq_user: Allocate kernel/user CQ
@@ -3646,7 +3653,27 @@ static inline struct ib_cq *ib_alloc_cq_user(struct ib_device *dev,
struct ib_udata *udata)
{
return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
- KBUILD_MODNAME, udata);
+ KBUILD_MODNAME, udata, false);
+}
+
+/**
+ * ib_alloc_cq_user_dim: Allocate kernel/user CQ with dynamic interrupt
+ * moderation
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ * @udata: Valid user data or NULL for kernel objects
+ */
+static inline struct ib_cq *ib_alloc_cq_user_dim(struct ib_device *dev,
+ void *private, int nr_cqe,
+ int comp_vector,
+ enum ib_poll_context poll_ctx,
+ struct ib_udata *udata)
+{
+ return __ib_alloc_cq_user(dev, private, nr_cqe, comp_vector, poll_ctx,
+ KBUILD_MODNAME, udata, true);
}
/**
@@ -3668,6 +3695,25 @@ static inline struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
}
/**
+ * ib_alloc_cq_dim: Allocate kernel CQ with dynamic interrupt moderation
+ * @dev: The IB device
+ * @private: Private data attached to the CQE
+ * @nr_cqe: Number of CQEs in the CQ
+ * @comp_vector: Completion vector used for the IRQs
+ * @poll_ctx: Context used for polling the CQ
+ *
+ * NOTE: for user cq use ib_alloc_cq_user_dim with valid udata!
+ */
+static inline struct ib_cq *ib_alloc_cq_dim(struct ib_device *dev,
+ void *private, int nr_cqe,
+ int comp_vector,
+ enum ib_poll_context poll_ctx)
+{
+ return ib_alloc_cq_user_dim(dev, private, nr_cqe, comp_vector,
+ poll_ctx, NULL);
+}
+
+/**
* ib_free_cq_user - Free kernel/user CQ
* @cq: The CQ to free
* @udata: Valid user data or NULL for kernel objects
@@ -50,6 +50,8 @@ void irq_poll_sched(struct irq_poll *iop)
**/
static void __irq_poll_complete(struct irq_poll *iop)
{
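+ /*
+ * This polling session is over: hand the sample gathered while
+ * scheduled to the DIM algorithm for a moderation decision.
+ */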
+ if (iop->dim_used)
+ rdma_dim(&iop->dim, &iop->dim.measuring_sample);
list_del(&iop->list);
smp_mb__before_atomic();
clear_bit_unlock(IRQ_POLL_F_SCHED, &iop->state);
@@ -86,6 +88,7 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
while (!list_empty(list)) {
struct irq_poll *iop;
int work, weight;
+ struct dim_sample *m_sample;
/*
* If softirq window is exhausted then punt.
@@ -104,10 +107,18 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
*/
iop = list_entry(list->next, struct irq_poll, list);
+ m_sample = &iop->dim.measuring_sample;
weight = iop->weight;
work = 0;
- if (test_bit(IRQ_POLL_F_SCHED, &iop->state))
+ if (test_bit(IRQ_POLL_F_SCHED, &iop->state)) {
work = iop->poll(iop, weight);
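+ /* Credit this poll's completions to the DIM measuring sample. */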
+ if (iop->dim_used)
+ dim_create_sample(m_sample->event_ctr + 1,
+ m_sample->pkt_ctr,
+ m_sample->byte_ctr,
+ m_sample->comp_ctr + work,
+ &iop->dim.measuring_sample);
+ }
budget -= work;
@@ -144,6 +155,8 @@ static void __latent_entropy irq_poll_softirq(struct softirq_action *h)
**/
void irq_poll_disable(struct irq_poll *iop)
{
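+ /* Wait out any in-flight DIM re-moderation before disabling. */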
+ if (iop->dim_used)
+ flush_work(&iop->dim.work);
set_bit(IRQ_POLL_F_DISABLE, &iop->state);
while (test_and_set_bit(IRQ_POLL_F_SCHED, &iop->state))
msleep(1);