Message ID | 20210930121602.63131-1-galpress@amazon.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [for-next,v3] RDMA/efa: CQ notifications | expand |
On Thu, Sep 30, 2021 at 03:16:00PM +0300, Gal Pressman wrote: > This patch adds support for CQ notifications through the standard verbs > api. > > In order to achieve that, a new event queue (EQ) object is introduced, > which is in charge of reporting completion events to the driver. > On driver load, EQs are allocated and their affinity is set to a single > cpu. When a user app creates a CQ with a completion channel, the > completion vector number is converted to a EQ number, which is in charge > of reporting the CQ events. > > In addition, the CQ creation admin command now returns an offset for the > CQ doorbell, which is mapped to the userspace provider and is used to > arm the CQ when requested by the user. > > The EQs use a single doorbell (located on the registers BAR), which > encodes the EQ number and arm as part of the doorbell value. > The EQs are polled by the driver on each new EQE, and arm it when the > poll is completed. > > Reviewed-by: Firas JahJah <firasj@amazon.com> > Reviewed-by: Yossi Leybovich <sleybo@amazon.com> > Signed-off-by: Gal Pressman <galpress@amazon.com> > --- > PR was sent: > https://github.com/linux-rdma/rdma-core/pull/1044 > > Changelog - > v2->v3: https://lore.kernel.org/linux-rdma/20210913120406.61745-1-galpress@amazon.com/ > * Only store CQs with interrupts enabled in the CQs xarray > * Add a comment before the xa_load to explain why it is safe > > v1->v2: https://lore.kernel.org/linux-rdma/20210811151131.39138-1-galpress@amazon.com/ > * Replace xa_init_flags() with xa_init() > * Add a synchronize_irq() in destroy_cq flow to prevent a race with > interrupt flow. 
> --- > drivers/infiniband/hw/efa/efa.h | 19 +- > .../infiniband/hw/efa/efa_admin_cmds_defs.h | 100 +++++++++- > drivers/infiniband/hw/efa/efa_admin_defs.h | 41 ++++ > drivers/infiniband/hw/efa/efa_com.c | 171 ++++++++++++++++ > drivers/infiniband/hw/efa/efa_com.h | 38 +++- > drivers/infiniband/hw/efa/efa_com_cmd.c | 35 +++- > drivers/infiniband/hw/efa/efa_com_cmd.h | 10 +- > drivers/infiniband/hw/efa/efa_main.c | 185 +++++++++++++++--- > drivers/infiniband/hw/efa/efa_regs_defs.h | 7 +- > drivers/infiniband/hw/efa/efa_verbs.c | 67 ++++++- > include/uapi/rdma/efa-abi.h | 18 +- > 11 files changed, 636 insertions(+), 55 deletions(-) > > diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h > index 87b1dadeb7fe..587d4bfbb3d1 100644 > --- a/drivers/infiniband/hw/efa/efa.h > +++ b/drivers/infiniband/hw/efa/efa.h > @@ -20,14 +20,14 @@ > > #define EFA_IRQNAME_SIZE 40 > > -/* 1 for AENQ + ADMIN */ > -#define EFA_NUM_MSIX_VEC 1 > #define EFA_MGMNT_MSIX_VEC_IDX 0 > +#define EFA_COMP_EQS_VEC_BASE 1 > > struct efa_irq { > irq_handler_t handler; > void *data; > u32 irqn; > + u32 vector; > cpumask_t affinity_hint_mask; > char name[EFA_IRQNAME_SIZE]; > }; > @@ -61,6 +61,13 @@ struct efa_dev { > struct efa_irq admin_irq; > > struct efa_stats stats; > + > + /* Array of completion EQs */ > + struct efa_eq *eqs; > + unsigned int neqs; > + > + /* Only stores CQs with interrupts enabled */ > + struct xarray cqs_xa; > }; > > struct efa_ucontext { > @@ -84,8 +91,11 @@ struct efa_cq { > dma_addr_t dma_addr; > void *cpu_addr; > struct rdma_user_mmap_entry *mmap_entry; > + struct rdma_user_mmap_entry *db_mmap_entry; > size_t size; > u16 cq_idx; > + /* NULL when no interrupts requested */ > + struct efa_eq *eq; > }; > > struct efa_qp { > @@ -116,6 +126,11 @@ struct efa_ah { > u8 id[EFA_GID_SIZE]; > }; > > +struct efa_eq { > + struct efa_com_eq eeq; > + struct efa_irq irq; > +}; > + > int efa_query_device(struct ib_device *ibdev, > struct ib_device_attr *props, > 
struct ib_udata *udata); > diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h > index fa38b34eddb8..0b0b93b529f3 100644 > --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h > +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h > @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { > EFA_ADMIN_DEALLOC_PD = 15, > EFA_ADMIN_ALLOC_UAR = 16, > EFA_ADMIN_DEALLOC_UAR = 17, > - EFA_ADMIN_MAX_OPCODE = 17, > + EFA_ADMIN_CREATE_EQ = 18, > + EFA_ADMIN_DESTROY_EQ = 19, > + EFA_ADMIN_MAX_OPCODE = 19, > }; > > enum efa_admin_aq_feature_id { > @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id { > EFA_ADMIN_QUEUE_ATTR = 4, > EFA_ADMIN_HW_HINTS = 5, > EFA_ADMIN_HOST_INFO = 6, > + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, > }; > > /* QP transport type */ > @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd { > /* > * 4:0 : reserved5 - MBZ > * 5 : interrupt_mode_enabled - if set, cq operates > - * in interrupt mode (i.e. CQ events and MSI-X are > - * generated), otherwise - polling > + * in interrupt mode (i.e. CQ events and EQ elements > + * are generated), otherwise - polling > * 6 : virt - If set, ring base address is virtual > * (IOVA returned by MR registration) > * 7 : reserved6 - MBZ > @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd { > /* completion queue depth in # of entries. must be power of 2 */ > u16 cq_depth; > > - /* msix vector assigned to this cq */ > - u32 msix_vector_idx; > + /* EQ number assigned to this cq */ > + u16 eqn; > + > + /* MBZ */ > + u16 reserved; > > /* > * CQ ring base address, virtual or physical depending on 'virt' > @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp { > > /* actual cq depth in number of entries */ > u16 cq_actual_depth; > + > + /* CQ doorbell address, as offset to PCIe DB BAR */ > + u32 db_offset; > + > + /* > + * 0 : db_valid - If set, doorbell offset is valid. > + * Always set when interrupts are requested. 
> + */ > + u32 flags; > }; > > struct efa_admin_destroy_cq_cmd { > @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc { > u16 max_tx_batch; > }; > > +struct efa_admin_event_queue_attr_desc { > + /* The maximum number of event queues supported */ > + u32 max_eq; > + > + /* Maximum number of EQEs per Event Queue */ > + u32 max_eq_depth; > + > + /* Supported events bitmask */ > + u32 event_bitmask; > +}; > + > struct efa_admin_feature_aenq_desc { > /* bitmask for AENQ groups the device can report */ > u32 supported_groups; > @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp { > > struct efa_admin_feature_queue_attr_desc queue_attr; > > + struct efa_admin_event_queue_attr_desc event_queue_attr; > + > struct efa_admin_hw_hints hw_hints; > } u; > }; > @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp { > struct efa_admin_acq_common_desc acq_common_desc; > }; > > +struct efa_admin_create_eq_cmd { > + struct efa_admin_aq_common_desc aq_common_descriptor; > + > + /* Size of the EQ in entries, must be power of 2 */ > + u16 depth; > + > + /* MSI-X table entry index */ > + u8 msix_vec; > + > + /* > + * 4:0 : entry_size_words - size of EQ entry in > + * 32-bit words > + * 7:5 : reserved - MBZ > + */ > + u8 caps; > + > + /* EQ ring base address */ > + struct efa_common_mem_addr ba; > + > + /* > + * Enabled events on this EQ > + * 0 : completion_events - Enable completion events > + * 31:1 : reserved - MBZ > + */ > + u32 event_bitmask; > + > + /* MBZ */ > + u32 reserved; > +}; > + > +struct efa_admin_create_eq_resp { > + struct efa_admin_acq_common_desc acq_common_desc; > + > + /* EQ number */ > + u16 eqn; > + > + /* MBZ */ > + u16 reserved; > +}; > + > +struct efa_admin_destroy_eq_cmd { > + struct efa_admin_aq_common_desc aq_common_descriptor; > + > + /* EQ number */ > + u16 eqn; > + > + /* MBZ */ > + u16 reserved; > +}; > + > +struct efa_admin_destroy_eq_resp { > + struct efa_admin_acq_common_desc acq_common_desc; > +}; > + > /* asynchronous event 
notification groups */ > enum efa_admin_aenq_group { > EFA_ADMIN_FATAL_ERROR = 1, > @@ -899,10 +981,18 @@ struct efa_admin_host_info { > #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) > #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) > > +/* create_cq_resp */ > +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) > + > /* feature_device_attr_desc */ > #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) > #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) > > +/* create_eq_cmd */ > +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) > +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) > +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) > + > /* host_info */ > #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) > #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) > diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h > index 78ff9389ae25..83f20c38a840 100644 > --- a/drivers/infiniband/hw/efa/efa_admin_defs.h > +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h > @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry { > u32 inline_data_w4[12]; > }; > > +enum efa_admin_eqe_event_type { > + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, > +}; > + > +/* Completion event */ > +struct efa_admin_comp_event { > + /* CQ number */ > + u16 cqn; > + > + /* MBZ */ > + u16 reserved; > + > + /* MBZ */ > + u32 reserved2; > +}; > + > +/* Event Queue Element */ > +struct efa_admin_eqe { > + /* > + * 0 : phase > + * 8:1 : event_type - Event type > + * 31:9 : reserved - MBZ > + */ > + u32 common; > + > + /* MBZ */ > + u32 reserved; > + > + union { > + /* Event data */ > + u32 event_data[2]; > + > + /* Completion Event */ > + struct efa_admin_comp_event comp_event; > + } u; > +}; > + > /* aq_common_desc */ > #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) > #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) > @@ -131,4 +168,8 @@ 
struct efa_admin_aenq_entry { > /* aenq_common_desc */ > #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) > > +/* eqe */ > +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) > +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) > + > #endif /* _EFA_ADMIN_H_ */ > diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c > index 0d523ad736c7..c00c7f526067 100644 > --- a/drivers/infiniband/hw/efa/efa_com.c > +++ b/drivers/infiniband/hw/efa/efa_com.c > @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd) > EFA_CMD_STR_CASE(DEALLOC_PD); > EFA_CMD_STR_CASE(ALLOC_UAR); > EFA_CMD_STR_CASE(DEALLOC_UAR); > + EFA_CMD_STR_CASE(CREATE_EQ); > + EFA_CMD_STR_CASE(DESTROY_EQ); > default: return "unknown command opcode"; > } > #undef EFA_CMD_STR_CASE > } > > +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) > +{ > + *addr_low = lower_32_bits(addr); > + *addr_high = upper_32_bits(addr); > +} > + > static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) > { > struct efa_com_mmio_read *mmio_read = &edev->mmio_read; > @@ -1081,3 +1089,166 @@ int efa_com_dev_reset(struct efa_com_dev *edev, > > return 0; > } > + > +static int efa_com_create_eq(struct efa_com_dev *edev, > + struct efa_com_create_eq_params *params, > + struct efa_com_create_eq_result *result) > +{ > + struct efa_com_admin_queue *aq = &edev->aq; > + struct efa_admin_create_eq_resp resp = {}; > + struct efa_admin_create_eq_cmd cmd = {}; > + int err; > + > + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; > + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, > + params->entry_size_in_bytes / 4); > + cmd.depth = params->depth; > + cmd.event_bitmask = params->event_bitmask; > + cmd.msix_vec = params->msix_vec; > + > + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, > + &cmd.ba.mem_addr_low); > + > + err = efa_com_cmd_exec(aq, > + (struct efa_admin_aq_entry *)&cmd, > + sizeof(cmd), > + (struct efa_admin_acq_entry *)&resp, > + 
sizeof(resp)); > + if (err) { > + ibdev_err_ratelimited(edev->efa_dev, > + "Failed to create eq[%d]\n", err); > + return err; > + } > + > + result->eqn = resp.eqn; > + > + return 0; > +} > + > +static int efa_com_destroy_eq(struct efa_com_dev *edev, > + struct efa_com_destroy_eq_params *params) > +{ Single caller of this function is not interested in return value from this function. It is worth to make it void from the beginning. Thanks > + struct efa_com_admin_queue *aq = &edev->aq; > + struct efa_admin_destroy_eq_resp resp = {}; > + struct efa_admin_destroy_eq_cmd cmd = {}; > + int err; > + > + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; > + cmd.eqn = params->eqn; > + > + err = efa_com_cmd_exec(aq, > + (struct efa_admin_aq_entry *)&cmd, > + sizeof(cmd), > + (struct efa_admin_acq_entry *)&resp, > + sizeof(resp)); > + > + if (err) { > + ibdev_err_ratelimited(edev->efa_dev, > + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, > + err); > + return err; > + } > + > + return 0; > +} > + > +static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) > +{ > + u32 val = 0; > + > + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); > + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); > + > + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); > +} > + > +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, > + struct efa_com_eq *eeq) > +{ > + struct efa_admin_eqe *eqe; > + u32 processed = 0; > + u8 phase; > + u32 ci; > + > + ci = eeq->cc & (eeq->depth - 1); > + phase = eeq->phase; > + eqe = &eeq->eqes[ci]; > + > + /* Go over all the events */ > + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { > + /* > + * Do not read the rest of the completion entry before the > + * phase bit was validated > + */ > + dma_rmb(); > + > + eeq->cb(eeq, eqe); > + > + /* Get next event entry */ > + ci++; > + processed++; > + > + if (ci == eeq->depth) { > + ci = 0; > + phase = !phase; > + } > + > + eqe = &eeq->eqes[ci]; > + } > + > + eeq->cc += processed; > + 
eeq->phase = phase; > + efa_com_arm_eq(eeq->edev, eeq); > +} > + > +int efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) > +{ > + struct efa_com_destroy_eq_params params = { > + .eqn = eeq->eqn, > + }; > + > + efa_com_destroy_eq(edev, ¶ms); > + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), > + eeq->eqes, eeq->dma_addr); > + > + return 0; > +} > + > +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, > + efa_eqe_handler cb, u16 depth, u8 msix_vec) > +{ > + struct efa_com_create_eq_params params = {}; > + struct efa_com_create_eq_result result = {}; > + int err; > + > + params.depth = depth; > + params.entry_size_in_bytes = sizeof(*eeq->eqes); > + EFA_SET(¶ms.event_bitmask, > + EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); > + params.msix_vec = msix_vec; > + > + eeq->eqes = dma_alloc_coherent(edev->dmadev, > + params.depth * sizeof(*eeq->eqes), > + ¶ms.dma_addr, GFP_KERNEL); > + if (!eeq->eqes) > + return -ENOMEM; > + > + err = efa_com_create_eq(edev, ¶ms, &result); > + if (err) > + goto err_free_coherent; > + > + eeq->eqn = result.eqn; > + eeq->edev = edev; > + eeq->dma_addr = params.dma_addr; > + eeq->phase = 1; > + eeq->depth = params.depth; > + eeq->cb = cb; > + efa_com_arm_eq(edev, eeq); > + > + return 0; > + > +err_free_coherent: > + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), > + eeq->eqes, params.dma_addr); > + return err; > +} > diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h > index 5e4c88877ddb..0fe241cd48e7 100644 > --- a/drivers/infiniband/hw/efa/efa_com.h > +++ b/drivers/infiniband/hw/efa/efa_com.h > @@ -1,6 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ > /* > - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. > + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
> */ > > #ifndef _EFA_COM_H_ > @@ -80,6 +80,9 @@ struct efa_com_admin_queue { > }; > > struct efa_aenq_handlers; > +struct efa_com_eq; > +typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, > + struct efa_admin_eqe *eqe); > > struct efa_com_aenq { > struct efa_admin_aenq_entry *entries; > @@ -112,6 +115,33 @@ struct efa_com_dev { > struct efa_com_mmio_read mmio_read; > }; > > +struct efa_com_eq { > + struct efa_com_dev *edev; > + struct efa_admin_eqe *eqes; > + dma_addr_t dma_addr; > + u32 cc; /* Consumer counter */ > + u16 eqn; > + u16 depth; > + u8 phase; > + efa_eqe_handler cb; > +}; > + > +struct efa_com_create_eq_params { > + dma_addr_t dma_addr; > + u32 event_bitmask; > + u16 depth; > + u8 entry_size_in_bytes; > + u8 msix_vec; > +}; > + > +struct efa_com_create_eq_result { > + u16 eqn; > +}; > + > +struct efa_com_destroy_eq_params { > + u16 eqn; > +}; > + > typedef void (*efa_aenq_handler)(void *data, > struct efa_admin_aenq_entry *aenq_e); > > @@ -121,9 +151,13 @@ struct efa_aenq_handlers { > efa_aenq_handler unimplemented_handler; > }; > > +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); > int efa_com_admin_init(struct efa_com_dev *edev, > struct efa_aenq_handlers *aenq_handlers); > void efa_com_admin_destroy(struct efa_com_dev *edev); > +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, > + efa_eqe_handler cb, u16 depth, u8 msix_vec); > +int efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); > int efa_com_dev_reset(struct efa_com_dev *edev, > enum efa_regs_reset_reason_types reset_reason); > void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); > @@ -140,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, > struct efa_admin_acq_entry *comp, > size_t comp_size); > void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); > +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, > + struct efa_com_eq *eeq); > > #endif /* _EFA_COM_H_ */ > 
diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c > index f752ef64159c..fb405da4e1db 100644 > --- a/drivers/infiniband/hw/efa/efa_com_cmd.c > +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c > @@ -1,17 +1,11 @@ > // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause > /* > - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. > + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. > */ > > #include "efa_com.h" > #include "efa_com_cmd.h" > > -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) > -{ > - *addr_low = lower_32_bits(addr); > - *addr_high = upper_32_bits(addr); > -} > - > int efa_com_create_qp(struct efa_com_dev *edev, > struct efa_com_create_qp_params *params, > struct efa_com_create_qp_result *res) > @@ -157,7 +151,7 @@ int efa_com_create_cq(struct efa_com_dev *edev, > struct efa_com_create_cq_params *params, > struct efa_com_create_cq_result *result) > { > - struct efa_admin_create_cq_resp cmd_completion; > + struct efa_admin_create_cq_resp cmd_completion = {}; > struct efa_admin_create_cq_cmd create_cmd = {}; > struct efa_com_admin_queue *aq = &edev->aq; > int err; > @@ -169,6 +163,11 @@ int efa_com_create_cq(struct efa_com_dev *edev, > create_cmd.cq_depth = params->cq_depth; > create_cmd.num_sub_cqs = params->num_sub_cqs; > create_cmd.uar = params->uarn; > + if (params->interrupt_mode_enabled) { > + EFA_SET(&create_cmd.cq_caps_1, > + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); > + create_cmd.eqn = params->eqn; > + } > > efa_com_set_dma_addr(params->dma_addr, > &create_cmd.cq_ba.mem_addr_high, > @@ -187,6 +186,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, > > result->cq_idx = cmd_completion.cq_idx; > result->actual_depth = params->cq_depth; > + result->db_off = cmd_completion.db_offset; > + result->db_valid = EFA_GET(&cmd_completion.flags, > + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); > > return 0; > } > @@ -497,6 +499,23 @@ int 
efa_com_get_device_attr(struct efa_com_dev *edev, > sizeof(resp.u.network_attr.addr)); > result->mtu = resp.u.network_attr.mtu; > > + if (efa_com_check_supported_feature_id(edev, > + EFA_ADMIN_EVENT_QUEUE_ATTR)) { > + err = efa_com_get_feature(edev, &resp, > + EFA_ADMIN_EVENT_QUEUE_ATTR); > + if (err) { > + ibdev_err_ratelimited( > + edev->efa_dev, > + "Failed to get event queue attributes %d\n", > + err); > + return err; > + } > + > + result->max_eq = resp.u.event_queue_attr.max_eq; > + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; > + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; > + } > + > return 0; > } > > diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h > index eea4ebfbe6ec..c33010bbf9e8 100644 > --- a/drivers/infiniband/hw/efa/efa_com_cmd.h > +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h > @@ -1,6 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ > /* > - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. > + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
> */ > > #ifndef _EFA_COM_CMD_H_ > @@ -73,7 +73,9 @@ struct efa_com_create_cq_params { > u16 cq_depth; > u16 num_sub_cqs; > u16 uarn; > + u16 eqn; > u8 entry_size_in_bytes; > + bool interrupt_mode_enabled; > }; > > struct efa_com_create_cq_result { > @@ -81,6 +83,8 @@ struct efa_com_create_cq_result { > u16 cq_idx; > /* actual cq depth in # of entries */ > u16 actual_depth; > + u32 db_off; > + bool db_valid; > }; > > struct efa_com_destroy_cq_params { > @@ -125,6 +129,9 @@ struct efa_com_get_device_attr_result { > u32 max_llq_size; > u32 max_rdma_size; > u32 device_caps; > + u32 max_eq; > + u32 max_eq_depth; > + u32 event_bitmask; /* EQ events bitmask */ > u16 sub_cqs_per_cq; > u16 max_sq_sge; > u16 max_rq_sge; > @@ -260,7 +267,6 @@ union efa_com_get_stats_result { > struct efa_com_rdma_read_stats rdma_read_stats; > }; > > -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); > int efa_com_create_qp(struct efa_com_dev *edev, > struct efa_com_create_qp_params *params, > struct efa_com_create_qp_result *res); > diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c > index 417dea5f90cf..8a3409b4dfbb 100644 > --- a/drivers/infiniband/hw/efa/efa_main.c > +++ b/drivers/infiniband/hw/efa/efa_main.c > @@ -67,6 +67,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) > pci_release_selected_regions(pdev, release_bars); > } > > +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) > +{ > + u16 cqn = eqe->u.comp_event.cqn; > + struct efa_cq *cq; > + > + /* Safe to load as we're in irq and removal calls synchronize_irq() */ > + cq = xa_load(&dev->cqs_xa, cqn); > + if (unlikely(!cq)) { > + ibdev_err_ratelimited(&dev->ibdev, > + "Completion event on non-existent CQ[%u]", > + cqn); > + return; > + } > + > + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); > +} > + > +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) > +{ > + struct efa_dev *dev 
= container_of(eeq->edev, struct efa_dev, edev); > + > + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == > + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) > + efa_process_comp_eqe(dev, eqe); > + else > + ibdev_err_ratelimited(&dev->ibdev, > + "Unknown event type received %lu", > + EFA_GET(&eqe->common, > + EFA_ADMIN_EQE_EVENT_TYPE)); > +} > + > +static irqreturn_t efa_intr_msix_comp(int irq, void *data) > +{ > + struct efa_eq *eq = data; > + struct efa_com_dev *edev = eq->eeq.edev; > + > + efa_com_eq_comp_intr_handler(edev, &eq->eeq); > + > + return IRQ_HANDLED; > +} > + > static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) > { > struct efa_dev *dev = data; > @@ -77,26 +118,43 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) > return IRQ_HANDLED; > } > > -static int efa_request_mgmnt_irq(struct efa_dev *dev) > +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) > { > - struct efa_irq *irq; > int err; > > - irq = &dev->admin_irq; > err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); > if (err) { > - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", > - err); > + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", > + irq->name, err); > return err; > } > > - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", > - nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn); > irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); > > return 0; > } > > +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, > + int vector) > +{ > + u32 cpu; > + > + cpu = vector - EFA_COMP_EQS_VEC_BASE; > + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, > + pci_name(dev->pdev)); > + eq->irq.handler = efa_intr_msix_comp; > + eq->irq.data = eq; > + eq->irq.vector = vector; > + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); > + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); > +} > + > +static void efa_free_irq(struct efa_dev *dev, 
struct efa_irq *irq) > +{ > + irq_set_affinity_hint(irq->irqn, NULL); > + free_irq(irq->irqn, irq->data); > +} > + > static void efa_setup_mgmnt_irq(struct efa_dev *dev) > { > u32 cpu; > @@ -105,8 +163,9 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) > "efa-mgmnt@pci:%s", pci_name(dev->pdev)); > dev->admin_irq.handler = efa_intr_msix_mgmnt; > dev->admin_irq.data = dev; > - dev->admin_irq.irqn = > - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); > + dev->admin_irq.vector = dev->admin_msix_vector_idx; > + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, > + dev->admin_msix_vector_idx); > cpu = cpumask_first(cpu_online_mask); > cpumask_set_cpu(cpu, > &dev->admin_irq.affinity_hint_mask); > @@ -115,20 +174,11 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) > dev->admin_irq.name); > } > > -static void efa_free_mgmnt_irq(struct efa_dev *dev) > -{ > - struct efa_irq *irq; > - > - irq = &dev->admin_irq; > - irq_set_affinity_hint(irq->irqn, NULL); > - free_irq(irq->irqn, irq->data); > -} > - > static int efa_set_mgmnt_irq(struct efa_dev *dev) > { > efa_setup_mgmnt_irq(dev); > > - return efa_request_mgmnt_irq(dev); > + return efa_request_irq(dev, &dev->admin_irq); > } > > static int efa_request_doorbell_bar(struct efa_dev *dev) > @@ -234,6 +284,76 @@ static void efa_set_host_info(struct efa_dev *dev) > dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); > } > > +static int efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) > +{ > + efa_com_eq_destroy(&dev->edev, &eq->eeq); > + efa_free_irq(dev, &eq->irq); > + > + return 0; > +} > + > +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) > +{ > + int err; > + > + efa_setup_comp_irq(dev, eq, msix_vec); > + err = efa_request_irq(dev, &eq->irq); > + if (err) > + return err; > + > + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, > + dev->dev_attr.max_eq_depth, msix_vec); > + if (err) > + goto err_free_comp_irq; > + > + return 0; > + > +err_free_comp_irq: > + 
efa_free_irq(dev, &eq->irq); > + return err; > +} > + > +static int efa_create_eqs(struct efa_dev *dev) > +{ > + unsigned int neqs = dev->dev_attr.max_eq; > + int err; > + int i; > + > + neqs = min_t(unsigned int, neqs, num_online_cpus()); > + dev->neqs = neqs; > + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); > + if (!dev->eqs) > + return -ENOMEM; > + > + for (i = 0; i < neqs; i++) { > + err = efa_create_eq(dev, &dev->eqs[i], > + i + EFA_COMP_EQS_VEC_BASE); > + if (err) > + goto err_destroy_eqs; > + } > + > + return 0; > + > +err_destroy_eqs: > + for (i--; i >= 0; i--) > + efa_destroy_eq(dev, &dev->eqs[i]); > + kfree(dev->eqs); > + > + return err; > +} > + > +static int efa_destroy_eqs(struct efa_dev *dev) > +{ > + int i; > + > + for (i = 0; i < dev->neqs; i++) > + efa_destroy_eq(dev, &dev->eqs[i]); > + > + kfree(dev->eqs); > + > + return 0; > +} > + > static const struct ib_device_ops efa_dev_ops = { > .owner = THIS_MODULE, > .driver_id = RDMA_DRIVER_EFA, > @@ -300,23 +420,29 @@ static int efa_ib_device_add(struct efa_dev *dev) > if (err) > goto err_release_doorbell_bar; > > + err = efa_create_eqs(dev); > + if (err) > + goto err_release_doorbell_bar; > + > efa_set_host_info(dev); > > dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; > dev->ibdev.phys_port_cnt = 1; > - dev->ibdev.num_comp_vectors = 1; > + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; > dev->ibdev.dev.parent = &pdev->dev; > > ib_set_device_ops(&dev->ibdev, &efa_dev_ops); > > err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); > if (err) > - goto err_release_doorbell_bar; > + goto err_destroy_eqs; > > ibdev_info(&dev->ibdev, "IB device registered\n"); > > return 0; > > +err_destroy_eqs: > + efa_destroy_eqs(dev); > err_release_doorbell_bar: > efa_release_doorbell_bar(dev); > return err; > @@ -324,9 +450,10 @@ static int efa_ib_device_add(struct efa_dev *dev) > > static void efa_ib_device_remove(struct efa_dev *dev) > { > - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); > 
ibdev_info(&dev->ibdev, "Unregister ib device\n"); > ib_unregister_device(&dev->ibdev); > + efa_destroy_eqs(dev); > + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); > efa_release_doorbell_bar(dev); > } > > @@ -339,8 +466,12 @@ static int efa_enable_msix(struct efa_dev *dev) > { > int msix_vecs, irq_num; > > - /* Reserve the max msix vectors we might need */ > - msix_vecs = EFA_NUM_MSIX_VEC; > + /* > + * Reserve the max msix vectors we might need, one vector is reserved > + * for admin. > + */ > + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), > + num_online_cpus() + 1); > dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", > msix_vecs); > > @@ -421,6 +552,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) > edev->efa_dev = dev; > edev->dmadev = &pdev->dev; > dev->pdev = pdev; > + xa_init(&dev->cqs_xa); > > bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; > err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); > @@ -476,7 +608,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) > return dev; > > err_free_mgmnt_irq: > - efa_free_mgmnt_irq(dev); > + efa_free_irq(dev, &dev->admin_irq); > err_disable_msix: > efa_disable_msix(dev); > err_reg_read_destroy: > @@ -499,11 +631,12 @@ static void efa_remove_device(struct pci_dev *pdev) > > edev = &dev->edev; > efa_com_admin_destroy(edev); > - efa_free_mgmnt_irq(dev); > + efa_free_irq(dev, &dev->admin_irq); > efa_disable_msix(dev); > efa_com_mmio_reg_read_destroy(edev); > devm_iounmap(&pdev->dev, edev->reg_bar); > efa_release_bars(dev, EFA_BASE_BAR_MASK); > + xa_destroy(&dev->cqs_xa); > ib_dealloc_device(&dev->ibdev); > pci_disable_device(pdev); > } > diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h > index 4017982fe13b..714ae6258800 100644 > --- a/drivers/infiniband/hw/efa/efa_regs_defs.h > +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h > @@ -1,6 +1,6 @@ > /* SPDX-License-Identifier: GPL-2.0 
OR BSD-2-Clause */ > /* > - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. > + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. > */ > > #ifndef _EFA_REGS_H_ > @@ -42,6 +42,7 @@ enum efa_regs_reset_reason_types { > #define EFA_REGS_MMIO_REG_READ_OFF 0x5c > #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 > #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 > +#define EFA_REGS_EQ_DB_OFF 0x68 > > /* version register */ > #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff > @@ -93,4 +94,8 @@ enum efa_regs_reset_reason_types { > #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff > #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 > > +/* eq_db register */ > +#define EFA_REGS_EQ_DB_EQN_MASK 0xffff > +#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 > + > #endif /* _EFA_REGS_H_ */ > diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c > index e5f9d90aad5e..3353ad4925ee 100644 > --- a/drivers/infiniband/hw/efa/efa_verbs.c > +++ b/drivers/infiniband/hw/efa/efa_verbs.c > @@ -245,6 +245,9 @@ int efa_query_device(struct ib_device *ibdev, > if (EFA_DEV_CAP(dev, RNR_RETRY)) > resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; > > + if (dev->neqs) > + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; > + > err = ib_copy_to_udata(udata, &resp, > min(sizeof(resp), udata->outlen)); > if (err) { > @@ -984,6 +987,12 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) > return efa_com_destroy_cq(&dev->edev, ¶ms); > } > > +static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) > +{ > + rdma_user_mmap_entry_remove(cq->db_mmap_entry); > + rdma_user_mmap_entry_remove(cq->mmap_entry); > +} > + > int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) > { > struct efa_dev *dev = to_edev(ibcq->device); > @@ -993,15 +1002,25 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) > "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", > cq->cq_idx, cq->cpu_addr, 
cq->size, &cq->dma_addr); > > - rdma_user_mmap_entry_remove(cq->mmap_entry); > + efa_cq_user_mmap_entries_remove(cq); > efa_destroy_cq_idx(dev, cq->cq_idx); > + if (cq->eq) { > + xa_erase(&dev->cqs_xa, cq->cq_idx); > + synchronize_irq(cq->eq->irq.irqn); > + } > efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, > DMA_FROM_DEVICE); > return 0; > } > > +static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) > +{ > + return &dev->eqs[vec]; > +} > + > static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, > - struct efa_ibv_create_cq_resp *resp) > + struct efa_ibv_create_cq_resp *resp, > + bool db_valid) > { > resp->q_mmap_size = cq->size; > cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, > @@ -1011,6 +1030,21 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, > if (!cq->mmap_entry) > return -ENOMEM; > > + if (db_valid) { > + cq->db_mmap_entry = > + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, > + dev->db_bar_addr + resp->db_off, > + PAGE_SIZE, EFA_MMAP_IO_NC, > + &resp->db_mmap_key); > + if (!cq->db_mmap_entry) { > + rdma_user_mmap_entry_remove(cq->mmap_entry); > + return -ENOMEM; > + } > + > + resp->db_off &= ~PAGE_MASK; > + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; > + } > + > return 0; > } > > @@ -1019,8 +1053,8 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, > { > struct efa_ucontext *ucontext = rdma_udata_to_drv_context( > udata, struct efa_ucontext, ibucontext); > + struct efa_com_create_cq_params params = {}; > struct efa_ibv_create_cq_resp resp = {}; > - struct efa_com_create_cq_params params; > struct efa_com_create_cq_result result; > struct ib_device *ibdev = ibcq->device; > struct efa_dev *dev = to_edev(ibdev); > @@ -1065,7 +1099,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, > goto err_out; > } > > - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { > + if (cmd.comp_mask || 
!is_reserved_cleared(cmd.reserved_58)) { > ibdev_dbg(ibdev, > "Incompatible ABI params, unknown fields in udata\n"); > err = -EINVAL; > @@ -1101,29 +1135,45 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, > params.dma_addr = cq->dma_addr; > params.entry_size_in_bytes = cmd.cq_entry_size; > params.num_sub_cqs = cmd.num_sub_cqs; > + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { > + cq->eq = efa_vec2eq(dev, attr->comp_vector); > + params.eqn = cq->eq->eeq.eqn; > + params.interrupt_mode_enabled = true; > + } > + > err = efa_com_create_cq(&dev->edev, ¶ms, &result); > if (err) > goto err_free_mapped; > > + resp.db_off = result.db_off; > resp.cq_idx = result.cq_idx; > cq->cq_idx = result.cq_idx; > cq->ibcq.cqe = result.actual_depth; > WARN_ON_ONCE(entries != result.actual_depth); > > - err = cq_mmap_entries_setup(dev, cq, &resp); > + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); > if (err) { > ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", > cq->cq_idx); > goto err_destroy_cq; > } > > + if (cq->eq) { > + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); > + if (err) { > + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", > + cq->cq_idx); > + goto err_remove_mmap; > + } > + } > + > if (udata->outlen) { > err = ib_copy_to_udata(udata, &resp, > min(sizeof(resp), udata->outlen)); > if (err) { > ibdev_dbg(ibdev, > "Failed to copy udata for create_cq\n"); > - goto err_remove_mmap; > + goto err_xa_erase; > } > } > > @@ -1132,8 +1182,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, > > return 0; > > +err_xa_erase: > + if (cq->eq) > + xa_erase(&dev->cqs_xa, cq->cq_idx); > err_remove_mmap: > - rdma_user_mmap_entry_remove(cq->mmap_entry); > + efa_cq_user_mmap_entries_remove(cq); > err_destroy_cq: > efa_destroy_cq_idx(dev, cq->cq_idx); > err_free_mapped: > diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h > index f89fbb5b1e8d..08035ccf1fff 100644 
> --- a/include/uapi/rdma/efa-abi.h > +++ b/include/uapi/rdma/efa-abi.h > @@ -1,6 +1,6 @@ > /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ > /* > - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. > + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. > */ > > #ifndef EFA_ABI_USER_H > @@ -52,11 +52,20 @@ struct efa_ibv_alloc_pd_resp { > __u8 reserved_30[2]; > }; > > +enum { > + EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, > +}; > + > struct efa_ibv_create_cq { > __u32 comp_mask; > __u32 cq_entry_size; > __u16 num_sub_cqs; > - __u8 reserved_50[6]; > + __u8 flags; > + __u8 reserved_58[5]; > +}; > + > +enum { > + EFA_CREATE_CQ_RESP_DB_OFF = 1 << 0, > }; > > struct efa_ibv_create_cq_resp { > @@ -65,7 +74,9 @@ struct efa_ibv_create_cq_resp { > __aligned_u64 q_mmap_key; > __aligned_u64 q_mmap_size; > __u16 cq_idx; > - __u8 reserved_d0[6]; > + __u8 reserved_d0[2]; > + __u32 db_off; > + __aligned_u64 db_mmap_key; > }; > > enum { > @@ -106,6 +117,7 @@ struct efa_ibv_create_ah_resp { > enum { > EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, > EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, > + EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, > }; > > struct efa_ibv_ex_query_device_resp { > > base-commit: d30ef6d5c013c19e907f2a3a3d6eee04fcd3de0d > -- > 2.33.0 >
On 01/10/2021 2:31, Leon Romanovsky wrote: > On Thu, Sep 30, 2021 at 03:16:00PM +0300, Gal Pressman wrote: >> This patch adds support for CQ notifications through the standard verbs >> api. >> >> In order to achieve that, a new event queue (EQ) object is introduced, >> which is in charge of reporting completion events to the driver. >> On driver load, EQs are allocated and their affinity is set to a single >> cpu. When a user app creates a CQ with a completion channel, the >> completion vector number is converted to a EQ number, which is in charge >> of reporting the CQ events. >> >> In addition, the CQ creation admin command now returns an offset for the >> CQ doorbell, which is mapped to the userspace provider and is used to >> arm the CQ when requested by the user. >> >> The EQs use a single doorbell (located on the registers BAR), which >> encodes the EQ number and arm as part of the doorbell value. >> The EQs are polled by the driver on each new EQE, and arm it when the >> poll is completed. >> >> Reviewed-by: Firas JahJah <firasj@amazon.com> >> Reviewed-by: Yossi Leybovich <sleybo@amazon.com> >> Signed-off-by: Gal Pressman <galpress@amazon.com> >> --- >> PR was sent: >> https://github.com/linux-rdma/rdma-core/pull/1044 >> >> Changelog - >> v2->v3: https://lore.kernel.org/linux-rdma/20210913120406.61745-1-galpress@amazon.com/ >> * Only store CQs with interrupts enabled in the CQs xarray >> * Add a comment before the xa_load to explain why it is safe >> >> v1->v2: https://lore.kernel.org/linux-rdma/20210811151131.39138-1-galpress@amazon.com/ >> * Replace xa_init_flags() with xa_init() >> * Add a synchronize_irq() in destroy_cq flow to prevent a race with >> interrupt flow. 
>> --- >> drivers/infiniband/hw/efa/efa.h | 19 +- >> .../infiniband/hw/efa/efa_admin_cmds_defs.h | 100 +++++++++- >> drivers/infiniband/hw/efa/efa_admin_defs.h | 41 ++++ >> drivers/infiniband/hw/efa/efa_com.c | 171 ++++++++++++++++ >> drivers/infiniband/hw/efa/efa_com.h | 38 +++- >> drivers/infiniband/hw/efa/efa_com_cmd.c | 35 +++- >> drivers/infiniband/hw/efa/efa_com_cmd.h | 10 +- >> drivers/infiniband/hw/efa/efa_main.c | 185 +++++++++++++++--- >> drivers/infiniband/hw/efa/efa_regs_defs.h | 7 +- >> drivers/infiniband/hw/efa/efa_verbs.c | 67 ++++++- >> include/uapi/rdma/efa-abi.h | 18 +- >> 11 files changed, 636 insertions(+), 55 deletions(-) >> >> diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h >> index 87b1dadeb7fe..587d4bfbb3d1 100644 >> --- a/drivers/infiniband/hw/efa/efa.h >> +++ b/drivers/infiniband/hw/efa/efa.h >> @@ -20,14 +20,14 @@ >> >> #define EFA_IRQNAME_SIZE 40 >> >> -/* 1 for AENQ + ADMIN */ >> -#define EFA_NUM_MSIX_VEC 1 >> #define EFA_MGMNT_MSIX_VEC_IDX 0 >> +#define EFA_COMP_EQS_VEC_BASE 1 >> >> struct efa_irq { >> irq_handler_t handler; >> void *data; >> u32 irqn; >> + u32 vector; >> cpumask_t affinity_hint_mask; >> char name[EFA_IRQNAME_SIZE]; >> }; >> @@ -61,6 +61,13 @@ struct efa_dev { >> struct efa_irq admin_irq; >> >> struct efa_stats stats; >> + >> + /* Array of completion EQs */ >> + struct efa_eq *eqs; >> + unsigned int neqs; >> + >> + /* Only stores CQs with interrupts enabled */ >> + struct xarray cqs_xa; >> }; >> >> struct efa_ucontext { >> @@ -84,8 +91,11 @@ struct efa_cq { >> dma_addr_t dma_addr; >> void *cpu_addr; >> struct rdma_user_mmap_entry *mmap_entry; >> + struct rdma_user_mmap_entry *db_mmap_entry; >> size_t size; >> u16 cq_idx; >> + /* NULL when no interrupts requested */ >> + struct efa_eq *eq; >> }; >> >> struct efa_qp { >> @@ -116,6 +126,11 @@ struct efa_ah { >> u8 id[EFA_GID_SIZE]; >> }; >> >> +struct efa_eq { >> + struct efa_com_eq eeq; >> + struct efa_irq irq; >> +}; >> + >> int 
efa_query_device(struct ib_device *ibdev, >> struct ib_device_attr *props, >> struct ib_udata *udata); >> diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h >> index fa38b34eddb8..0b0b93b529f3 100644 >> --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h >> +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h >> @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { >> EFA_ADMIN_DEALLOC_PD = 15, >> EFA_ADMIN_ALLOC_UAR = 16, >> EFA_ADMIN_DEALLOC_UAR = 17, >> - EFA_ADMIN_MAX_OPCODE = 17, >> + EFA_ADMIN_CREATE_EQ = 18, >> + EFA_ADMIN_DESTROY_EQ = 19, >> + EFA_ADMIN_MAX_OPCODE = 19, >> }; >> >> enum efa_admin_aq_feature_id { >> @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id { >> EFA_ADMIN_QUEUE_ATTR = 4, >> EFA_ADMIN_HW_HINTS = 5, >> EFA_ADMIN_HOST_INFO = 6, >> + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, >> }; >> >> /* QP transport type */ >> @@ -430,8 +433,8 @@ struct efa_admin_create_cq_cmd { >> /* >> * 4:0 : reserved5 - MBZ >> * 5 : interrupt_mode_enabled - if set, cq operates >> - * in interrupt mode (i.e. CQ events and MSI-X are >> - * generated), otherwise - polling >> + * in interrupt mode (i.e. CQ events and EQ elements >> + * are generated), otherwise - polling >> * 6 : virt - If set, ring base address is virtual >> * (IOVA returned by MR registration) >> * 7 : reserved6 - MBZ >> @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd { >> /* completion queue depth in # of entries. 
must be power of 2 */ >> u16 cq_depth; >> >> - /* msix vector assigned to this cq */ >> - u32 msix_vector_idx; >> + /* EQ number assigned to this cq */ >> + u16 eqn; >> + >> + /* MBZ */ >> + u16 reserved; >> >> /* >> * CQ ring base address, virtual or physical depending on 'virt' >> @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp { >> >> /* actual cq depth in number of entries */ >> u16 cq_actual_depth; >> + >> + /* CQ doorbell address, as offset to PCIe DB BAR */ >> + u32 db_offset; >> + >> + /* >> + * 0 : db_valid - If set, doorbell offset is valid. >> + * Always set when interrupts are requested. >> + */ >> + u32 flags; >> }; >> >> struct efa_admin_destroy_cq_cmd { >> @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc { >> u16 max_tx_batch; >> }; >> >> +struct efa_admin_event_queue_attr_desc { >> + /* The maximum number of event queues supported */ >> + u32 max_eq; >> + >> + /* Maximum number of EQEs per Event Queue */ >> + u32 max_eq_depth; >> + >> + /* Supported events bitmask */ >> + u32 event_bitmask; >> +}; >> + >> struct efa_admin_feature_aenq_desc { >> /* bitmask for AENQ groups the device can report */ >> u32 supported_groups; >> @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp { >> >> struct efa_admin_feature_queue_attr_desc queue_attr; >> >> + struct efa_admin_event_queue_attr_desc event_queue_attr; >> + >> struct efa_admin_hw_hints hw_hints; >> } u; >> }; >> @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp { >> struct efa_admin_acq_common_desc acq_common_desc; >> }; >> >> +struct efa_admin_create_eq_cmd { >> + struct efa_admin_aq_common_desc aq_common_descriptor; >> + >> + /* Size of the EQ in entries, must be power of 2 */ >> + u16 depth; >> + >> + /* MSI-X table entry index */ >> + u8 msix_vec; >> + >> + /* >> + * 4:0 : entry_size_words - size of EQ entry in >> + * 32-bit words >> + * 7:5 : reserved - MBZ >> + */ >> + u8 caps; >> + >> + /* EQ ring base address */ >> + struct efa_common_mem_addr ba; >> + >> + /* >> + * 
Enabled events on this EQ >> + * 0 : completion_events - Enable completion events >> + * 31:1 : reserved - MBZ >> + */ >> + u32 event_bitmask; >> + >> + /* MBZ */ >> + u32 reserved; >> +}; >> + >> +struct efa_admin_create_eq_resp { >> + struct efa_admin_acq_common_desc acq_common_desc; >> + >> + /* EQ number */ >> + u16 eqn; >> + >> + /* MBZ */ >> + u16 reserved; >> +}; >> + >> +struct efa_admin_destroy_eq_cmd { >> + struct efa_admin_aq_common_desc aq_common_descriptor; >> + >> + /* EQ number */ >> + u16 eqn; >> + >> + /* MBZ */ >> + u16 reserved; >> +}; >> + >> +struct efa_admin_destroy_eq_resp { >> + struct efa_admin_acq_common_desc acq_common_desc; >> +}; >> + >> /* asynchronous event notification groups */ >> enum efa_admin_aenq_group { >> EFA_ADMIN_FATAL_ERROR = 1, >> @@ -899,10 +981,18 @@ struct efa_admin_host_info { >> #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) >> #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) >> >> +/* create_cq_resp */ >> +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) >> + >> /* feature_device_attr_desc */ >> #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) >> #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) >> >> +/* create_eq_cmd */ >> +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) >> +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) >> +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) >> + >> /* host_info */ >> #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) >> #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) >> diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h >> index 78ff9389ae25..83f20c38a840 100644 >> --- a/drivers/infiniband/hw/efa/efa_admin_defs.h >> +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h >> @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry { >> u32 inline_data_w4[12]; >> }; >> >> +enum efa_admin_eqe_event_type { >> + 
EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, >> +}; >> + >> +/* Completion event */ >> +struct efa_admin_comp_event { >> + /* CQ number */ >> + u16 cqn; >> + >> + /* MBZ */ >> + u16 reserved; >> + >> + /* MBZ */ >> + u32 reserved2; >> +}; >> + >> +/* Event Queue Element */ >> +struct efa_admin_eqe { >> + /* >> + * 0 : phase >> + * 8:1 : event_type - Event type >> + * 31:9 : reserved - MBZ >> + */ >> + u32 common; >> + >> + /* MBZ */ >> + u32 reserved; >> + >> + union { >> + /* Event data */ >> + u32 event_data[2]; >> + >> + /* Completion Event */ >> + struct efa_admin_comp_event comp_event; >> + } u; >> +}; >> + >> /* aq_common_desc */ >> #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) >> #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) >> @@ -131,4 +168,8 @@ struct efa_admin_aenq_entry { >> /* aenq_common_desc */ >> #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) >> >> +/* eqe */ >> +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) >> +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) >> + >> #endif /* _EFA_ADMIN_H_ */ >> diff --git a/drivers/infiniband/hw/efa/efa_com.c b/drivers/infiniband/hw/efa/efa_com.c >> index 0d523ad736c7..c00c7f526067 100644 >> --- a/drivers/infiniband/hw/efa/efa_com.c >> +++ b/drivers/infiniband/hw/efa/efa_com.c >> @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd) >> EFA_CMD_STR_CASE(DEALLOC_PD); >> EFA_CMD_STR_CASE(ALLOC_UAR); >> EFA_CMD_STR_CASE(DEALLOC_UAR); >> + EFA_CMD_STR_CASE(CREATE_EQ); >> + EFA_CMD_STR_CASE(DESTROY_EQ); >> default: return "unknown command opcode"; >> } >> #undef EFA_CMD_STR_CASE >> } >> >> +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) >> +{ >> + *addr_low = lower_32_bits(addr); >> + *addr_high = upper_32_bits(addr); >> +} >> + >> static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) >> { >> struct efa_com_mmio_read *mmio_read = &edev->mmio_read; >> @@ -1081,3 +1089,166 @@ int efa_com_dev_reset(struct efa_com_dev *edev, >> >> return 0; >> } >> + 
>> +static int efa_com_create_eq(struct efa_com_dev *edev, >> + struct efa_com_create_eq_params *params, >> + struct efa_com_create_eq_result *result) >> +{ >> + struct efa_com_admin_queue *aq = &edev->aq; >> + struct efa_admin_create_eq_resp resp = {}; >> + struct efa_admin_create_eq_cmd cmd = {}; >> + int err; >> + >> + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; >> + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, >> + params->entry_size_in_bytes / 4); >> + cmd.depth = params->depth; >> + cmd.event_bitmask = params->event_bitmask; >> + cmd.msix_vec = params->msix_vec; >> + >> + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, >> + &cmd.ba.mem_addr_low); >> + >> + err = efa_com_cmd_exec(aq, >> + (struct efa_admin_aq_entry *)&cmd, >> + sizeof(cmd), >> + (struct efa_admin_acq_entry *)&resp, >> + sizeof(resp)); >> + if (err) { >> + ibdev_err_ratelimited(edev->efa_dev, >> + "Failed to create eq[%d]\n", err); >> + return err; >> + } >> + >> + result->eqn = resp.eqn; >> + >> + return 0; >> +} >> + >> +static int efa_com_destroy_eq(struct efa_com_dev *edev, >> + struct efa_com_destroy_eq_params *params) >> +{ > > Single caller of this function is not interested in return value from > this function. It is worth to make it void from the beginning. Thanks, will change.
diff --git a/drivers/infiniband/hw/efa/efa.h b/drivers/infiniband/hw/efa/efa.h index 87b1dadeb7fe..587d4bfbb3d1 100644 --- a/drivers/infiniband/hw/efa/efa.h +++ b/drivers/infiniband/hw/efa/efa.h @@ -20,14 +20,14 @@ #define EFA_IRQNAME_SIZE 40 -/* 1 for AENQ + ADMIN */ -#define EFA_NUM_MSIX_VEC 1 #define EFA_MGMNT_MSIX_VEC_IDX 0 +#define EFA_COMP_EQS_VEC_BASE 1 struct efa_irq { irq_handler_t handler; void *data; u32 irqn; + u32 vector; cpumask_t affinity_hint_mask; char name[EFA_IRQNAME_SIZE]; }; @@ -61,6 +61,13 @@ struct efa_dev { struct efa_irq admin_irq; struct efa_stats stats; + + /* Array of completion EQs */ + struct efa_eq *eqs; + unsigned int neqs; + + /* Only stores CQs with interrupts enabled */ + struct xarray cqs_xa; }; struct efa_ucontext { @@ -84,8 +91,11 @@ struct efa_cq { dma_addr_t dma_addr; void *cpu_addr; struct rdma_user_mmap_entry *mmap_entry; + struct rdma_user_mmap_entry *db_mmap_entry; size_t size; u16 cq_idx; + /* NULL when no interrupts requested */ + struct efa_eq *eq; }; struct efa_qp { @@ -116,6 +126,11 @@ struct efa_ah { u8 id[EFA_GID_SIZE]; }; +struct efa_eq { + struct efa_com_eq eeq; + struct efa_irq irq; +}; + int efa_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *udata); diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index fa38b34eddb8..0b0b93b529f3 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -28,7 +28,9 @@ enum efa_admin_aq_opcode { EFA_ADMIN_DEALLOC_PD = 15, EFA_ADMIN_ALLOC_UAR = 16, EFA_ADMIN_DEALLOC_UAR = 17, - EFA_ADMIN_MAX_OPCODE = 17, + EFA_ADMIN_CREATE_EQ = 18, + EFA_ADMIN_DESTROY_EQ = 19, + EFA_ADMIN_MAX_OPCODE = 19, }; enum efa_admin_aq_feature_id { @@ -38,6 +40,7 @@ enum efa_admin_aq_feature_id { EFA_ADMIN_QUEUE_ATTR = 4, EFA_ADMIN_HW_HINTS = 5, EFA_ADMIN_HOST_INFO = 6, + EFA_ADMIN_EVENT_QUEUE_ATTR = 7, }; /* QP transport type */ @@ -430,8 +433,8 @@ 
struct efa_admin_create_cq_cmd { /* * 4:0 : reserved5 - MBZ * 5 : interrupt_mode_enabled - if set, cq operates - * in interrupt mode (i.e. CQ events and MSI-X are - * generated), otherwise - polling + * in interrupt mode (i.e. CQ events and EQ elements + * are generated), otherwise - polling * 6 : virt - If set, ring base address is virtual * (IOVA returned by MR registration) * 7 : reserved6 - MBZ @@ -448,8 +451,11 @@ struct efa_admin_create_cq_cmd { /* completion queue depth in # of entries. must be power of 2 */ u16 cq_depth; - /* msix vector assigned to this cq */ - u32 msix_vector_idx; + /* EQ number assigned to this cq */ + u16 eqn; + + /* MBZ */ + u16 reserved; /* * CQ ring base address, virtual or physical depending on 'virt' @@ -480,6 +486,15 @@ struct efa_admin_create_cq_resp { /* actual cq depth in number of entries */ u16 cq_actual_depth; + + /* CQ doorbell address, as offset to PCIe DB BAR */ + u32 db_offset; + + /* + * 0 : db_valid - If set, doorbell offset is valid. + * Always set when interrupts are requested. 
+ */ + u32 flags; }; struct efa_admin_destroy_cq_cmd { @@ -669,6 +684,17 @@ struct efa_admin_feature_queue_attr_desc { u16 max_tx_batch; }; +struct efa_admin_event_queue_attr_desc { + /* The maximum number of event queues supported */ + u32 max_eq; + + /* Maximum number of EQEs per Event Queue */ + u32 max_eq_depth; + + /* Supported events bitmask */ + u32 event_bitmask; +}; + struct efa_admin_feature_aenq_desc { /* bitmask for AENQ groups the device can report */ u32 supported_groups; @@ -727,6 +753,8 @@ struct efa_admin_get_feature_resp { struct efa_admin_feature_queue_attr_desc queue_attr; + struct efa_admin_event_queue_attr_desc event_queue_attr; + struct efa_admin_hw_hints hw_hints; } u; }; @@ -810,6 +838,60 @@ struct efa_admin_dealloc_uar_resp { struct efa_admin_acq_common_desc acq_common_desc; }; +struct efa_admin_create_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* Size of the EQ in entries, must be power of 2 */ + u16 depth; + + /* MSI-X table entry index */ + u8 msix_vec; + + /* + * 4:0 : entry_size_words - size of EQ entry in + * 32-bit words + * 7:5 : reserved - MBZ + */ + u8 caps; + + /* EQ ring base address */ + struct efa_common_mem_addr ba; + + /* + * Enabled events on this EQ + * 0 : completion_events - Enable completion events + * 31:1 : reserved - MBZ + */ + u32 event_bitmask; + + /* MBZ */ + u32 reserved; +}; + +struct efa_admin_create_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_cmd { + struct efa_admin_aq_common_desc aq_common_descriptor; + + /* EQ number */ + u16 eqn; + + /* MBZ */ + u16 reserved; +}; + +struct efa_admin_destroy_eq_resp { + struct efa_admin_acq_common_desc acq_common_desc; +}; + /* asynchronous event notification groups */ enum efa_admin_aenq_group { EFA_ADMIN_FATAL_ERROR = 1, @@ -899,10 +981,18 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define 
EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +/* create_cq_resp */ +#define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) + /* feature_device_attr_desc */ #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RDMA_READ_MASK BIT(0) #define EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_RNR_RETRY_MASK BIT(1) +/* create_eq_cmd */ +#define EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_EQ_CMD_VIRT_MASK BIT(6) +#define EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS_MASK BIT(0) + /* host_info */ #define EFA_ADMIN_HOST_INFO_DRIVER_MODULE_TYPE_MASK GENMASK(7, 0) #define EFA_ADMIN_HOST_INFO_DRIVER_SUB_MINOR_MASK GENMASK(15, 8) diff --git a/drivers/infiniband/hw/efa/efa_admin_defs.h b/drivers/infiniband/hw/efa/efa_admin_defs.h index 78ff9389ae25..83f20c38a840 100644 --- a/drivers/infiniband/hw/efa/efa_admin_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_defs.h @@ -118,6 +118,43 @@ struct efa_admin_aenq_entry { u32 inline_data_w4[12]; }; +enum efa_admin_eqe_event_type { + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION = 0, +}; + +/* Completion event */ +struct efa_admin_comp_event { + /* CQ number */ + u16 cqn; + + /* MBZ */ + u16 reserved; + + /* MBZ */ + u32 reserved2; +}; + +/* Event Queue Element */ +struct efa_admin_eqe { + /* + * 0 : phase + * 8:1 : event_type - Event type + * 31:9 : reserved - MBZ + */ + u32 common; + + /* MBZ */ + u32 reserved; + + union { + /* Event data */ + u32 event_data[2]; + + /* Completion Event */ + struct efa_admin_comp_event comp_event; + } u; +}; + /* aq_common_desc */ #define EFA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) #define EFA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) @@ -131,4 +168,8 @@ struct efa_admin_aenq_entry { /* aenq_common_desc */ #define EFA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) +/* eqe */ +#define EFA_ADMIN_EQE_PHASE_MASK BIT(0) +#define EFA_ADMIN_EQE_EVENT_TYPE_MASK GENMASK(8, 1) + #endif /* _EFA_ADMIN_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com.c 
b/drivers/infiniband/hw/efa/efa_com.c index 0d523ad736c7..c00c7f526067 100644 --- a/drivers/infiniband/hw/efa/efa_com.c +++ b/drivers/infiniband/hw/efa/efa_com.c @@ -56,11 +56,19 @@ static const char *efa_com_cmd_str(u8 cmd) EFA_CMD_STR_CASE(DEALLOC_PD); EFA_CMD_STR_CASE(ALLOC_UAR); EFA_CMD_STR_CASE(DEALLOC_UAR); + EFA_CMD_STR_CASE(CREATE_EQ); + EFA_CMD_STR_CASE(DESTROY_EQ); default: return "unknown command opcode"; } #undef EFA_CMD_STR_CASE } +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) +{ + *addr_low = lower_32_bits(addr); + *addr_high = upper_32_bits(addr); +} + static u32 efa_com_reg_read32(struct efa_com_dev *edev, u16 offset) { struct efa_com_mmio_read *mmio_read = &edev->mmio_read; @@ -1081,3 +1089,166 @@ int efa_com_dev_reset(struct efa_com_dev *edev, return 0; } + +static int efa_com_create_eq(struct efa_com_dev *edev, + struct efa_com_create_eq_params *params, + struct efa_com_create_eq_result *result) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_create_eq_resp resp = {}; + struct efa_admin_create_eq_cmd cmd = {}; + int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_CREATE_EQ; + EFA_SET(&cmd.caps, EFA_ADMIN_CREATE_EQ_CMD_ENTRY_SIZE_WORDS, + params->entry_size_in_bytes / 4); + cmd.depth = params->depth; + cmd.event_bitmask = params->event_bitmask; + cmd.msix_vec = params->msix_vec; + + efa_com_set_dma_addr(params->dma_addr, &cmd.ba.mem_addr_high, + &cmd.ba.mem_addr_low); + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to create eq[%d]\n", err); + return err; + } + + result->eqn = resp.eqn; + + return 0; +} + +static int efa_com_destroy_eq(struct efa_com_dev *edev, + struct efa_com_destroy_eq_params *params) +{ + struct efa_com_admin_queue *aq = &edev->aq; + struct efa_admin_destroy_eq_resp resp = {}; + struct efa_admin_destroy_eq_cmd cmd = {}; + 
int err; + + cmd.aq_common_descriptor.opcode = EFA_ADMIN_DESTROY_EQ; + cmd.eqn = params->eqn; + + err = efa_com_cmd_exec(aq, + (struct efa_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct efa_admin_acq_entry *)&resp, + sizeof(resp)); + + if (err) { + ibdev_err_ratelimited(edev->efa_dev, + "Failed to destroy EQ-%u [%d]\n", cmd.eqn, + err); + return err; + } + + return 0; +} + +static void efa_com_arm_eq(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + u32 val = 0; + + EFA_SET(&val, EFA_REGS_EQ_DB_EQN, eeq->eqn); + EFA_SET(&val, EFA_REGS_EQ_DB_ARM, 1); + + writel(val, edev->reg_bar + EFA_REGS_EQ_DB_OFF); +} + +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq) +{ + struct efa_admin_eqe *eqe; + u32 processed = 0; + u8 phase; + u32 ci; + + ci = eeq->cc & (eeq->depth - 1); + phase = eeq->phase; + eqe = &eeq->eqes[ci]; + + /* Go over all the events */ + while ((READ_ONCE(eqe->common) & EFA_ADMIN_EQE_PHASE_MASK) == phase) { + /* + * Do not read the rest of the completion entry before the + * phase bit was validated + */ + dma_rmb(); + + eeq->cb(eeq, eqe); + + /* Get next event entry */ + ci++; + processed++; + + if (ci == eeq->depth) { + ci = 0; + phase = !phase; + } + + eqe = &eeq->eqes[ci]; + } + + eeq->cc += processed; + eeq->phase = phase; + efa_com_arm_eq(eeq->edev, eeq); +} + +int efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq) +{ + struct efa_com_destroy_eq_params params = { + .eqn = eeq->eqn, + }; + + efa_com_destroy_eq(edev, ¶ms); + dma_free_coherent(edev->dmadev, eeq->depth * sizeof(*eeq->eqes), + eeq->eqes, eeq->dma_addr); + + return 0; +} + +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec) +{ + struct efa_com_create_eq_params params = {}; + struct efa_com_create_eq_result result = {}; + int err; + + params.depth = depth; + params.entry_size_in_bytes = sizeof(*eeq->eqes); + EFA_SET(¶ms.event_bitmask, + 
EFA_ADMIN_CREATE_EQ_CMD_COMPLETION_EVENTS, 1); + params.msix_vec = msix_vec; + + eeq->eqes = dma_alloc_coherent(edev->dmadev, + params.depth * sizeof(*eeq->eqes), + ¶ms.dma_addr, GFP_KERNEL); + if (!eeq->eqes) + return -ENOMEM; + + err = efa_com_create_eq(edev, ¶ms, &result); + if (err) + goto err_free_coherent; + + eeq->eqn = result.eqn; + eeq->edev = edev; + eeq->dma_addr = params.dma_addr; + eeq->phase = 1; + eeq->depth = params.depth; + eeq->cb = cb; + efa_com_arm_eq(edev, eeq); + + return 0; + +err_free_coherent: + dma_free_coherent(edev->dmadev, params.depth * sizeof(*eeq->eqes), + eeq->eqes, params.dma_addr); + return err; +} diff --git a/drivers/infiniband/hw/efa/efa_com.h b/drivers/infiniband/hw/efa/efa_com.h index 5e4c88877ddb..0fe241cd48e7 100644 --- a/drivers/infiniband/hw/efa/efa_com.h +++ b/drivers/infiniband/hw/efa/efa_com.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef _EFA_COM_H_ @@ -80,6 +80,9 @@ struct efa_com_admin_queue { }; struct efa_aenq_handlers; +struct efa_com_eq; +typedef void (*efa_eqe_handler)(struct efa_com_eq *eeq, + struct efa_admin_eqe *eqe); struct efa_com_aenq { struct efa_admin_aenq_entry *entries; @@ -112,6 +115,33 @@ struct efa_com_dev { struct efa_com_mmio_read mmio_read; }; +struct efa_com_eq { + struct efa_com_dev *edev; + struct efa_admin_eqe *eqes; + dma_addr_t dma_addr; + u32 cc; /* Consumer counter */ + u16 eqn; + u16 depth; + u8 phase; + efa_eqe_handler cb; +}; + +struct efa_com_create_eq_params { + dma_addr_t dma_addr; + u32 event_bitmask; + u16 depth; + u8 entry_size_in_bytes; + u8 msix_vec; +}; + +struct efa_com_create_eq_result { + u16 eqn; +}; + +struct efa_com_destroy_eq_params { + u16 eqn; +}; + typedef void (*efa_aenq_handler)(void *data, struct efa_admin_aenq_entry *aenq_e); @@ -121,9 +151,13 @@ struct efa_aenq_handlers { efa_aenq_handler unimplemented_handler; }; +void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_admin_init(struct efa_com_dev *edev, struct efa_aenq_handlers *aenq_handlers); void efa_com_admin_destroy(struct efa_com_dev *edev); +int efa_com_eq_init(struct efa_com_dev *edev, struct efa_com_eq *eeq, + efa_eqe_handler cb, u16 depth, u8 msix_vec); +int efa_com_eq_destroy(struct efa_com_dev *edev, struct efa_com_eq *eeq); int efa_com_dev_reset(struct efa_com_dev *edev, enum efa_regs_reset_reason_types reset_reason); void efa_com_set_admin_polling_mode(struct efa_com_dev *edev, bool polling); @@ -140,5 +174,7 @@ int efa_com_cmd_exec(struct efa_com_admin_queue *aq, struct efa_admin_acq_entry *comp, size_t comp_size); void efa_com_aenq_intr_handler(struct efa_com_dev *edev, void *data); +void efa_com_eq_comp_intr_handler(struct efa_com_dev *edev, + struct efa_com_eq *eeq); #endif /* _EFA_COM_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index f752ef64159c..fb405da4e1db 100644 --- 
a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,17 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" #include "efa_com_cmd.h" -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low) -{ - *addr_low = lower_32_bits(addr); - *addr_high = upper_32_bits(addr); -} - int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res) @@ -157,7 +151,7 @@ int efa_com_create_cq(struct efa_com_dev *edev, struct efa_com_create_cq_params *params, struct efa_com_create_cq_result *result) { - struct efa_admin_create_cq_resp cmd_completion; + struct efa_admin_create_cq_resp cmd_completion = {}; struct efa_admin_create_cq_cmd create_cmd = {}; struct efa_com_admin_queue *aq = &edev->aq; int err; @@ -169,6 +163,11 @@ int efa_com_create_cq(struct efa_com_dev *edev, create_cmd.cq_depth = params->cq_depth; create_cmd.num_sub_cqs = params->num_sub_cqs; create_cmd.uar = params->uarn; + if (params->interrupt_mode_enabled) { + EFA_SET(&create_cmd.cq_caps_1, + EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); + create_cmd.eqn = params->eqn; + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, @@ -187,6 +186,9 @@ int efa_com_create_cq(struct efa_com_dev *edev, result->cq_idx = cmd_completion.cq_idx; result->actual_depth = params->cq_depth; + result->db_off = cmd_completion.db_offset; + result->db_valid = EFA_GET(&cmd_completion.flags, + EFA_ADMIN_CREATE_CQ_RESP_DB_VALID); return 0; } @@ -497,6 +499,23 @@ int efa_com_get_device_attr(struct efa_com_dev *edev, sizeof(resp.u.network_attr.addr)); result->mtu = resp.u.network_attr.mtu; + if (efa_com_check_supported_feature_id(edev, + EFA_ADMIN_EVENT_QUEUE_ATTR)) { + err = efa_com_get_feature(edev, &resp, + 
EFA_ADMIN_EVENT_QUEUE_ATTR); + if (err) { + ibdev_err_ratelimited( + edev->efa_dev, + "Failed to get event queue attributes %d\n", + err); + return err; + } + + result->max_eq = resp.u.event_queue_attr.max_eq; + result->max_eq_depth = resp.u.event_queue_attr.max_eq_depth; + result->event_bitmask = resp.u.event_queue_attr.event_bitmask; + } + return 0; } diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index eea4ebfbe6ec..c33010bbf9e8 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_COM_CMD_H_ @@ -73,7 +73,9 @@ struct efa_com_create_cq_params { u16 cq_depth; u16 num_sub_cqs; u16 uarn; + u16 eqn; u8 entry_size_in_bytes; + bool interrupt_mode_enabled; }; struct efa_com_create_cq_result { @@ -81,6 +83,8 @@ struct efa_com_create_cq_result { u16 cq_idx; /* actual cq depth in # of entries */ u16 actual_depth; + u32 db_off; + bool db_valid; }; struct efa_com_destroy_cq_params { @@ -125,6 +129,9 @@ struct efa_com_get_device_attr_result { u32 max_llq_size; u32 max_rdma_size; u32 device_caps; + u32 max_eq; + u32 max_eq_depth; + u32 event_bitmask; /* EQ events bitmask */ u16 sub_cqs_per_cq; u16 max_sq_sge; u16 max_rq_sge; @@ -260,7 +267,6 @@ union efa_com_get_stats_result { struct efa_com_rdma_read_stats rdma_read_stats; }; -void efa_com_set_dma_addr(dma_addr_t addr, u32 *addr_high, u32 *addr_low); int efa_com_create_qp(struct efa_com_dev *edev, struct efa_com_create_qp_params *params, struct efa_com_create_qp_result *res); diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 417dea5f90cf..8a3409b4dfbb 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c 
@@ -67,6 +67,47 @@ static void efa_release_bars(struct efa_dev *dev, int bars_mask) pci_release_selected_regions(pdev, release_bars); } +static void efa_process_comp_eqe(struct efa_dev *dev, struct efa_admin_eqe *eqe) +{ + u16 cqn = eqe->u.comp_event.cqn; + struct efa_cq *cq; + + /* Safe to load as we're in irq and removal calls synchronize_irq() */ + cq = xa_load(&dev->cqs_xa, cqn); + if (unlikely(!cq)) { + ibdev_err_ratelimited(&dev->ibdev, + "Completion event on non-existent CQ[%u]", + cqn); + return; + } + + cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context); +} + +static void efa_process_eqe(struct efa_com_eq *eeq, struct efa_admin_eqe *eqe) +{ + struct efa_dev *dev = container_of(eeq->edev, struct efa_dev, edev); + + if (likely(EFA_GET(&eqe->common, EFA_ADMIN_EQE_EVENT_TYPE) == + EFA_ADMIN_EQE_EVENT_TYPE_COMPLETION)) + efa_process_comp_eqe(dev, eqe); + else + ibdev_err_ratelimited(&dev->ibdev, + "Unknown event type received %lu", + EFA_GET(&eqe->common, + EFA_ADMIN_EQE_EVENT_TYPE)); +} + +static irqreturn_t efa_intr_msix_comp(int irq, void *data) +{ + struct efa_eq *eq = data; + struct efa_com_dev *edev = eq->eeq.edev; + + efa_com_eq_comp_intr_handler(edev, &eq->eeq); + + return IRQ_HANDLED; +} + static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) { struct efa_dev *dev = data; @@ -77,26 +118,43 @@ static irqreturn_t efa_intr_msix_mgmnt(int irq, void *data) return IRQ_HANDLED; } -static int efa_request_mgmnt_irq(struct efa_dev *dev) +static int efa_request_irq(struct efa_dev *dev, struct efa_irq *irq) { - struct efa_irq *irq; int err; - irq = &dev->admin_irq; err = request_irq(irq->irqn, irq->handler, 0, irq->name, irq->data); if (err) { - dev_err(&dev->pdev->dev, "Failed to request admin irq (%d)\n", - err); + dev_err(&dev->pdev->dev, "Failed to request irq %s (%d)\n", + irq->name, err); return err; } - dev_dbg(&dev->pdev->dev, "Set affinity hint of mgmnt irq to %*pbl (irq vector: %d)\n", - nr_cpumask_bits, &irq->affinity_hint_mask, irq->irqn); 
irq_set_affinity_hint(irq->irqn, &irq->affinity_hint_mask); return 0; } +static void efa_setup_comp_irq(struct efa_dev *dev, struct efa_eq *eq, + int vector) +{ + u32 cpu; + + cpu = vector - EFA_COMP_EQS_VEC_BASE; + snprintf(eq->irq.name, EFA_IRQNAME_SIZE, "efa-comp%d@pci:%s", cpu, + pci_name(dev->pdev)); + eq->irq.handler = efa_intr_msix_comp; + eq->irq.data = eq; + eq->irq.vector = vector; + eq->irq.irqn = pci_irq_vector(dev->pdev, vector); + cpumask_set_cpu(cpu, &eq->irq.affinity_hint_mask); +} + +static void efa_free_irq(struct efa_dev *dev, struct efa_irq *irq) +{ + irq_set_affinity_hint(irq->irqn, NULL); + free_irq(irq->irqn, irq->data); +} + static void efa_setup_mgmnt_irq(struct efa_dev *dev) { u32 cpu; @@ -105,8 +163,9 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) "efa-mgmnt@pci:%s", pci_name(dev->pdev)); dev->admin_irq.handler = efa_intr_msix_mgmnt; dev->admin_irq.data = dev; - dev->admin_irq.irqn = - pci_irq_vector(dev->pdev, dev->admin_msix_vector_idx); + dev->admin_irq.vector = dev->admin_msix_vector_idx; + dev->admin_irq.irqn = pci_irq_vector(dev->pdev, + dev->admin_msix_vector_idx); cpu = cpumask_first(cpu_online_mask); cpumask_set_cpu(cpu, &dev->admin_irq.affinity_hint_mask); @@ -115,20 +174,11 @@ static void efa_setup_mgmnt_irq(struct efa_dev *dev) dev->admin_irq.name); } -static void efa_free_mgmnt_irq(struct efa_dev *dev) -{ - struct efa_irq *irq; - - irq = &dev->admin_irq; - irq_set_affinity_hint(irq->irqn, NULL); - free_irq(irq->irqn, irq->data); -} - static int efa_set_mgmnt_irq(struct efa_dev *dev) { efa_setup_mgmnt_irq(dev); - return efa_request_mgmnt_irq(dev); + return efa_request_irq(dev, &dev->admin_irq); } static int efa_request_doorbell_bar(struct efa_dev *dev) @@ -234,6 +284,76 @@ static void efa_set_host_info(struct efa_dev *dev) dma_free_coherent(&dev->pdev->dev, bufsz, hinf, hinf_dma); } +static int efa_destroy_eq(struct efa_dev *dev, struct efa_eq *eq) +{ + efa_com_eq_destroy(&dev->edev, &eq->eeq); + efa_free_irq(dev, 
&eq->irq); + + return 0; +} + +static int efa_create_eq(struct efa_dev *dev, struct efa_eq *eq, u8 msix_vec) +{ + int err; + + efa_setup_comp_irq(dev, eq, msix_vec); + err = efa_request_irq(dev, &eq->irq); + if (err) + return err; + + err = efa_com_eq_init(&dev->edev, &eq->eeq, efa_process_eqe, + dev->dev_attr.max_eq_depth, msix_vec); + if (err) + goto err_free_comp_irq; + + return 0; + +err_free_comp_irq: + efa_free_irq(dev, &eq->irq); + return err; +} + +static int efa_create_eqs(struct efa_dev *dev) +{ + unsigned int neqs = dev->dev_attr.max_eq; + int err; + int i; + + neqs = min_t(unsigned int, neqs, num_online_cpus()); + dev->neqs = neqs; + dev->eqs = kcalloc(neqs, sizeof(*dev->eqs), GFP_KERNEL); + if (!dev->eqs) + return -ENOMEM; + + for (i = 0; i < neqs; i++) { + err = efa_create_eq(dev, &dev->eqs[i], + i + EFA_COMP_EQS_VEC_BASE); + if (err) + goto err_destroy_eqs; + } + + return 0; + +err_destroy_eqs: + for (i--; i >= 0; i--) + efa_destroy_eq(dev, &dev->eqs[i]); + kfree(dev->eqs); + + return err; +} + +static int efa_destroy_eqs(struct efa_dev *dev) +{ + int i; + + for (i = 0; i < dev->neqs; i++) + efa_destroy_eq(dev, &dev->eqs[i]); + + kfree(dev->eqs); + + return 0; +} + static const struct ib_device_ops efa_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_EFA, @@ -300,23 +420,29 @@ static int efa_ib_device_add(struct efa_dev *dev) if (err) goto err_release_doorbell_bar; + err = efa_create_eqs(dev); + if (err) + goto err_release_doorbell_bar; + efa_set_host_info(dev); dev->ibdev.node_type = RDMA_NODE_UNSPECIFIED; dev->ibdev.phys_port_cnt = 1; - dev->ibdev.num_comp_vectors = 1; + dev->ibdev.num_comp_vectors = dev->neqs ?: 1; dev->ibdev.dev.parent = &pdev->dev; ib_set_device_ops(&dev->ibdev, &efa_dev_ops); err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev); if (err) - goto err_release_doorbell_bar; + goto err_destroy_eqs; ibdev_info(&dev->ibdev, "IB device registered\n"); return 0; +err_destroy_eqs: + efa_destroy_eqs(dev); 
err_release_doorbell_bar: efa_release_doorbell_bar(dev); return err; @@ -324,9 +450,10 @@ static int efa_ib_device_add(struct efa_dev *dev) static void efa_ib_device_remove(struct efa_dev *dev) { - efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); ibdev_info(&dev->ibdev, "Unregister ib device\n"); ib_unregister_device(&dev->ibdev); + efa_destroy_eqs(dev); + efa_com_dev_reset(&dev->edev, EFA_REGS_RESET_NORMAL); efa_release_doorbell_bar(dev); } @@ -339,8 +466,12 @@ static int efa_enable_msix(struct efa_dev *dev) { int msix_vecs, irq_num; - /* Reserve the max msix vectors we might need */ - msix_vecs = EFA_NUM_MSIX_VEC; + /* + * Reserve the max msix vectors we might need, one vector is reserved + * for admin. + */ + msix_vecs = min_t(int, pci_msix_vec_count(dev->pdev), + num_online_cpus() + 1); dev_dbg(&dev->pdev->dev, "Trying to enable MSI-X, vectors %d\n", msix_vecs); @@ -421,6 +552,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) edev->efa_dev = dev; edev->dmadev = &pdev->dev; dev->pdev = pdev; + xa_init(&dev->cqs_xa); bars = pci_select_bars(pdev, IORESOURCE_MEM) & EFA_BASE_BAR_MASK; err = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); @@ -476,7 +608,7 @@ static struct efa_dev *efa_probe_device(struct pci_dev *pdev) return dev; err_free_mgmnt_irq: - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); err_disable_msix: efa_disable_msix(dev); err_reg_read_destroy: @@ -499,11 +631,12 @@ static void efa_remove_device(struct pci_dev *pdev) edev = &dev->edev; efa_com_admin_destroy(edev); - efa_free_mgmnt_irq(dev); + efa_free_irq(dev, &dev->admin_irq); efa_disable_msix(dev); efa_com_mmio_reg_read_destroy(edev); devm_iounmap(&pdev->dev, edev->reg_bar); efa_release_bars(dev, EFA_BASE_BAR_MASK); + xa_destroy(&dev->cqs_xa); ib_dealloc_device(&dev->ibdev); pci_disable_device(pdev); } diff --git a/drivers/infiniband/hw/efa/efa_regs_defs.h b/drivers/infiniband/hw/efa/efa_regs_defs.h index 4017982fe13b..714ae6258800 100644 --- 
a/drivers/infiniband/hw/efa/efa_regs_defs.h +++ b/drivers/infiniband/hw/efa/efa_regs_defs.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef _EFA_REGS_H_ @@ -42,6 +42,7 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_OFF 0x5c #define EFA_REGS_MMIO_RESP_LO_OFF 0x60 #define EFA_REGS_MMIO_RESP_HI_OFF 0x64 +#define EFA_REGS_EQ_DB_OFF 0x68 /* version register */ #define EFA_REGS_VERSION_MINOR_VERSION_MASK 0xff @@ -93,4 +94,8 @@ enum efa_regs_reset_reason_types { #define EFA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff #define EFA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 +/* eq_db register */ +#define EFA_REGS_EQ_DB_EQN_MASK 0xffff +#define EFA_REGS_EQ_DB_ARM_MASK 0x80000000 + #endif /* _EFA_REGS_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index e5f9d90aad5e..3353ad4925ee 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -245,6 +245,9 @@ int efa_query_device(struct ib_device *ibdev, if (EFA_DEV_CAP(dev, RNR_RETRY)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY; + if (dev->neqs) + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS; + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { @@ -984,6 +987,12 @@ static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx) return efa_com_destroy_cq(&dev->edev, &params); } +static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq) +{ + rdma_user_mmap_entry_remove(cq->db_mmap_entry); + rdma_user_mmap_entry_remove(cq->mmap_entry); +} + int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct efa_dev *dev = to_edev(ibcq->device); @@ -993,15 +1002,25 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], 
dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); + if (cq->eq) { + xa_erase(&dev->cqs_xa, cq->cq_idx); + synchronize_irq(cq->eq->irq.irqn); + } efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size, DMA_FROM_DEVICE); return 0; } +static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec) +{ + return &dev->eqs[vec]; +} + static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, - struct efa_ibv_create_cq_resp *resp) + struct efa_ibv_create_cq_resp *resp, + bool db_valid) { resp->q_mmap_size = cq->size; cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, @@ -1011,6 +1030,21 @@ static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq, if (!cq->mmap_entry) return -ENOMEM; + if (db_valid) { + cq->db_mmap_entry = + efa_user_mmap_entry_insert(&cq->ucontext->ibucontext, + dev->db_bar_addr + resp->db_off, + PAGE_SIZE, EFA_MMAP_IO_NC, + &resp->db_mmap_key); + if (!cq->db_mmap_entry) { + rdma_user_mmap_entry_remove(cq->mmap_entry); + return -ENOMEM; + } + + resp->db_off &= ~PAGE_MASK; + resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF; + } + return 0; } @@ -1019,8 +1053,8 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, { struct efa_ucontext *ucontext = rdma_udata_to_drv_context( udata, struct efa_ucontext, ibucontext); + struct efa_com_create_cq_params params = {}; struct efa_ibv_create_cq_resp resp = {}; - struct efa_com_create_cq_params params; struct efa_com_create_cq_result result; struct ib_device *ibdev = ibcq->device; struct efa_dev *dev = to_edev(ibdev); @@ -1065,7 +1099,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_50)) { + if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) { ibdev_dbg(ibdev, "Incompatible ABI params, unknown fields in 
udata\n"); err = -EINVAL; goto err_out; } @@ -1101,29 +1135,45 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { + cq->eq = efa_vec2eq(dev, attr->comp_vector); + params.eqn = cq->eq->eeq.eqn; + params.interrupt_mode_enabled = true; + } + err = efa_com_create_cq(&dev->edev, &params, &result); if (err) goto err_free_mapped; + resp.db_off = result.db_off; resp.cq_idx = result.cq_idx; cq->cq_idx = result.cq_idx; cq->ibcq.cqe = result.actual_depth; WARN_ON_ONCE(entries != result.actual_depth); - err = cq_mmap_entries_setup(dev, cq, &resp); + err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid); if (err) { ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n", cq->cq_idx); goto err_destroy_cq; } + if (cq->eq) { + err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL)); + if (err) { + ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n", + cq->cq_idx); + goto err_remove_mmap; + } + } + if (udata->outlen) { err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); if (err) { ibdev_dbg(ibdev, "Failed to copy udata for create_cq\n"); - goto err_remove_mmap; + goto err_xa_erase; } } @@ -1132,8 +1182,11 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return 0; +err_xa_erase: + if (cq->eq) + xa_erase(&dev->cqs_xa, cq->cq_idx); err_remove_mmap: - rdma_user_mmap_entry_remove(cq->mmap_entry); + efa_cq_user_mmap_entries_remove(cq); err_destroy_cq: efa_destroy_cq_idx(dev, cq->cq_idx); err_free_mapped: diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index f89fbb5b1e8d..08035ccf1fff 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2020 Amazon.com, Inc. or its affiliates. 
All rights reserved. + * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. */ #ifndef EFA_ABI_USER_H @@ -52,11 +52,20 @@ struct efa_ibv_alloc_pd_resp { __u8 reserved_30[2]; }; +enum { + EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, +}; + struct efa_ibv_create_cq { __u32 comp_mask; __u32 cq_entry_size; __u16 num_sub_cqs; - __u8 reserved_50[6]; + __u8 flags; + __u8 reserved_58[5]; +}; + +enum { + EFA_CREATE_CQ_RESP_DB_OFF = 1 << 0, }; struct efa_ibv_create_cq_resp { @@ -65,7 +74,9 @@ struct efa_ibv_create_cq_resp { __aligned_u64 q_mmap_key; __aligned_u64 q_mmap_size; __u16 cq_idx; - __u8 reserved_d0[6]; + __u8 reserved_d0[2]; + __u32 db_off; + __aligned_u64 db_mmap_key; }; enum { @@ -106,6 +117,7 @@ struct efa_ibv_create_ah_resp { enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, + EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, }; struct efa_ibv_ex_query_device_resp {