@@ -65,6 +65,8 @@
#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
#define HNS_ROCE_MIN_CQE_CNT 16
+#define HNS_ROCE_RESERVED_SGE 1
+
#define HNS_ROCE_MAX_IRQ_NUM 128
#define HNS_ROCE_SGE_IN_WQE 2
@@ -395,6 +397,7 @@ struct hns_roce_wq {
spinlock_t lock;
u32 wqe_cnt; /* WQE num */
u32 max_gs;
+ u32 rsv_sge;
int offset;
int wqe_shift; /* WQE size */
u32 head;
@@ -498,6 +501,7 @@ struct hns_roce_srq {
unsigned long srqn;
u32 wqe_cnt;
int max_gs;
+ u32 rsv_sge;
int wqe_shift;
void __iomem *db_reg_l;
@@ -741,6 +741,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
unsigned long flags;
void *wqe = NULL;
u32 wqe_idx;
+ u32 max_sge;
int nreq;
int ret;
int i;
@@ -754,6 +755,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
goto out;
}
+ max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
hr_qp->ibqp.recv_cq))) {
@@ -764,9 +766,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
- if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+ if (unlikely(wr->num_sge > max_sge)) {
ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
- wr->num_sge, hr_qp->rq.max_gs);
+ wr->num_sge, max_sge);
ret = -EINVAL;
*bad_wr = wr;
goto out;
@@ -781,9 +783,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
dseg++;
}
- if (wr->num_sge < hr_qp->rq.max_gs) {
+ if (hr_qp->rq.rsv_sge) {
dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
dseg->addr = 0;
+ dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
}
/* rq support inline data */
@@ -879,6 +882,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
__le32 *srq_idx;
int ret = 0;
int wqe_idx;
+ u32 max_sge;
void *wqe;
int nreq;
int i;
@@ -886,9 +890,13 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
spin_lock_irqsave(&srq->lock, flags);
ind = srq->head & (srq->wqe_cnt - 1);
+ max_sge = srq->max_gs - srq->rsv_sge;
for (nreq = 0; wr; ++nreq, wr = wr->next) {
- if (unlikely(wr->num_sge >= srq->max_gs)) {
+ if (unlikely(wr->num_sge > max_sge)) {
+ ibdev_err(&hr_dev->ib_dev,
+ "srq: num_sge = %d, max_sge = %u.\n",
+ wr->num_sge, max_sge);
ret = -EINVAL;
*bad_wr = wr;
break;
@@ -916,9 +924,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
}
- if (wr->num_sge < srq->max_gs) {
- dseg[i].len = 0;
- dseg[i].lkey = cpu_to_le32(0x100);
+ if (srq->rsv_sge) {
+ dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+ dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
dseg[i].addr = 0;
}
@@ -1999,10 +2007,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg);
caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline);
caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg);
+ caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg);
caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer);
caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer);
caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges);
+ caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
caps->num_aeq_vectors = resp_a->num_aeq_vectors;
caps->num_other_vectors = resp_a->num_other_vectors;
caps->max_sq_desc_sz = resp_a->max_sq_desc_sz;
@@ -5071,7 +5081,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
- qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
if (!ibqp->uobject) {
qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
@@ -5383,7 +5393,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
attr->srq_limit = limit_wl;
attr->max_wr = srq->wqe_cnt - 1;
- attr->max_sge = srq->max_gs;
+ attr->max_sge = srq->max_gs - srq->rsv_sge;
out:
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -96,7 +96,8 @@
#define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE
#define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000
#define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2
-#define HNS_ROCE_INVALID_LKEY 0x100
+#define HNS_ROCE_INVALID_LKEY 0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000
#define HNS_ROCE_CMQ_TX_TIMEOUT 30000
#define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2
#define HNS_ROCE_V2_RSV_QPS 8
@@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
spin_unlock(&hr_dev->qp_table.bank_lock);
}
+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_rq_sg;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_qp->rq.rsv_sge = 1;
+
+ return max_sge;
+}
+
static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
- struct hns_roce_qp *hr_qp, int has_rq)
+ struct hns_roce_qp *hr_qp, int has_rq, bool user)
{
+ u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
u32 cnt;
/* If srq exist, set zero for relative number of rq */
@@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
/* Check the validity of QP support capacity */
if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
- cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
- ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+ cap->max_recv_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "RQ config error, depth = %u, sge = %u\n",
cap->max_recv_wr, cap->max_recv_sge);
return -EINVAL;
}
@@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
return -EINVAL;
}
- hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+ hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+ hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
hr_qp->rq_inl_buf.wqe_cnt = 0;
cap->max_recv_wr = cnt;
- cap->max_recv_sge = hr_qp->rq.max_gs;
+ cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0;
}
@@ -918,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
- hns_roce_qp_has_rq(init_attr));
+ hns_roce_qp_has_rq(init_attr), !!udata);
if (ret) {
ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
ret);
@@ -3,6 +3,7 @@
* Copyright (c) 2018 Hisilicon Limited.
*/
+#include <linux/pci.h>
#include <rdma/ib_umem.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
@@ -277,6 +278,28 @@ static void free_srq_wrid(struct hns_roce_srq *srq)
srq->wrid = NULL;
}
+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+ bool user)
+{
+ u32 max_sge = dev->caps.max_srq_sges;
+
+ if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+ return max_sge;
+
+ /* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+ * calculate number of max_sge with reserved SGEs when allocating wqe
+ * buf, so there is no need to do this again in kernel. But the number
+ * may exceed the capacity of SGEs recorded in the firmware, so the
+ * kernel driver should just adapt the value accordingly.
+ */
+ if (user)
+ max_sge = roundup_pow_of_two(max_sge + 1);
+ else
+ hr_srq->rsv_sge = 1;
+
+ return max_sge;
+}
+
int hns_roce_create_srq(struct ib_srq *ib_srq,
struct ib_srq_init_attr *init_attr,
struct ib_udata *udata)
@@ -286,6 +309,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
struct hns_roce_srq *srq = to_hr_srq(ib_srq);
struct ib_device *ibdev = &hr_dev->ib_dev;
struct hns_roce_ib_create_srq ucmd = {};
+ u32 max_sge;
int ret;
u32 cqn;
@@ -293,16 +317,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
init_attr->srq_type != IB_SRQT_XRC)
return -EOPNOTSUPP;
- /* Check the actual SRQ wqe and SRQ sge num */
+ max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+
if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
- init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+ init_attr->attr.max_sge > max_sge) {
+ ibdev_err(&hr_dev->ib_dev,
+ "SRQ config error, depth = %u, sge = %d\n",
+ init_attr->attr.max_wr, init_attr->attr.max_sge);
return -EINVAL;
+ }
mutex_init(&srq->mutex);
spin_lock_init(&srq->lock);
srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
- srq->max_gs = init_attr->attr.max_sge;
+ srq->max_gs =
+ roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
+ init_attr->attr.max_wr = srq->wqe_cnt;
+ init_attr->attr.max_sge = srq->max_gs;
if (udata) {
ret = ib_copy_from_udata(&ucmd, udata,
@@ -349,6 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->event = hns_roce_ib_srq_event;
resp.srqn = srq->srqn;
+ srq->max_gs = init_attr->attr.max_sge;
+ init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge;
if (udata) {
ret = ib_copy_to_udata(udata, &resp,