diff mbox series

[2/2] qla2xxx: Fix DMA error when the DIF sg buffer crosses 4GB boundary

Message ID 20181221173345.15606-3-hmadhani@marvell.com (mailing list archive)
State Accepted
Headers show
Series qla2xxx: Updates for driver | expand

Commit Message

Himanshu Madhani Dec. 21, 2018, 5:33 p.m. UTC
From: Giridhar Malavali <gmalavali@marvell.com>

When SGE buffer containing DIF information crosses 4G boundary, it
results into DMA error. This patch fixes this issue by calculating
SGE buffer size and if it crosses 4G boundary, driver will split it
into multiple SGE buffers to avoid DMA error.

Signed-off-by: Giridhar Malavali <gmalavali@marvell.com>
Signed-off-by: Himanshu Madhani <hmadhani@marvell.com>
---
 drivers/scsi/qla2xxx/qla_attr.c   |  21 ++-
 drivers/scsi/qla2xxx/qla_def.h    |  28 ++++
 drivers/scsi/qla2xxx/qla_gbl.h    |   3 +-
 drivers/scsi/qla2xxx/qla_iocb.c   | 335 +++++++++++++++++++++++++++++++-------
 drivers/scsi/qla2xxx/qla_isr.c    |  11 ++
 drivers/scsi/qla2xxx/qla_os.c     | 168 ++++++++++++++++++-
 drivers/scsi/qla2xxx/qla_target.c |   2 +-
 drivers/scsi/qla2xxx/qla_target.h |   2 +
 8 files changed, 502 insertions(+), 68 deletions(-)

Comments

Martin K. Petersen Jan. 12, 2019, 3:09 a.m. UTC | #1
Himanshu,

> When SGE buffer containing DIF information crosses 4G boundary, it
> results into DMA error. This patch fixes this issue by calculating
> SGE buffer size and if it crosses 4G boundary, driver will split it
> into multiple SGE buffers to avoid DMA error.

Applied to 5.1/scsi-queue, thanks!
diff mbox series

Patch

diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 00444dc79756..8b4dd72011bf 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1002,7 +1002,7 @@  qla2x00_free_sysfs_attr(scsi_qla_host_t *vha, bool stop_beacon)
 /* Scsi_Host attributes. */
 
 static ssize_t
-qla2x00_drvr_version_show(struct device *dev,
+qla2x00_driver_version_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
 	return scnprintf(buf, PAGE_SIZE, "%s\n", qla2x00_version_str);
@@ -2059,7 +2059,21 @@  ql2xiniexchg_store(struct device *dev, struct device_attribute *attr,
 	return strlen(buf);
 }
 
-static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_drvr_version_show, NULL);
+static ssize_t
+qla2x00_dif_bundle_statistics_show(struct device *dev,
+    struct device_attribute *attr, char *buf)
+{
+	scsi_qla_host_t *vha = shost_priv(class_to_shost(dev));
+	struct qla_hw_data *ha = vha->hw;
+
+	return scnprintf(buf, PAGE_SIZE,
+	    "cross=%llu read=%llu write=%llu kalloc=%llu dma_alloc=%llu unusable=%u\n",
+	    ha->dif_bundle_crossed_pages, ha->dif_bundle_reads,
+	    ha->dif_bundle_writes, ha->dif_bundle_kallocs,
+	    ha->dif_bundle_dma_allocs, ha->pool.unusable.count);
+}
+
+static DEVICE_ATTR(driver_version, S_IRUGO, qla2x00_driver_version_show, NULL);
 static DEVICE_ATTR(fw_version, S_IRUGO, qla2x00_fw_version_show, NULL);
 static DEVICE_ATTR(serial_num, S_IRUGO, qla2x00_serial_num_show, NULL);
 static DEVICE_ATTR(isp_name, S_IRUGO, qla2x00_isp_name_show, NULL);
@@ -2112,6 +2126,8 @@  static DEVICE_ATTR(zio_threshold, 0644,
 static DEVICE_ATTR_RW(qlini_mode);
 static DEVICE_ATTR_RW(ql2xexchoffld);
 static DEVICE_ATTR_RW(ql2xiniexchg);
+static DEVICE_ATTR(dif_bundle_statistics, 0444,
+    qla2x00_dif_bundle_statistics_show, NULL);
 
 
 struct device_attribute *qla2x00_host_attrs[] = {
@@ -2150,6 +2166,7 @@  struct device_attribute *qla2x00_host_attrs[] = {
 	&dev_attr_min_link_speed,
 	&dev_attr_max_speed_sup,
 	&dev_attr_zio_threshold,
+	&dev_attr_dif_bundle_statistics,
 	NULL, /* reserve for qlini_mode */
 	NULL, /* reserve for ql2xiniexchg */
 	NULL, /* reserve for ql2xexchoffld */
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 26b93c563f92..97f66b142ff2 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -314,6 +314,7 @@  struct srb_cmd {
 #define SRB_CRC_PROT_DMA_VALID		BIT_4	/* DIF: prot DMA valid */
 #define SRB_CRC_CTX_DSD_VALID		BIT_5	/* DIF: dsd_list valid */
 #define SRB_WAKEUP_ON_COMP		BIT_6
+#define SRB_DIF_BUNDL_DMA_VALID		BIT_7   /* DIF: DMA list valid */
 
 /* To identify if a srb is of T10-CRC type. @sp => srb_t pointer */
 #define IS_PROT_IO(sp)	(sp->flags & SRB_CRC_CTX_DSD_VALID)
@@ -1892,6 +1893,13 @@  struct crc_context {
 	/* List of DMA context transfers */
 	struct list_head dsd_list;
 
+	/* List of DIF Bundling context DMA address */
+	struct list_head ldif_dsd_list;
+	u8 no_ldif_dsd;
+
+	struct list_head ldif_dma_hndl_list;
+	u32 dif_bundl_len;
+	u8 no_dif_bundl;
 	/* This structure should not exceed 512 bytes */
 };
 
@@ -4184,6 +4192,26 @@  struct qla_hw_data {
 	uint16_t min_link_speed;
 	uint16_t max_speed_sup;
 
+	/* DMA pool for the DIF bundling buffers */
+	struct dma_pool *dif_bundl_pool;
+	#define DIF_BUNDLING_DMA_POOL_SIZE  1024
+	struct {
+		struct {
+			struct list_head head;
+			uint count;
+		} good;
+		struct {
+			struct list_head head;
+			uint count;
+		} unusable;
+	} pool;
+
+	unsigned long long dif_bundle_crossed_pages;
+	unsigned long long dif_bundle_reads;
+	unsigned long long dif_bundle_writes;
+	unsigned long long dif_bundle_kallocs;
+	unsigned long long dif_bundle_dma_allocs;
+
 	atomic_t        nvme_active_aen_cnt;
 	uint16_t        nvme_last_rptd_aen;             /* Last recorded aen count */
 
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 3673fcdb033a..bcc17a7261e7 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -160,6 +160,7 @@  extern int ql2xautodetectsfp;
 extern int ql2xenablemsix;
 extern int qla2xuseresexchforels;
 extern int ql2xexlogins;
+extern int ql2xdifbundlinginternalbuffers;
 
 extern int qla2x00_loop_reset(scsi_qla_host_t *);
 extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
@@ -285,7 +286,7 @@  extern int qla24xx_walk_and_build_sglist_no_difb(struct qla_hw_data *, srb_t *,
 extern int qla24xx_walk_and_build_sglist(struct qla_hw_data *, srb_t *,
 	uint32_t *, uint16_t, struct qla_tc_param *);
 extern int qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *, srb_t *,
-	uint32_t *, uint16_t, struct qla_tc_param *);
+	uint32_t *, uint16_t, struct qla_tgt_cmd *);
 extern int qla24xx_get_one_block_sg(uint32_t, struct qla2_sgx *, uint32_t *);
 extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *);
 extern int qla24xx_build_scsi_crc_2_iocbs(srb_t *,
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 032635321ad6..65ba0e36ee60 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -1098,88 +1098,300 @@  qla24xx_walk_and_build_sglist(struct qla_hw_data *ha, srb_t *sp, uint32_t *dsd,
 
 int
 qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
-	uint32_t *dsd, uint16_t tot_dsds, struct qla_tc_param *tc)
+    uint32_t *cur_dsd, uint16_t tot_dsds, struct qla_tgt_cmd *tc)
 {
-	void *next_dsd;
-	uint8_t avail_dsds = 0;
-	uint32_t dsd_list_len;
-	struct dsd_dma *dsd_ptr;
+	struct dsd_dma *dsd_ptr = NULL, *dif_dsd, *nxt_dsd;
 	struct scatterlist *sg, *sgl;
-	int	i;
-	struct scsi_cmnd *cmd;
-	uint32_t *cur_dsd = dsd;
-	uint16_t used_dsds = tot_dsds;
+	struct crc_context *difctx = NULL;
 	struct scsi_qla_host *vha;
+	uint dsd_list_len;
+	uint avail_dsds = 0;
+	uint used_dsds = tot_dsds;
+	bool dif_local_dma_alloc = false;
+	bool direction_to_device = false;
+	int i;
 
 	if (sp) {
-		cmd = GET_CMD_SP(sp);
+		struct scsi_cmnd *cmd = GET_CMD_SP(sp);
 		sgl = scsi_prot_sglist(cmd);
 		vha = sp->vha;
+		difctx = sp->u.scmd.ctx;
+		direction_to_device = cmd->sc_data_direction == DMA_TO_DEVICE;
+		ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe021,
+		  "%s: scsi_cmnd: %p, crc_ctx: %p, sp: %p\n",
+			__func__, cmd, difctx, sp);
 	} else if (tc) {
 		vha = tc->vha;
 		sgl = tc->prot_sg;
+		difctx = tc->ctx;
+		direction_to_device = tc->dma_data_direction == DMA_TO_DEVICE;
 	} else {
 		BUG();
 		return 1;
 	}
 
-	ql_dbg(ql_dbg_tgt, vha, 0xe021,
-		"%s: enter\n", __func__);
-
-	for_each_sg(sgl, sg, tot_dsds, i) {
-		dma_addr_t	sle_dma;
-
-		/* Allocate additional continuation packets? */
-		if (avail_dsds == 0) {
-			avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ?
-						QLA_DSDS_PER_IOCB : used_dsds;
-			dsd_list_len = (avail_dsds + 1) * 12;
-			used_dsds -= avail_dsds;
-
-			/* allocate tracking DS */
-			dsd_ptr = kzalloc(sizeof(struct dsd_dma), GFP_ATOMIC);
-			if (!dsd_ptr)
-				return 1;
-
-			/* allocate new list */
-			dsd_ptr->dsd_addr = next_dsd =
-			    dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC,
-				&dsd_ptr->dsd_list_dma);
-
-			if (!next_dsd) {
-				/*
-				 * Need to cleanup only this dsd_ptr, rest
-				 * will be done by sp_free_dma()
-				 */
-				kfree(dsd_ptr);
-				return 1;
+	ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe021,
+	    "%s: enter (write=%u)\n", __func__, direction_to_device);
+
+	/* if initiator doing write or target doing read */
+	if (direction_to_device) {
+		for_each_sg(sgl, sg, tot_dsds, i) {
+			dma_addr_t sle_phys = sg_phys(sg);
+
+			/* If SGE addr + len flips bits in upper 32-bits */
+			if (MSD(sle_phys + sg->length) ^ MSD(sle_phys)) {
+				ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe022,
+				    "%s: page boundary crossing (phys=%llx len=%x)\n",
+				    __func__, sle_phys, sg->length);
+
+				if (difctx) {
+					ha->dif_bundle_crossed_pages++;
+					dif_local_dma_alloc = true;
+				} else {
+					ql_dbg(ql_dbg_tgt + ql_dbg_verbose,
+					    vha, 0xe022,
+					    "%s: difctx pointer is NULL\n",
+					    __func__);
+				}
+				break;
+			}
+		}
+		ha->dif_bundle_writes++;
+	} else {
+		ha->dif_bundle_reads++;
+	}
+
+	if (ql2xdifbundlinginternalbuffers)
+		dif_local_dma_alloc = direction_to_device;
+
+	if (dif_local_dma_alloc) {
+		u32 track_difbundl_buf = 0;
+		u32 ldma_sg_len = 0;
+		u8 ldma_needed = 1;
+
+		difctx->no_dif_bundl = 0;
+		difctx->dif_bundl_len = 0;
+
+		/* Track DSD buffers */
+		INIT_LIST_HEAD(&difctx->ldif_dsd_list);
+		/* Track local DMA buffers */
+		INIT_LIST_HEAD(&difctx->ldif_dma_hndl_list);
+
+		for_each_sg(sgl, sg, tot_dsds, i) {
+			u32 sglen = sg_dma_len(sg);
+
+			ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe023,
+			    "%s: sg[%x] (phys=%llx sglen=%x) ldma_sg_len: %x dif_bundl_len: %x ldma_needed: %x\n",
+			    __func__, i, sg_phys(sg), sglen, ldma_sg_len,
+			    difctx->dif_bundl_len, ldma_needed);
+
+			while (sglen) {
+				u32 xfrlen = 0;
+
+				if (ldma_needed) {
+					/*
+					 * Allocate list item to store
+					 * the DMA buffers
+					 */
+					dsd_ptr = kzalloc(sizeof(*dsd_ptr),
+					    GFP_ATOMIC);
+					if (!dsd_ptr) {
+						ql_dbg(ql_dbg_tgt, vha, 0xe024,
+						    "%s: failed alloc dsd_ptr\n",
+						    __func__);
+						return 1;
+					}
+					ha->dif_bundle_kallocs++;
+
+					/* allocate dma buffer */
+					dsd_ptr->dsd_addr = dma_pool_alloc
+						(ha->dif_bundl_pool, GFP_ATOMIC,
+						 &dsd_ptr->dsd_list_dma);
+					if (!dsd_ptr->dsd_addr) {
+						ql_dbg(ql_dbg_tgt, vha, 0xe024,
+						    "%s: failed alloc ->dsd_ptr\n",
+						    __func__);
+						/*
+						 * need to cleanup only this
+						 * dsd_ptr rest will be done
+						 * by sp_free_dma()
+						 */
+						kfree(dsd_ptr);
+						ha->dif_bundle_kallocs--;
+						return 1;
+					}
+					ha->dif_bundle_dma_allocs++;
+					ldma_needed = 0;
+					difctx->no_dif_bundl++;
+					list_add_tail(&dsd_ptr->list,
+					    &difctx->ldif_dma_hndl_list);
+				}
+
+				/* xfrlen is min of dma pool size and sglen */
+				xfrlen = (sglen >
+				   (DIF_BUNDLING_DMA_POOL_SIZE - ldma_sg_len)) ?
+				    DIF_BUNDLING_DMA_POOL_SIZE - ldma_sg_len :
+				    sglen;
+
+				/* replace with local allocated dma buffer */
+				sg_pcopy_to_buffer(sgl, sg_nents(sgl),
+				    dsd_ptr->dsd_addr + ldma_sg_len, xfrlen,
+				    difctx->dif_bundl_len);
+				difctx->dif_bundl_len += xfrlen;
+				sglen -= xfrlen;
+				ldma_sg_len += xfrlen;
+				if (ldma_sg_len == DIF_BUNDLING_DMA_POOL_SIZE ||
+				    sg_is_last(sg)) {
+					ldma_needed = 1;
+					ldma_sg_len = 0;
+				}
 			}
+		}
 
-			if (sp) {
-				list_add_tail(&dsd_ptr->list,
-				    &((struct crc_context *)
-					    sp->u.scmd.ctx)->dsd_list);
+		track_difbundl_buf = used_dsds = difctx->no_dif_bundl;
+		ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe025,
+		    "dif_bundl_len=%x, no_dif_bundl=%x track_difbundl_buf: %x\n",
+		    difctx->dif_bundl_len, difctx->no_dif_bundl,
+		    track_difbundl_buf);
 
-				sp->flags |= SRB_CRC_CTX_DSD_VALID;
-			} else {
-				list_add_tail(&dsd_ptr->list,
-				    &(tc->ctx->dsd_list));
-				*tc->ctx_dsd_alloced = 1;
+		if (sp)
+			sp->flags |= SRB_DIF_BUNDL_DMA_VALID;
+		else
+			tc->prot_flags = DIF_BUNDL_DMA_VALID;
+
+		list_for_each_entry_safe(dif_dsd, nxt_dsd,
+		    &difctx->ldif_dma_hndl_list, list) {
+			u32 sglen = (difctx->dif_bundl_len >
+			    DIF_BUNDLING_DMA_POOL_SIZE) ?
+			    DIF_BUNDLING_DMA_POOL_SIZE : difctx->dif_bundl_len;
+
+			BUG_ON(track_difbundl_buf == 0);
+
+			/* Allocate additional continuation packets? */
+			if (avail_dsds == 0) {
+				ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha,
+				    0xe024,
+				    "%s: adding continuation iocb's\n",
+				    __func__);
+				avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ?
+				    QLA_DSDS_PER_IOCB : used_dsds;
+				dsd_list_len = (avail_dsds + 1) * 12;
+				used_dsds -= avail_dsds;
+
+				/* allocate tracking DS */
+				dsd_ptr = kzalloc(sizeof(*dsd_ptr), GFP_ATOMIC);
+				if (!dsd_ptr) {
+					ql_dbg(ql_dbg_tgt, vha, 0xe026,
+					    "%s: failed alloc dsd_ptr\n",
+					    __func__);
+					return 1;
+				}
+				ha->dif_bundle_kallocs++;
+
+				difctx->no_ldif_dsd++;
+				/* allocate new list */
+				dsd_ptr->dsd_addr =
+				    dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC,
+					&dsd_ptr->dsd_list_dma);
+				if (!dsd_ptr->dsd_addr) {
+					ql_dbg(ql_dbg_tgt, vha, 0xe026,
+					    "%s: failed alloc ->dsd_addr\n",
+					    __func__);
+					/*
+					 * need to cleanup only this dsd_ptr
+					 *  rest will be done by sp_free_dma()
+					 */
+					kfree(dsd_ptr);
+					ha->dif_bundle_kallocs--;
+					return 1;
+				}
+				ha->dif_bundle_dma_allocs++;
+
+				if (sp) {
+					list_add_tail(&dsd_ptr->list,
+					    &difctx->ldif_dsd_list);
+					sp->flags |= SRB_CRC_CTX_DSD_VALID;
+				} else {
+					list_add_tail(&dsd_ptr->list,
+					    &difctx->ldif_dsd_list);
+					tc->ctx_dsd_alloced = 1;
+				}
+
+				/* add new list to cmd iocb or last list */
+				*cur_dsd++ =
+				    cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
+				*cur_dsd++ =
+				    cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
+				*cur_dsd++ = dsd_list_len;
+				cur_dsd = dsd_ptr->dsd_addr;
 			}
-
-			/* add new list to cmd iocb or last list */
-			*cur_dsd++ = cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
-			*cur_dsd++ = cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
-			*cur_dsd++ = dsd_list_len;
-			cur_dsd = (uint32_t *)next_dsd;
+			*cur_dsd++ = cpu_to_le32(LSD(dif_dsd->dsd_list_dma));
+			*cur_dsd++ = cpu_to_le32(MSD(dif_dsd->dsd_list_dma));
+			*cur_dsd++ = cpu_to_le32(sglen);
+			avail_dsds--;
+			difctx->dif_bundl_len -= sglen;
+			track_difbundl_buf--;
 		}
-		sle_dma = sg_dma_address(sg);
-
-		*cur_dsd++ = cpu_to_le32(LSD(sle_dma));
-		*cur_dsd++ = cpu_to_le32(MSD(sle_dma));
-		*cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
 
-		avail_dsds--;
+		ql_dbg(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe026,
+		    "%s: no_ldif_dsd:%x, no_dif_bundl:%x\n", __func__,
+			difctx->no_ldif_dsd, difctx->no_dif_bundl);
+	} else {
+		for_each_sg(sgl, sg, tot_dsds, i) {
+			dma_addr_t sle_dma;
+
+			/* Allocate additional continuation packets? */
+			if (avail_dsds == 0) {
+				avail_dsds = (used_dsds > QLA_DSDS_PER_IOCB) ?
+				    QLA_DSDS_PER_IOCB : used_dsds;
+				dsd_list_len = (avail_dsds + 1) * 12;
+				used_dsds -= avail_dsds;
+
+				/* allocate tracking DS */
+				dsd_ptr = kzalloc(sizeof(*dsd_ptr), GFP_ATOMIC);
+				if (!dsd_ptr) {
+					ql_dbg(ql_dbg_tgt + ql_dbg_verbose,
+					    vha, 0xe027,
+					    "%s: failed alloc dsd_dma...\n",
+					    __func__);
+					return 1;
+				}
+
+				/* allocate new list */
+				dsd_ptr->dsd_addr =
+				    dma_pool_alloc(ha->dl_dma_pool, GFP_ATOMIC,
+					&dsd_ptr->dsd_list_dma);
+				if (!dsd_ptr->dsd_addr) {
+					/* need to cleanup only this dsd_ptr */
+					/* rest will be done by sp_free_dma() */
+					kfree(dsd_ptr);
+					return 1;
+				}
+
+				if (sp) {
+					list_add_tail(&dsd_ptr->list,
+					    &difctx->dsd_list);
+					sp->flags |= SRB_CRC_CTX_DSD_VALID;
+				} else {
+					list_add_tail(&dsd_ptr->list,
+					    &difctx->dsd_list);
+					tc->ctx_dsd_alloced = 1;
+				}
+
+				/* add new list to cmd iocb or last list */
+				*cur_dsd++ =
+				    cpu_to_le32(LSD(dsd_ptr->dsd_list_dma));
+				*cur_dsd++ =
+				    cpu_to_le32(MSD(dsd_ptr->dsd_list_dma));
+				*cur_dsd++ = dsd_list_len;
+				cur_dsd = dsd_ptr->dsd_addr;
+			}
+			sle_dma = sg_dma_address(sg);
+			*cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+			*cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+			*cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+			avail_dsds--;
+		}
 	}
 	/* Null termination */
 	*cur_dsd++ = 0;
@@ -1187,7 +1399,6 @@  qla24xx_walk_and_build_prot_sglist(struct qla_hw_data *ha, srb_t *sp,
 	*cur_dsd++ = 0;
 	return 0;
 }
-
 /**
  * qla24xx_build_scsi_crc_2_iocbs() - Build IOCB command utilizing Command
  *							Type 6 IOCB types.
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 30d3090842f8..5c1de6ed825b 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -2725,6 +2725,17 @@  qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
 			    cp->device->vendor);
 		break;
 
+	case CS_DMA:
+		ql_log(ql_log_info, fcport->vha, 0x3022,
+		    "CS_DMA error: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%06x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
+		    comp_status, scsi_status, res, vha->host_no,
+		    cp->device->id, cp->device->lun, fcport->d_id.b24,
+		    ox_id, cp->cmnd, scsi_bufflen(cp), rsp_info_len,
+		    resid_len, fw_resid_len, sp, cp);
+		ql_dump_buffer(ql_dbg_tgt + ql_dbg_verbose, vha, 0xe0ee,
+		    pkt, sizeof(*sts24));
+		res = DID_ERROR << 16;
+		break;
 	default:
 		res = DID_ERROR << 16;
 		break;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index deb923058d08..2bc4fcd0c797 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -299,6 +299,13 @@  MODULE_PARM_DESC(ql2xprotguard, "Override choice of DIX checksum\n"
     "  1 -- Force T10 CRC\n"
     "  2 -- Force IP checksum\n");
 
+int ql2xdifbundlinginternalbuffers;
+module_param(ql2xdifbundlinginternalbuffers, int, 0644);
+MODULE_PARM_DESC(ql2xdifbundlinginternalbuffers,
+    "Force using internal buffers for DIF information\n"
+    "0 (Default). Based on check.\n"
+    "1 Force using internal buffers\n");
+
 /*
  * SCSI host template entry points
  */
@@ -819,7 +826,44 @@  qla2xxx_qpair_sp_free_dma(void *ptr)
 		ha->gbl_dsd_inuse -= ctx1->dsd_use_cnt;
 		ha->gbl_dsd_avail += ctx1->dsd_use_cnt;
 		mempool_free(ctx1, ha->ctx_mempool);
+		sp->flags &= ~SRB_FCP_CMND_DMA_VALID;
+	}
+	if (sp->flags & SRB_DIF_BUNDL_DMA_VALID) {
+		struct crc_context *difctx = sp->u.scmd.ctx;
+		struct dsd_dma *dif_dsd, *nxt_dsd;
+
+		list_for_each_entry_safe(dif_dsd, nxt_dsd,
+		    &difctx->ldif_dma_hndl_list, list) {
+			list_del(&dif_dsd->list);
+			dma_pool_free(ha->dif_bundl_pool, dif_dsd->dsd_addr,
+			    dif_dsd->dsd_list_dma);
+			kfree(dif_dsd);
+			difctx->no_dif_bundl--;
+		}
+
+		list_for_each_entry_safe(dif_dsd, nxt_dsd,
+		    &difctx->ldif_dsd_list, list) {
+			list_del(&dif_dsd->list);
+			dma_pool_free(ha->dl_dma_pool, dif_dsd->dsd_addr,
+			    dif_dsd->dsd_list_dma);
+			kfree(dif_dsd);
+			difctx->no_ldif_dsd--;
+		}
+
+		if (difctx->no_ldif_dsd) {
+			ql_dbg(ql_dbg_tgt+ql_dbg_verbose, sp->vha, 0xe022,
+			    "%s: difctx->no_ldif_dsd=%x\n",
+			    __func__, difctx->no_ldif_dsd);
+		}
+
+		if (difctx->no_dif_bundl) {
+			ql_dbg(ql_dbg_tgt+ql_dbg_verbose, sp->vha, 0xe022,
+			    "%s: difctx->no_dif_bundl=%x\n",
+			    __func__, difctx->no_dif_bundl);
+		}
+		sp->flags &= ~SRB_DIF_BUNDL_DMA_VALID;
 	}
+
 end:
 	CMD_SP(cmd) = NULL;
 	qla2xxx_rel_qpair_sp(sp->qpair, sp);
@@ -4030,9 +4074,86 @@  qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 			    "Failed to allocate memory for fcp_cmnd_dma_pool.\n");
 			goto fail_dl_dma_pool;
 		}
+
+		if (ql2xenabledif) {
+			u64 bufsize = DIF_BUNDLING_DMA_POOL_SIZE;
+			struct dsd_dma *dsd, *nxt;
+			uint i;
+			/* Creata a DMA pool of buffers for DIF bundling */
+			ha->dif_bundl_pool = dma_pool_create(name,
+			    &ha->pdev->dev, DIF_BUNDLING_DMA_POOL_SIZE, 8, 0);
+			if (!ha->dif_bundl_pool) {
+				ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0024,
+				    "%s: failed create dif_bundl_pool\n",
+				    __func__);
+				goto fail_dif_bundl_dma_pool;
+			}
+
+			INIT_LIST_HEAD(&ha->pool.good.head);
+			INIT_LIST_HEAD(&ha->pool.unusable.head);
+			ha->pool.good.count = 0;
+			ha->pool.unusable.count = 0;
+			for (i = 0; i < 128; i++) {
+				dsd = kzalloc(sizeof(*dsd), GFP_ATOMIC);
+				if (!dsd) {
+					ql_dbg_pci(ql_dbg_init, ha->pdev,
+					    0xe0ee, "%s: failed alloc dsd\n",
+					    __func__);
+					return 1;
+				}
+				ha->dif_bundle_kallocs++;
+
+				dsd->dsd_addr = dma_pool_alloc(
+				    ha->dif_bundl_pool, GFP_ATOMIC,
+				    &dsd->dsd_list_dma);
+				if (!dsd->dsd_addr) {
+					ql_dbg_pci(ql_dbg_init, ha->pdev,
+					    0xe0ee,
+					    "%s: failed alloc ->dsd_addr\n",
+					    __func__);
+					kfree(dsd);
+					ha->dif_bundle_kallocs--;
+					continue;
+				}
+				ha->dif_bundle_dma_allocs++;
+
+				/*
+				 * if DMA buffer crosses 4G boundary,
+				 * put it on bad list
+				 */
+				if (MSD(dsd->dsd_list_dma) ^
+				    MSD(dsd->dsd_list_dma + bufsize)) {
+					list_add_tail(&dsd->list,
+					    &ha->pool.unusable.head);
+					ha->pool.unusable.count++;
+				} else {
+					list_add_tail(&dsd->list,
+					    &ha->pool.good.head);
+					ha->pool.good.count++;
+				}
+			}
+
+			/* return the good ones back to the pool */
+			list_for_each_entry_safe(dsd, nxt,
+			    &ha->pool.good.head, list) {
+				list_del(&dsd->list);
+				dma_pool_free(ha->dif_bundl_pool,
+				    dsd->dsd_addr, dsd->dsd_list_dma);
+				ha->dif_bundle_dma_allocs--;
+				kfree(dsd);
+				ha->dif_bundle_kallocs--;
+			}
+
+			ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0024,
+			    "%s: dif dma pool (good=%u unusable=%u)\n",
+			    __func__, ha->pool.good.count,
+			    ha->pool.unusable.count);
+		}
+
 		ql_dbg_pci(ql_dbg_init, ha->pdev, 0x0025,
-		    "dl_dma_pool=%p fcp_cmnd_dma_pool=%p.\n",
-		    ha->dl_dma_pool, ha->fcp_cmnd_dma_pool);
+		    "dl_dma_pool=%p fcp_cmnd_dma_pool=%p dif_bundl_pool=%p.\n",
+		    ha->dl_dma_pool, ha->fcp_cmnd_dma_pool,
+		    ha->dif_bundl_pool);
 	}
 
 	/* Allocate memory for SNS commands */
@@ -4197,6 +4318,24 @@  qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len,
 		dma_free_coherent(&ha->pdev->dev, sizeof(struct sns_cmd_pkt),
 		    ha->sns_cmd, ha->sns_cmd_dma);
 fail_dma_pool:
+	if (ql2xenabledif) {
+		struct dsd_dma *dsd, *nxt;
+
+		list_for_each_entry_safe(dsd, nxt, &ha->pool.unusable.head,
+		    list) {
+			list_del(&dsd->list);
+			dma_pool_free(ha->dif_bundl_pool, dsd->dsd_addr,
+			    dsd->dsd_list_dma);
+			ha->dif_bundle_dma_allocs--;
+			kfree(dsd);
+			ha->dif_bundle_kallocs--;
+			ha->pool.unusable.count--;
+		}
+		dma_pool_destroy(ha->dif_bundl_pool);
+		ha->dif_bundl_pool = NULL;
+	}
+
+fail_dif_bundl_dma_pool:
 	if (IS_QLA82XX(ha) || ql2xenabledif) {
 		dma_pool_destroy(ha->fcp_cmnd_dma_pool);
 		ha->fcp_cmnd_dma_pool = NULL;
@@ -4583,6 +4722,31 @@  qla2x00_mem_free(struct qla_hw_data *ha)
 
 	if (ha->ctx_mempool)
 		mempool_destroy(ha->ctx_mempool);
+	if (ql2xenabledif) {
+		struct dsd_dma *dsd, *nxt;
+
+		list_for_each_entry_safe(dsd, nxt, &ha->pool.unusable.head,
+		    list) {
+			list_del(&dsd->list);
+			dma_pool_free(ha->dif_bundl_pool, dsd->dsd_addr,
+			    dsd->dsd_list_dma);
+			ha->dif_bundle_dma_allocs--;
+			kfree(dsd);
+			ha->dif_bundle_kallocs--;
+			ha->pool.unusable.count--;
+		}
+		list_for_each_entry_safe(dsd, nxt, &ha->pool.good.head, list) {
+			list_del(&dsd->list);
+			dma_pool_free(ha->dif_bundl_pool, dsd->dsd_addr,
+			    dsd->dsd_list_dma);
+			ha->dif_bundle_dma_allocs--;
+			kfree(dsd);
+			ha->dif_bundle_kallocs--;
+		}
+	}
+
+	if (ha->dif_bundl_pool)
+		dma_pool_destroy(ha->dif_bundl_pool);
 
 	qlt_mem_free(ha);
 
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index bceb8e882e7f..0df24968aeda 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3230,7 +3230,7 @@  qlt_build_ctio_crc2_pkt(struct qla_qpair *qpair, struct qla_tgt_prm *prm)
 
 		cur_dsd = (uint32_t *) &crc_ctx_pkt->u.bundling.dif_address;
 		if (qla24xx_walk_and_build_prot_sglist(ha, NULL, cur_dsd,
-			prm->prot_seg_cnt, &tc))
+			prm->prot_seg_cnt, cmd))
 			goto crc_queuing_error;
 	}
 	return QLA_SUCCESS;
diff --git a/drivers/scsi/qla2xxx/qla_target.h b/drivers/scsi/qla2xxx/qla_target.h
index 577e1786a3f1..f3de75000a08 100644
--- a/drivers/scsi/qla2xxx/qla_target.h
+++ b/drivers/scsi/qla2xxx/qla_target.h
@@ -928,6 +928,8 @@  struct qla_tgt_cmd {
 	uint64_t	lba;
 	uint16_t	a_guard, e_guard, a_app_tag, e_app_tag;
 	uint32_t	a_ref_tag, e_ref_tag;
+#define DIF_BUNDL_DMA_VALID 1
+	uint16_t prot_flags;
 
 	uint64_t jiffies_at_alloc;
 	uint64_t jiffies_at_free;