Message ID | 1402477799-24610-3-git-send-email-sagig@mellanox.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
This patch causes a regression when using the iscsi initiator over TCP for me. When mounting a newly created ext4 filesystem I get the following BUG: [ 31.611803] BUG: unable to handle kernel NULL pointer dereference at 000000000000000c [ 31.613563] IP: [<ffffffff8197b38d>] iscsi_tcp_segment_done+0x2bd/0x380 [ 31.613563] PGD 7a3e4067 PUD 7a45f067 PMD 0 [ 31.613563] Oops: 0000 [#1] SMP [ 31.613563] Modules linked in: [ 31.613563] CPU: 3 PID: 3739 Comm: kworker/u8:5 Not tainted 3.16.0-rc2 #187 [ 31.613563] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 31.613563] Workqueue: iscsi_q_2 iscsi_xmitworker [ 31.613563] task: ffff88007b33cf10 ti: ffff88007ad94000 task.ti: ffff88007ad94000 [ 31.613563] RIP: 0010:[<ffffffff8197b38d>] [<ffffffff8197b38d>] iscsi_tcp_segment_done+0x2bd/0x380 [ 31.613563] RSP: 0018:ffff88007ad97b38 EFLAGS: 00010246 [ 31.613563] RAX: 0000000000000000 RBX: ffff88007cd67910 RCX: 0000000000000200 [ 31.613563] RDX: 0000000000002000 RSI: 0000000000000000 RDI: ffff88007cd67910 [ 31.613563] RBP: ffff88007ad97b98 R08: 0000000000000200 R09: 0000000000000000 [ 31.613563] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [ 31.613563] R13: ffff88007cd67780 R14: 0000000000000000 R15: 0000000000000000 [ 31.613563] FS: 0000000000000000(0000) GS:ffff88007fd80000(0000) knlGS:0000000000000000 [ 31.613563] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 31.613563] CR2: 000000000000000c CR3: 000000007afd9000 CR4: 00000000000006e0 [ 31.613563] Stack: [ 31.613563] ffff88007ad97b98 ffffffff81c68fd6 ffffffff81c68f20 ffff88007c8e37c8 [ 31.613563] 000000007b33d728 ffff88007dc805b0 ffff88007ad97c58 0000000000000200 [ 31.613563] ffff88007cd67780 ffff880000c00040 ffff88007ad97c00 ffff88007cd67910 [ 31.613563] Call Trace: [ 31.613563] [<ffffffff81c68fd6>] ? inet_sendpage+0xb6/0x130 [ 31.613563] [<ffffffff81c68f20>] ? inet_dgram_connect+0x80/0x80 [ 31.613563] [<ffffffff8197bd95>] iscsi_sw_tcp_pdu_xmit+0xe5/0x2e0 [ 31.613563] [<ffffffff8197badf>] ? iscsi_sw_tcp_pdu_init+0x1bf/0x390 [ 31.613563] [<ffffffff81979b82>] iscsi_tcp_task_xmit+0xa2/0x2b0 [ 31.613563] [<ffffffff81974815>] ? iscsi_xmit_task+0x45/0xd0 [ 31.613563] [<ffffffff810fbb8d>] ? trace_hardirqs_on+0xd/0x10 [ 31.613563] [<ffffffff810b54a0>] ? __local_bh_enable_ip+0x70/0xd0 [ 31.613563] [<ffffffff81974829>] iscsi_xmit_task+0x59/0xd0 [ 31.613563] [<ffffffff81978468>] iscsi_xmitworker+0x288/0x330 [ 31.613563] [<ffffffff810cc847>] process_one_work+0x1c7/0x490 [ 31.613563] [<ffffffff810cc7dd>] ? process_one_work+0x15d/0x490 [ 31.613563] [<ffffffff810cd539>] worker_thread+0x119/0x4f0 [ 31.613563] [<ffffffff810fbb8d>] ? trace_hardirqs_on+0xd/0x10 [ 31.613563] [<ffffffff810cd420>] ? init_pwq+0x190/0x190 [ 31.613563] [<ffffffff810d3c3f>] kthread+0xdf/0x100 [ 31.613563] [<ffffffff810d3b60>] ? __init_kthread_worker+0x70/0x70 [ 31.613563] [<ffffffff81d904bc>] ret_from_fork+0x7c/0xb0 [ 31.613563] [<ffffffff810d3b60>] ? __init_kthread_worker+0x70/0x70 [ 31.613563] Code: 89 03 31 c0 e9 cc fe ff ff 0f 1f 44 00 00 48 8b 7b 30 e8 17 74 de ff 8b 53 10 c7 43 40 00 00 00 00 48 89 43 30 44 89 f6 48 89 df <8b> 40 0c 48 c7 03 00 00 00 00 2b 53 14 39 c2 0f 47 d0 89 53 08 (gdb) l *(iscsi_tcp_segment_done+0x2bd) 0xffffffff8197b38d is in iscsi_tcp_segment_done (../drivers/scsi/libiscsi_tcp.c:102). 97 iscsi_tcp_segment_init_sg(struct iscsi_segment *segment, 98 struct scatterlist *sg, unsigned int offset) 99 { 100 segment->sg = sg; 101 segment->sg_offset = offset; 102 segment->size = min(sg->length - offset, 103 segment->total_size - segment->total_copied); 104 segment->data = NULL; 105 } 106 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 06/23/2014 03:59 PM, Christoph Hellwig wrote: > This patch causes a regression when using the iscsi initiator over > TCP for me. When mounting a newly created ext4 filesystem I get the > following BUG: > > [ 31.611803] BUG: unable to handle kernel NULL pointer dereference at 000000000000000c > [ 31.613563] IP: [<ffffffff8197b38d>] iscsi_tcp_segment_done+0x2bd/0x380 > [ 31.613563] PGD 7a3e4067 PUD 7a45f067 PMD 0 > [ 31.613563] Oops: 0000 [#1] SMP > [ 31.613563] Modules linked in: > [ 31.613563] CPU: 3 PID: 3739 Comm: kworker/u8:5 Not tainted 3.16.0-rc2 #187 > [ 31.613563] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 > [ 31.613563] Workqueue: iscsi_q_2 iscsi_xmitworker > [ 31.613563] task: ffff88007b33cf10 ti: ffff88007ad94000 task.ti: ffff88007ad94000 > [ 31.613563] RIP: 0010:[<ffffffff8197b38d>] [<ffffffff8197b38d>] iscsi_tcp_segment_done+0x2bd/0x380 > [ 31.613563] RSP: 0018:ffff88007ad97b38 EFLAGS: 00010246 > [ 31.613563] RAX: 0000000000000000 RBX: ffff88007cd67910 RCX: 0000000000000200 > [ 31.613563] RDX: 0000000000002000 RSI: 0000000000000000 RDI: ffff88007cd67910 > [ 31.613563] RBP: ffff88007ad97b98 R08: 0000000000000200 R09: 0000000000000000 > [ 31.613563] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 > [ 31.613563] R13: ffff88007cd67780 R14: 0000000000000000 R15: 0000000000000000 > [ 31.613563] FS: 0000000000000000(0000) GS:ffff88007fd80000(0000) knlGS:0000000000000000 > [ 31.613563] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > [ 31.613563] CR2: 000000000000000c CR3: 000000007afd9000 CR4: 00000000000006e0 > [ 31.613563] Stack: > [ 31.613563] ffff88007ad97b98 ffffffff81c68fd6 ffffffff81c68f20 ffff88007c8e37c8 > [ 31.613563] 000000007b33d728 ffff88007dc805b0 ffff88007ad97c58 0000000000000200 > [ 31.613563] ffff88007cd67780 ffff880000c00040 ffff88007ad97c00 ffff88007cd67910 > [ 31.613563] Call Trace: > [ 31.613563] [<ffffffff81c68fd6>] ? inet_sendpage+0xb6/0x130 > [ 31.613563] [<ffffffff81c68f20>] ? inet_dgram_connect+0x80/0x80 > [ 31.613563] [<ffffffff8197bd95>] iscsi_sw_tcp_pdu_xmit+0xe5/0x2e0 > [ 31.613563] [<ffffffff8197badf>] ? iscsi_sw_tcp_pdu_init+0x1bf/0x390 > [ 31.613563] [<ffffffff81979b82>] iscsi_tcp_task_xmit+0xa2/0x2b0 > [ 31.613563] [<ffffffff81974815>] ? iscsi_xmit_task+0x45/0xd0 > [ 31.613563] [<ffffffff810fbb8d>] ? trace_hardirqs_on+0xd/0x10 > [ 31.613563] [<ffffffff810b54a0>] ? __local_bh_enable_ip+0x70/0xd0 > [ 31.613563] [<ffffffff81974829>] iscsi_xmit_task+0x59/0xd0 > [ 31.613563] [<ffffffff81978468>] iscsi_xmitworker+0x288/0x330 > [ 31.613563] [<ffffffff810cc847>] process_one_work+0x1c7/0x490 > [ 31.613563] [<ffffffff810cc7dd>] ? process_one_work+0x15d/0x490 > [ 31.613563] [<ffffffff810cd539>] worker_thread+0x119/0x4f0 > [ 31.613563] [<ffffffff810fbb8d>] ? trace_hardirqs_on+0xd/0x10 > [ 31.613563] [<ffffffff810cd420>] ? init_pwq+0x190/0x190 > [ 31.613563] [<ffffffff810d3c3f>] kthread+0xdf/0x100 > [ 31.613563] [<ffffffff810d3b60>] ? __init_kthread_worker+0x70/0x70 > [ 31.613563] [<ffffffff81d904bc>] ret_from_fork+0x7c/0xb0 > [ 31.613563] [<ffffffff810d3b60>] ? __init_kthread_worker+0x70/0x70 > [ 31.613563] Code: 89 03 31 c0 e9 cc fe ff ff 0f 1f 44 00 00 48 8b 7b > 30 e8 17 74 de ff 8b 53 10 c7 43 40 00 00 00 00 48 89 43 30 44 89 f6 48 > 89 df <8b> 40 0c 48 c7 03 00 00 00 00 2b 53 14 39 c2 0f 47 d0 89 53 08 > > > (gdb) l *(iscsi_tcp_segment_done+0x2bd) > 0xffffffff8197b38d is in iscsi_tcp_segment_done > (../drivers/scsi/libiscsi_tcp.c:102). > 97 iscsi_tcp_segment_init_sg(struct iscsi_segment *segment, > 98 struct scatterlist *sg, unsigned int offset) > 99 { > 100 segment->sg = sg; > 101 segment->sg_offset = offset; > 102 segment->size = min(sg->length - offset, > 103 segment->total_size - segment->total_copied); > 104 segment->data = NULL; > 105 } > 106 > Ok, it looks like scsi_out(scsi_cmnd)->length (iscsi_tcp/libiscsi_tcp still uses that for lower level operations since it was not converted to support t10 pi) returns a different value than scsi_transfer_length() (libiscsi uses this for higher level operations when it was converted to t10 support since iser uses that module and also has t10 support) for some commands. We then end up incorrectly thinking some requests are the wrong size and then hit this. Looking into why exactly this happens. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 06/11/2014 12:09 PM, Sagi Grimberg wrote: > In case protection information exists over the wire > iscsi header data length is required to include it. > Use protection information aware scsi helpers to set > the correct transfer length. > > In order to avoid breakage, remove iser transfer length > checks for each task as they are not always true and > somewhat redundant anyway. > > Signed-off-by: Sagi Grimberg <sagig@mellanox.com> > Reviewed-by: Mike Christie <michaelc@cs.wisc.edu> > --- > drivers/infiniband/ulp/iser/iser_initiator.c | 34 +++++++------------------ > drivers/scsi/libiscsi.c | 18 +++++++------- > 2 files changed, 19 insertions(+), 33 deletions(-) > > diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c > index 2e2d903..8d44a40 100644 > --- a/drivers/infiniband/ulp/iser/iser_initiator.c > +++ b/drivers/infiniband/ulp/iser/iser_initiator.c > @@ -41,11 +41,11 @@ > #include "iscsi_iser.h" > > /* Register user buffer memory and initialize passive rdma > - * dto descriptor. Total data size is stored in > - * iser_task->data[ISER_DIR_IN].data_len > + * dto descriptor. Data size is stored in > + * task->data[ISER_DIR_IN].data_len, Protection size > + * os stored in task->prot[ISER_DIR_IN].data_len > */ > -static int iser_prepare_read_cmd(struct iscsi_task *task, > - unsigned int edtl) > +static int iser_prepare_read_cmd(struct iscsi_task *task) > > { > struct iscsi_iser_task *iser_task = task->dd_data; > @@ -73,14 +73,6 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, > return err; > } > > - if (edtl > iser_task->data[ISER_DIR_IN].data_len) { > - iser_err("Total data length: %ld, less than EDTL: " > - "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", > - iser_task->data[ISER_DIR_IN].data_len, edtl, > - task->itt, iser_task->ib_conn); > - return -EINVAL; > - } > - > err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); > if (err) { > iser_err("Failed to set up Data-IN RDMA\n"); > @@ -100,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, > } > > /* Register user buffer memory and initialize passive rdma > - * dto descriptor. Total data size is stored in > - * task->data[ISER_DIR_OUT].data_len > + * dto descriptor. Data size is stored in > + * task->data[ISER_DIR_OUT].data_len, Protection size > + * is stored at task->prot[ISER_DIR_OUT].data_len > */ > static int > iser_prepare_write_cmd(struct iscsi_task *task, > @@ -135,14 +128,6 @@ iser_prepare_write_cmd(struct iscsi_task *task, > return err; > } > > - if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { > - iser_err("Total data length: %ld, less than EDTL: %d, " > - "in WRITE cmd BHS itt: %d, conn: 0x%p\n", > - iser_task->data[ISER_DIR_OUT].data_len, > - edtl, task->itt, task->conn); > - return -EINVAL; > - } > - > err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); > if (err != 0) { > iser_err("Failed to register write cmd RDMA mem\n"); > @@ -417,11 +402,12 @@ int iser_send_command(struct iscsi_conn *conn, > if (scsi_prot_sg_count(sc)) { > prot_buf->buf = scsi_prot_sglist(sc); > prot_buf->size = scsi_prot_sg_count(sc); > - prot_buf->data_len = sc->prot_sdb->length; > + prot_buf->data_len = data_buf->data_len >> > + ilog2(sc->device->sector_size) * 8; > } > > if (hdr->flags & ISCSI_FLAG_CMD_READ) { > - err = iser_prepare_read_cmd(task, edtl); > + err = iser_prepare_read_cmd(task); > if (err) > goto send_command_error; > } > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c > index 26dc005..3f46234 100644 > --- a/drivers/scsi/libiscsi.c > +++ b/drivers/scsi/libiscsi.c > @@ -338,7 +338,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) > struct iscsi_session *session = conn->session; > struct scsi_cmnd *sc = task->sc; > struct iscsi_scsi_req *hdr; > - unsigned hdrlength, cmd_len; > + unsigned hdrlength, cmd_len, transfer_length; I hate that you introduced this new transfer_length variable. It does not exist. In BIDI supporting driver there is out_len and in_len just as original code. > itt_t itt; > int rc; > > @@ -391,11 +391,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) > if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) > task->protected = true; > > + transfer_length = scsi_transfer_length(sc); > + hdr->data_length = cpu_to_be32(transfer_length); > if (sc->sc_data_direction == DMA_TO_DEVICE) { > - unsigned out_len = scsi_out(sc)->length; In light of all the comments and the obvious bugs, please just re do this patch. Do not just later fix this one. All you need is: + unsigned out_len = scsi_transfer_length(sc ,scsi_out(sc)); Also I would love if you left the addition to the user .I.E out_len += scsi_proto_length(sc ,scsi_out(sc)); This way we can see that this addition is because of scsi_proto and that this particular driver puts them together in the same payload. There can be other DIFF supporting drivers that put the DIFF payload on another stream / another SG list, like the sata one, right? Then scsi_transfer_length() becomes: static inline unsigned scsi_proto_length(struct scsi_cmnd *scmd, scsi_data_buffer *sdb) { unsigned int prot_op = scsi_get_prot_op(scmd); unsigned int sector_size = scmd->device->sector_size; switch (prot_op) { case SCSI_PROT_NORMAL: case SCSI_PROT_WRITE_STRIP: case SCSI_PROT_READ_INSERT: return 0; } return (sdb->length >> ilog2(sector_size)) * 8; } > struct iscsi_r2t_info *r2t = &task->unsol_r2t; > > - hdr->data_length = cpu_to_be32(out_len); And this stays as is > hdr->flags |= ISCSI_FLAG_CMD_WRITE; > /* > * Write counters: > @@ -414,18 +414,19 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) > memset(r2t, 0, sizeof(*r2t)); > > if (session->imm_data_en) { > - if (out_len >= session->first_burst) > + if (transfer_length >= session->first_burst) And this stays as is > task->imm_count = min(session->first_burst, > conn->max_xmit_dlength); > else > - task->imm_count = min(out_len, > - conn->max_xmit_dlength); > + task->imm_count = min(transfer_length, > + conn->max_xmit_dlength); And this stays as is > hton24(hdr->dlength, task->imm_count); > } else > zero_data(hdr->dlength); > > if (!session->initial_r2t_en) { > - r2t->data_length = min(session->first_burst, out_len) - > + r2t->data_length = min(session->first_burst, > + transfer_length) - And this stays as is > task->imm_count; > r2t->data_offset = task->imm_count; > r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG); > @@ -438,7 +439,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) > } else { > hdr->flags |= ISCSI_FLAG_CMD_FINAL; > zero_data(hdr->dlength); > - hdr->data_length = cpu_to_be32(scsi_in(sc)->length); And this stays as is BTW: When reading DIFF devices, don't you have extra bits to read as well? How does the DIFF read works? Please get me up to speed. I'm not familiar with this protocol? (I'd imagine that if say an app reads X bytes with DIFF info, it wants to receive its DIFF checksome for every sector in X, where is this info on the iscsi wire?) > > if (sc->sc_data_direction == DMA_FROM_DEVICE) > hdr->flags |= ISCSI_FLAG_CMD_READ; > @@ -466,7 +466,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) > scsi_bidi_cmnd(sc) ? "bidirectional" : > sc->sc_data_direction == DMA_TO_DEVICE ? > "write" : "read", conn->id, sc, sc->cmnd[0], > - task->itt, scsi_bufflen(sc), > + task->itt, transfer_length, And this This print is correct as it covers all cases. If you want to print the adjusted length then OK, you'd need to store this I guess, but store it as a different variable like proto_length and print it as an additional variable. The current print is one-to-one with what the caller requested, FS wrote X bytes. If any was added for proto I'd like to see both prints. > scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0, > session->cmdsn, > session->max_cmdsn - session->exp_cmdsn + 1); > Rrrr I see that this patch is already in mainline. I'd like to see the fixing patch reverting all these wrong changes to the code and only leaving the single needed change above. I'll send a PATCH over linus/master later today. Thanks Boaz -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Boaz, On 7/27/2014 1:08 PM, Boaz Harrosh wrote: <SNIP> >> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c >> index 26dc005..3f46234 100644 >> --- a/drivers/scsi/libiscsi.c >> +++ b/drivers/scsi/libiscsi.c >> @@ -338,7 +338,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) >> struct iscsi_session *session = conn->session; >> struct scsi_cmnd *sc = task->sc; >> struct iscsi_scsi_req *hdr; >> - unsigned hdrlength, cmd_len; >> + unsigned hdrlength, cmd_len, transfer_length; > > I hate that you introduced this new transfer_length variable. It does > not exist. In BIDI supporting driver there is out_len and in_len just > as original code. Effectively, out_len and in_len are the same except for the bidi case. Moreover, the hdr->data_length is exactly the command scsi data buffer length, the bidi length is taken from scsi_in when we build the AHS for bidi (rlen_ahdr->read_length). So to me it is correct and makes sense. But I'm don't feel so strong about it... Mike what's your take on this? > >> itt_t itt; >> int rc; >> >> @@ -391,11 +391,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) >> if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) >> task->protected = true; >> >> + transfer_length = scsi_transfer_length(sc); >> + hdr->data_length = cpu_to_be32(transfer_length); >> if (sc->sc_data_direction == DMA_TO_DEVICE) { >> - unsigned out_len = scsi_out(sc)->length; > > In light of all the comments and the obvious bugs, please just > re do this patch. Do not just later fix this one. > > All you need is: > + unsigned out_len = scsi_transfer_length(sc ,scsi_out(sc)); > > Also I would love if you left the addition to the user .I.E > > out_len += scsi_proto_length(sc ,scsi_out(sc)); > > This way we can see that this addition is because of scsi_proto and that > this particular driver puts them together in the same payload. There can be > other DIFF supporting drivers that put the DIFF payload on another stream / another > SG list, like the sata one, right? I think that DIF specification says that on the wire the data and protection are interleaved i.e. Block1, DIF1, Block2, DIF2... So I do think that the transfer length should always include data_length + protection_length. > > BTW: When reading DIFF devices, don't you have extra bits to read as well? > How does the DIFF read works? Please get me up to speed. I'm not familiar > with this protocol? > (I'd imagine that if say an app reads X bytes with DIFF info, it wants to > receive its DIFF checksome for every sector in X, where is this info > on the iscsi wire?) > The application wants to read X bytes from a DIF formatted device, the initiator will prepare buffers for data and for DIF data (in some implementations it can be the same buffer but not in Linux), and request reading X+DIF_size bytes (where each block is followed by it's corresponding integrity field) and place the data blocks in the data buffer and the integrity fields in the DIF data buffer (usually this is offloaded). >> >> if (sc->sc_data_direction == DMA_FROM_DEVICE) >> hdr->flags |= ISCSI_FLAG_CMD_READ; >> @@ -466,7 +466,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) >> scsi_bidi_cmnd(sc) ? "bidirectional" : >> sc->sc_data_direction == DMA_TO_DEVICE ? >> "write" : "read", conn->id, sc, sc->cmnd[0], >> - task->itt, scsi_bufflen(sc), >> + task->itt, transfer_length, > > And this > > This print is correct as it covers all cases. If you want to print the adjusted > length then OK, you'd need to store this I guess, but store it as a different > variable like proto_length and print it as an additional variable. But it is the transfer length that is sent in iSCSI header. Why do you want to print it as additional info? for transactions that include DIF the length is the data + protection. > The current print is one-to-one with what the caller requested, FS wrote X bytes. It is still one-to-one isn't it? Sagi. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/06/2014 03:43 PM, Sagi Grimberg wrote: > Hi Boaz, > <> >> >> I hate that you introduced this new transfer_length variable. It does >> not exist. In BIDI supporting driver there is out_len and in_len just >> as original code. > > Effectively, out_len and in_len are the same except for the bidi case. > Moreover, the hdr->data_length is exactly the command scsi data buffer > length, the bidi length is taken from scsi_in when we build the AHS for > bidi (rlen_ahdr->read_length). > > So to me it is correct and makes sense. But I'm don't feel so strong > about it... > > Mike what's your take on this? > I have a patch to clean all that, will send tomorrow. What I mean is that this is on the out-path only the in path is different. See the code this variable is only used in the if (== DMA_TO_DEVICE) case and should be local to that scope. This is my clean up <> >> this particular driver puts them together in the same payload. There can be >> other DIFF supporting drivers that put the DIFF payload on another stream / another >> SG list, like the sata one, right? > > I think that DIF specification says that on the wire the data and > protection are interleaved i.e. Block1, DIF1, Block2, DIF2... > No it does not. This is a per transport, and actually per device host driver. Yes in iSCSI_tcp they are inline in HW cards they might come as two different SGs (Like the Linux model). Even with iscsi-offload they might want to be two SG-lists. > So I do think that the transfer length should always include > data_length + protection_length. > This is at the iscsi level. But the scsi_transfer_length() is on the scsi level which keeps them separate. So I think the proper API should be scsi_proto_length() And for LLDs that want them together they can do scsi_bufflen() + scsi_proto_length() and for other drivers they can do it separately. Don't infect iscsi level assumptions on the generic layer API. Again my patch fixes this. >> And this >> >> This print is correct as it covers all cases. If you want to print the adjusted >> length then OK, you'd need to store this I guess, but store it as a different >> variable like proto_length and print it as an additional variable. > > But it is the transfer length that is sent in iSCSI header. Why do you > want to print it as additional info? I want to see what was the length the app/FS sent, then as added info how much was added for DIFF, your way there is lost information. > for transactions that include DIF > the length is the data + protection. > > It is still one-to-one isn't it? > No! the original submitted length is lost from the print > Sagi. > Shalom Boaz -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
>>>>> "Sagi" == Sagi Grimberg <sagig@dev.mellanox.co.il> writes: >> BTW: When reading DIFF devices, don't you have extra bits to read as >> well? How does the DIFF read works? Please get me up to speed. I'm >> not familiar with this protocol? (I'd imagine that if say an app >> reads X bytes with DIFF info, it wants to receive its DIFF checksome >> for every sector in X, where is this info on the iscsi wire?) >> Sagi> The application wants to read X bytes from a DIF formatted device, Sagi> the initiator will prepare buffers for data and for DIF data (in Sagi> some implementations it can be the same buffer but not in Linux), Sagi> and request reading X+DIF_size bytes (where each block is followed Sagi> by it's corresponding integrity field) and place the data blocks Sagi> in the data buffer and the integrity fields in the DIF data buffer Sagi> (usually this is offloaded). READ CAPACITY returns a block size of 512 even though the blocks on disk are 520. That's the beauty of it. At the command level all transfers are expressed in number of blocks, not bytes. The application will see an N x 512 byte buffer but on the wire between initiator and target we'll transfer N x 520 bytes.
On 8/6/2014 4:25 PM, Boaz Harrosh wrote: > On 08/06/2014 03:43 PM, Sagi Grimberg wrote: >> Hi Boaz, >> > <> >>> >>> I hate that you introduced this new transfer_length variable. It does >>> not exist. In BIDI supporting driver there is out_len and in_len just >>> as original code. >> >> Effectively, out_len and in_len are the same except for the bidi case. >> Moreover, the hdr->data_length is exactly the command scsi data buffer >> length, the bidi length is taken from scsi_in when we build the AHS for >> bidi (rlen_ahdr->read_length). >> >> So to me it is correct and makes sense. But I'm don't feel so strong >> about it... >> >> Mike what's your take on this? >> > > I have a patch to clean all that, will send tomorrow. > > What I mean is that this is on the out-path only the in path is different. > See the code this variable is only used in the if (== DMA_TO_DEVICE) case and > should be local to that scope. This is my clean up > Hey Boaz, So in the current flow, I still don't think it is wrong/buggy, the transfer byte length related to scsi buffer length (In iscsi for sure but I think that for all transports it is the same). But, Since you have such a strong objection on this I'm also OK with changing stuff... We can pass a buffer to scsi_transfer_length (although it has no meaning effectively) and we can keep in/out_len local variables and print them along with total transfer. Do you want me to send out a patch here (since I have some hardware to test it...) or are you still planning to send out something? Cheers, Sagi. -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 08/13/2014 04:09 PM, Sagi Grimberg wrote: > On 8/6/2014 4:25 PM, Boaz Harrosh wrote: > > Hey Boaz, > > So in the current flow, I still don't think it is wrong/buggy, the > transfer byte length related to scsi buffer length (In iscsi for sure > but I think that for all transports it is the same). > > But, > Since you have such a strong objection on this I'm also OK with changing > stuff... We can pass a buffer to scsi_transfer_length (although it has > no meaning effectively) and we can keep in/out_len local variables and > print them along with total transfer. > > Do you want me to send out a patch here (since I have some hardware to > test it...) or are you still planning to send out something? > I'll do it. As you said there is no bugs, just ugly code. I will send a cleanup Thanks Boaz > Cheers, > Sagi. > -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 2e2d903..8d44a40 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -41,11 +41,11 @@ #include "iscsi_iser.h" /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * iser_task->data[ISER_DIR_IN].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_IN].data_len, Protection size + * os stored in task->prot[ISER_DIR_IN].data_len */ -static int iser_prepare_read_cmd(struct iscsi_task *task, - unsigned int edtl) +static int iser_prepare_read_cmd(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; @@ -73,14 +73,6 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, return err; } - if (edtl > iser_task->data[ISER_DIR_IN].data_len) { - iser_err("Total data length: %ld, less than EDTL: " - "%d, in READ cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_IN].data_len, edtl, - task->itt, iser_task->ib_conn); - return -EINVAL; - } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN); if (err) { iser_err("Failed to set up Data-IN RDMA\n"); @@ -100,8 +92,9 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, } /* Register user buffer memory and initialize passive rdma - * dto descriptor. Total data size is stored in - * task->data[ISER_DIR_OUT].data_len + * dto descriptor. Data size is stored in + * task->data[ISER_DIR_OUT].data_len, Protection size + * is stored at task->prot[ISER_DIR_OUT].data_len */ static int iser_prepare_write_cmd(struct iscsi_task *task, @@ -135,14 +128,6 @@ iser_prepare_write_cmd(struct iscsi_task *task, return err; } - if (edtl > iser_task->data[ISER_DIR_OUT].data_len) { - iser_err("Total data length: %ld, less than EDTL: %d, " - "in WRITE cmd BHS itt: %d, conn: 0x%p\n", - iser_task->data[ISER_DIR_OUT].data_len, - edtl, task->itt, task->conn); - return -EINVAL; - } - err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT); if (err != 0) { iser_err("Failed to register write cmd RDMA mem\n"); @@ -417,11 +402,12 @@ int iser_send_command(struct iscsi_conn *conn, if (scsi_prot_sg_count(sc)) { prot_buf->buf = scsi_prot_sglist(sc); prot_buf->size = scsi_prot_sg_count(sc); - prot_buf->data_len = sc->prot_sdb->length; + prot_buf->data_len = data_buf->data_len >> + ilog2(sc->device->sector_size) * 8; } if (hdr->flags & ISCSI_FLAG_CMD_READ) { - err = iser_prepare_read_cmd(task, edtl); + err = iser_prepare_read_cmd(task); if (err) goto send_command_error; } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 26dc005..3f46234 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -338,7 +338,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; struct iscsi_scsi_req *hdr; - unsigned hdrlength, cmd_len; + unsigned hdrlength, cmd_len, transfer_length; itt_t itt; int rc; @@ -391,11 +391,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) task->protected = true; + transfer_length = scsi_transfer_length(sc); + hdr->data_length = cpu_to_be32(transfer_length); if (sc->sc_data_direction == DMA_TO_DEVICE) { - unsigned out_len = scsi_out(sc)->length; struct iscsi_r2t_info *r2t = &task->unsol_r2t; - hdr->data_length = cpu_to_be32(out_len); hdr->flags |= ISCSI_FLAG_CMD_WRITE; /* * Write counters: @@ -414,18 +414,19 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) memset(r2t, 0, sizeof(*r2t)); if (session->imm_data_en) { - if (out_len >= session->first_burst) + if (transfer_length >= session->first_burst) task->imm_count = min(session->first_burst, conn->max_xmit_dlength); else - task->imm_count = min(out_len, - conn->max_xmit_dlength); + task->imm_count = min(transfer_length, + conn->max_xmit_dlength); hton24(hdr->dlength, task->imm_count); } else zero_data(hdr->dlength); if (!session->initial_r2t_en) { - r2t->data_length = min(session->first_burst, out_len) - + r2t->data_length = min(session->first_burst, + transfer_length) - task->imm_count; r2t->data_offset = task->imm_count; r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG); @@ -438,7 +439,6 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) } else { hdr->flags |= ISCSI_FLAG_CMD_FINAL; zero_data(hdr->dlength); - hdr->data_length = cpu_to_be32(scsi_in(sc)->length); if (sc->sc_data_direction == DMA_FROM_DEVICE) hdr->flags |= ISCSI_FLAG_CMD_READ; @@ -466,7 +466,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) scsi_bidi_cmnd(sc) ? "bidirectional" : sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read", conn->id, sc, sc->cmnd[0], - task->itt, scsi_bufflen(sc), + task->itt, transfer_length, scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0, session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);