Message ID | 20210730103854.12681-2-jgross@suse.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | xen: harden blkfront against malicious backends | expand |
Hi, Juergen! On 30.07.21 13:38, Juergen Gross wrote: > In order to avoid problems in case the backend is modifying a response > on the ring page while the frontend has already seen it, just read the > response into a local buffer in one go and then operate on that buffer > only. > > Signed-off-by: Juergen Gross <jgross@suse.com> > Reviewed-by: Jan Beulich <jbeulich@suse.com> > Acked-by: Roger Pau Monné <roger.pau@citrix.com> > --- > drivers/block/xen-blkfront.c | 35 ++++++++++++++++++----------------- > 1 file changed, 18 insertions(+), 17 deletions(-) > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index d83fee21f6c5..15e840287734 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id, > static irqreturn_t blkif_interrupt(int irq, void *dev_id) > { > struct request *req; > - struct blkif_response *bret; > + struct blkif_response bret; > RING_IDX i, rp; > unsigned long flags; > struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; > @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > for (i = rinfo->ring.rsp_cons; i != rp; i++) { > unsigned long id; > > - bret = RING_GET_RESPONSE(&rinfo->ring, i); > - id = bret->id; > + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); As per my understanding copying is still not an atomic operation as the request/response are multi-byte structures in general. IOW, what prevents the backend from modifying the ring while we are copying? 
Thanks, Oleksandr > + id = bret.id; > + > /* > * The backend has messed up and given us an id that we would > * never have given to it (we stamp it up to BLK_RING_SIZE - > @@ -1522,39 +1523,39 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > */ > if (id >= BLK_RING_SIZE(info)) { > WARN(1, "%s: response to %s has incorrect id (%ld)\n", > - info->gd->disk_name, op_name(bret->operation), id); > + info->gd->disk_name, op_name(bret.operation), id); > /* We can't safely get the 'struct request' as > * the id is busted. */ > continue; > } > req = rinfo->shadow[id].request; > > - if (bret->operation != BLKIF_OP_DISCARD) { > + if (bret.operation != BLKIF_OP_DISCARD) { > /* > * We may need to wait for an extra response if the > * I/O request is split in 2 > */ > - if (!blkif_completion(&id, rinfo, bret)) > + if (!blkif_completion(&id, rinfo, &bret)) > continue; > } > > if (add_id_to_freelist(rinfo, id)) { > WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", > - info->gd->disk_name, op_name(bret->operation), id); > + info->gd->disk_name, op_name(bret.operation), id); > continue; > } > > - if (bret->status == BLKIF_RSP_OKAY) > + if (bret.status == BLKIF_RSP_OKAY) > blkif_req(req)->error = BLK_STS_OK; > else > blkif_req(req)->error = BLK_STS_IOERR; > > - switch (bret->operation) { > + switch (bret.operation) { > case BLKIF_OP_DISCARD: > - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { > + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { > struct request_queue *rq = info->rq; > printk(KERN_WARNING "blkfront: %s: %s op failed\n", > - info->gd->disk_name, op_name(bret->operation)); > + info->gd->disk_name, op_name(bret.operation)); > blkif_req(req)->error = BLK_STS_NOTSUPP; > info->feature_discard = 0; > info->feature_secdiscard = 0; > @@ -1564,15 +1565,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > break; > case BLKIF_OP_FLUSH_DISKCACHE: > case BLKIF_OP_WRITE_BARRIER: > - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { 
> + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { > printk(KERN_WARNING "blkfront: %s: %s op failed\n", > - info->gd->disk_name, op_name(bret->operation)); > + info->gd->disk_name, op_name(bret.operation)); > blkif_req(req)->error = BLK_STS_NOTSUPP; > } > - if (unlikely(bret->status == BLKIF_RSP_ERROR && > + if (unlikely(bret.status == BLKIF_RSP_ERROR && > rinfo->shadow[id].req.u.rw.nr_segments == 0)) { > printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", > - info->gd->disk_name, op_name(bret->operation)); > + info->gd->disk_name, op_name(bret.operation)); > blkif_req(req)->error = BLK_STS_NOTSUPP; > } > if (unlikely(blkif_req(req)->error)) { > @@ -1585,9 +1586,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) > fallthrough; > case BLKIF_OP_READ: > case BLKIF_OP_WRITE: > - if (unlikely(bret->status != BLKIF_RSP_OKAY)) > + if (unlikely(bret.status != BLKIF_RSP_OKAY)) > dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " > - "request: %x\n", bret->status); > + "request: %x\n", bret.status); > > break; > default:
Hi, On 02/08/2021 15:06, Oleksandr Andrushchenko wrote: > On 30.07.21 13:38, Juergen Gross wrote: >> In order to avoid problems in case the backend is modifying a response >> on the ring page while the frontend has already seen it, just read the >> response into a local buffer in one go and then operate on that buffer >> only. >> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> Reviewed-by: Jan Beulich <jbeulich@suse.com> >> Acked-by: Roger Pau Monné <roger.pau@citrix.com> >> --- >> drivers/block/xen-blkfront.c | 35 ++++++++++++++++++----------------- >> 1 file changed, 18 insertions(+), 17 deletions(-) >> >> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >> index d83fee21f6c5..15e840287734 100644 >> --- a/drivers/block/xen-blkfront.c >> +++ b/drivers/block/xen-blkfront.c >> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id, >> static irqreturn_t blkif_interrupt(int irq, void *dev_id) >> { >> struct request *req; >> - struct blkif_response *bret; >> + struct blkif_response bret; >> RING_IDX i, rp; >> unsigned long flags; >> struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; >> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >> for (i = rinfo->ring.rsp_cons; i != rp; i++) { >> unsigned long id; >> >> - bret = RING_GET_RESPONSE(&rinfo->ring, i); >> - id = bret->id; >> + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); > > As per my understanding copying is still not an atomic operation as the request/response > > are multi-byte structures in general. IOW, what prevents the backend from modifying the ring while > > we are copying? Nothing and, I believe, you are never going to be able to ensure atomicity with large structure (at least between entity that doesn't trust each other). However, what you can do is copying the response once, check that it is consistent and then use it. If it is not consistent, then you can report an error. 
This is better than what's currently in tree. IOW we may have multiple reads, so the code is prone to TOCTOU. Cheers,
On 02.08.21 22:26, Julien Grall wrote: > Hi, > > On 02/08/2021 15:06, Oleksandr Andrushchenko wrote: >> On 30.07.21 13:38, Juergen Gross wrote: >>> In order to avoid problems in case the backend is modifying a response >>> on the ring page while the frontend has already seen it, just read the >>> response into a local buffer in one go and then operate on that buffer >>> only. >>> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> Reviewed-by: Jan Beulich <jbeulich@suse.com> >>> Acked-by: Roger Pau Monné <roger.pau@citrix.com> >>> --- >>> drivers/block/xen-blkfront.c | 35 ++++++++++++++++++----------------- >>> 1 file changed, 18 insertions(+), 17 deletions(-) >>> >>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c >>> index d83fee21f6c5..15e840287734 100644 >>> --- a/drivers/block/xen-blkfront.c >>> +++ b/drivers/block/xen-blkfront.c >>> @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id, >>> static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>> { >>> struct request *req; >>> - struct blkif_response *bret; >>> + struct blkif_response bret; >>> RING_IDX i, rp; >>> unsigned long flags; >>> struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; >>> @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) >>> for (i = rinfo->ring.rsp_cons; i != rp; i++) { >>> unsigned long id; >>> - bret = RING_GET_RESPONSE(&rinfo->ring, i); >>> - id = bret->id; >>> + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); >> >> As per my understanding copying is still not an atomic operation as the request/response >> >> are multi-byte structures in general. IOW, what prevents the backend from modifying the ring while >> >> we are copying? > > Nothing and, I believe, you are never going to be able to ensure atomicity with large structure (at least between entity that doesn't trust each other). > > However, what you can do is copying the response once, check that it is consistent and then use it. 
If it is not consistent, then you can report an error. > > This is better than what's currently in tree. IOW we may have multiple reads, so the code is prone to TOCTOU. Agree, Thanks > > Cheers, >
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index d83fee21f6c5..15e840287734 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1496,7 +1496,7 @@ static bool blkif_completion(unsigned long *id, static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; - struct blkif_response *bret; + struct blkif_response bret; RING_IDX i, rp; unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; @@ -1513,8 +1513,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) for (i = rinfo->ring.rsp_cons; i != rp; i++) { unsigned long id; - bret = RING_GET_RESPONSE(&rinfo->ring, i); - id = bret->id; + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); + id = bret.id; + /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - @@ -1522,39 +1523,39 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) */ if (id >= BLK_RING_SIZE(info)) { WARN(1, "%s: response to %s has incorrect id (%ld)\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); /* We can't safely get the 'struct request' as * the id is busted. 
*/ continue; } req = rinfo->shadow[id].request; - if (bret->operation != BLKIF_OP_DISCARD) { + if (bret.operation != BLKIF_OP_DISCARD) { /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, bret)) + if (!blkif_completion(&id, rinfo, &bret)) continue; } if (add_id_to_freelist(rinfo, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); continue; } - if (bret->status == BLKIF_RSP_OKAY) + if (bret.status == BLKIF_RSP_OKAY) blkif_req(req)->error = BLK_STS_OK; else blkif_req(req)->error = BLK_STS_IOERR; - switch (bret->operation) { + switch (bret.operation) { case BLKIF_OP_DISCARD: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -1564,15 +1565,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } - if (unlikely(bret->status == BLKIF_RSP_ERROR && + if (unlikely(bret.status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } if 
(unlikely(blkif_req(req)->error)) { @@ -1585,9 +1586,9 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) fallthrough; case BLKIF_OP_READ: case BLKIF_OP_WRITE: - if (unlikely(bret->status != BLKIF_RSP_OKAY)) + if (unlikely(bret.status != BLKIF_RSP_OKAY)) dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " - "request: %x\n", bret->status); + "request: %x\n", bret.status); break; default: