diff mbox series

[v11,05/10] block: treat BDRV_REQ_ALLOCATE as serialising

Message ID 20181218075707.12006-6-anton.nefedov@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series qcow2: cluster space preallocation | expand

Commit Message

Anton Nefedov Dec. 18, 2018, 7:57 a.m. UTC
The idea is that ALLOCATE requests may overlap with other requests.
Reuse the existing block layer infrastructure for serialising requests.
Use the following approach:
  - mark ALLOCATE requests as SERIALISING too, so that subsequent requests to
    the same area wait
  - an ALLOCATE request itself must never wait if another request is already
    in flight: return -EAGAIN and let the caller reconsider.

Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
---
 include/block/block.h |  3 +++
 block/io.c            | 31 ++++++++++++++++++++++++-------
 2 files changed, 27 insertions(+), 7 deletions(-)

Comments

Alberto Garcia Jan. 7, 2019, 3:19 p.m. UTC | #1
On Tue 18 Dec 2018 08:57:38 AM CET, Anton Nefedov wrote:
> The idea is that ALLOCATE requests may overlap with other requests.
> Reuse the existing block layer infrastructure for serialising requests.
> Use the following approach:
>   - mark ALLOCATE also SERIALISING, so subsequent requests to the area wait
>   - ALLOCATE request itself must never wait if another request is in flight
>     already. Return EAGAIN, let the caller reconsider.
>
> Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>

Reviewed-by: Alberto Garcia <berto@igalia.com>

Berto
diff mbox series

Patch

diff --git a/include/block/block.h b/include/block/block.h
index 643d32f4b8..dfc0fc1b8f 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -88,6 +88,9 @@  typedef enum {
      * efficiently allocate the space so it reads as zeroes, or return an error.
      * If this flag is set then BDRV_REQ_ZERO_WRITE must also be set.
      * This flag cannot be set together with BDRV_REQ_MAY_UNMAP.
+     * This flag implicitly sets BDRV_REQ_SERIALISING, meaning the request is
+     * protected from conflicts with overlapping requests. If such a conflict
+     * is detected, -EAGAIN is returned.
      */
     BDRV_REQ_ALLOCATE           = 0x100,
 
diff --git a/block/io.c b/block/io.c
index 66006a089d..4451714a60 100644
--- a/block/io.c
+++ b/block/io.c
@@ -720,12 +720,13 @@  void bdrv_dec_in_flight(BlockDriverState *bs)
     bdrv_wakeup(bs);
 }
 
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+static bool coroutine_fn find_or_wait_serialising_requests(
+    BdrvTrackedRequest *self, bool wait)
 {
     BlockDriverState *bs = self->bs;
     BdrvTrackedRequest *req;
     bool retry;
-    bool waited = false;
+    bool found = false;
 
     if (!atomic_read(&bs->serialising_in_flight)) {
         return false;
@@ -751,11 +752,14 @@  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
                  * will wait for us as soon as it wakes up, then just go on
                  * (instead of producing a deadlock in the former case). */
                 if (!req->waiting_for) {
+                    found = true;
+                    if (!wait) {
+                        break;
+                    }
                     self->waiting_for = req;
                     qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
                     self->waiting_for = NULL;
                     retry = true;
-                    waited = true;
                     break;
                 }
             }
@@ -763,7 +767,12 @@  static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
         qemu_co_mutex_unlock(&bs->reqs_lock);
     } while (retry);
 
-    return waited;
+    return found;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+    return find_or_wait_serialising_requests(self, true);
 }
 
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
@@ -1585,7 +1594,7 @@  bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
                           BdrvTrackedRequest *req, int flags)
 {
     BlockDriverState *bs = child->bs;
-    bool waited;
+    bool found;
     int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
 
     if (bs->read_only) {
@@ -1602,9 +1611,13 @@  bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
         mark_request_serialising(req, bdrv_get_cluster_size(bs));
     }
 
-    waited = wait_serialising_requests(req);
+    found = find_or_wait_serialising_requests(req,
+                                              !(flags & BDRV_REQ_ALLOCATE));
+    if (found && (flags & BDRV_REQ_ALLOCATE)) {
+        return -EAGAIN;
+    }
 
-    assert(!waited || !req->serialising ||
+    assert(!found || !req->serialising ||
            is_request_serialising_and_aligned(req));
     assert(req->overlap_offset <= offset);
     assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
@@ -1864,6 +1877,10 @@  int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
     assert(!((flags & BDRV_REQ_ALLOCATE) && (flags & BDRV_REQ_MAY_UNMAP)));
     assert(!((flags & BDRV_REQ_ALLOCATE) && !(flags & BDRV_REQ_ZERO_WRITE)));
 
+    if (flags & BDRV_REQ_ALLOCATE) {
+        flags |= BDRV_REQ_SERIALISING;
+    }
+
     trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
 
     if (!bs->drv) {