@@ -88,6 +88,9 @@ typedef enum {
* efficiently allocate the space so it reads as zeroes, or return an error.
* If this flag is set then BDRV_REQ_ZERO_WRITE must also be set.
* This flag cannot be set together with BDRV_REQ_MAY_UNMAP.
+ * This flag implicitly sets BDRV_REQ_SERIALISING, meaning it is protected
+ * from conflicts with overlapping requests. If such a conflict is detected,
+ * -EAGAIN is returned.
*/
BDRV_REQ_ALLOCATE = 0x100,
@@ -720,12 +720,13 @@ void bdrv_dec_in_flight(BlockDriverState *bs)
bdrv_wakeup(bs);
}
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+static bool coroutine_fn find_or_wait_serialising_requests(
+ BdrvTrackedRequest *self, bool wait)
{
BlockDriverState *bs = self->bs;
BdrvTrackedRequest *req;
bool retry;
- bool waited = false;
+ bool found = false;
if (!atomic_read(&bs->serialising_in_flight)) {
return false;
@@ -751,11 +752,14 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
* will wait for us as soon as it wakes up, then just go on
* (instead of producing a deadlock in the former case). */
if (!req->waiting_for) {
+ found = true;
+ if (!wait) {
+ break;
+ }
self->waiting_for = req;
qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
self->waiting_for = NULL;
retry = true;
- waited = true;
break;
}
}
@@ -763,7 +767,12 @@ static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
qemu_co_mutex_unlock(&bs->reqs_lock);
} while (retry);
- return waited;
+ return found;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+ return find_or_wait_serialising_requests(self, true);
}
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
@@ -1585,7 +1594,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
BdrvTrackedRequest *req, int flags)
{
BlockDriverState *bs = child->bs;
- bool waited;
+ bool found;
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
if (bs->read_only) {
@@ -1602,9 +1611,13 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
- waited = wait_serialising_requests(req);
+ found = find_or_wait_serialising_requests(req,
+ !(flags & BDRV_REQ_ALLOCATE));
+ if (found && (flags & BDRV_REQ_ALLOCATE)) {
+ return -EAGAIN;
+ }
- assert(!waited || !req->serialising ||
+ assert(!found || !req->serialising ||
is_request_serialising_and_aligned(req));
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
@@ -1864,6 +1877,10 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
assert(!((flags & BDRV_REQ_ALLOCATE) && (flags & BDRV_REQ_MAY_UNMAP)));
assert(!((flags & BDRV_REQ_ALLOCATE) && !(flags & BDRV_REQ_ZERO_WRITE)));
+ if (flags & BDRV_REQ_ALLOCATE) {
+ flags |= BDRV_REQ_SERIALISING;
+ }
+
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
if (!bs->drv) {
The idea is that ALLOCATE requests may overlap with other requests. Reuse the existing block layer infrastructure for serialising requests. Use the following approach: - mark ALLOCATE also SERIALISING, so subsequent requests to the area wait - ALLOCATE request itself must never wait if another request is in flight already. Return EAGAIN, let the caller reconsider. Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com> --- include/block/block.h | 3 +++ block/io.c | 31 ++++++++++++++++++++++++------- 2 files changed, 27 insertions(+), 7 deletions(-)