@@ -1105,8 +1105,11 @@ static int64_t find_nonzero(const uint8_t *buf, int64_t n)
*
* 'pnum' is set to the number of sectors (including and immediately following
* the first one) that are known to be in the same allocated/unallocated state.
+ * The function will try to align 'pnum' to the number of sectors specified
+ * in 'alignment' to avoid unnecessary RMW cycles on modern hardware.
*/
-static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
+static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum,
+ int64_t sector_num, int alignment)
{
bool is_zero;
int i;
@@ -1115,14 +1118,25 @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
*pnum = 0;
return 0;
}
- is_zero = buffer_is_zero(buf, 512);
- for(i = 1; i < n; i++) {
- buf += 512;
- if (is_zero != buffer_is_zero(buf, 512)) {
+
+ if (sector_num % alignment) {
+ n = ROUND_UP(sector_num, alignment) - sector_num;
+ }
+
+ if (n % alignment) {
+ alignment = 1;
+ }
+
+ n /= alignment;
+
+ is_zero = buffer_is_zero(buf, BDRV_SECTOR_SIZE * alignment);
+ for (i = 1; i < n; i++) {
+ buf += BDRV_SECTOR_SIZE * alignment;
+ if (is_zero != buffer_is_zero(buf, BDRV_SECTOR_SIZE * alignment)) {
break;
}
}
- *pnum = i;
+ *pnum = i * alignment;
return !is_zero;
}
@@ -1132,7 +1146,7 @@ static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
* breaking up write requests for only small sparse areas.
*/
static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
- int min)
+ int min, int64_t sector_num, int alignment)
{
int ret;
int num_checked, num_used;
@@ -1141,7 +1155,7 @@ static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
min = n;
}
- ret = is_allocated_sectors(buf, n, pnum);
+ ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
if (!ret) {
return ret;
}
@@ -1149,13 +1163,15 @@ static int is_allocated_sectors_min(const uint8_t *buf, int n, int *pnum,
num_used = *pnum;
buf += BDRV_SECTOR_SIZE * *pnum;
n -= *pnum;
+ sector_num += *pnum;
num_checked = num_used;
while (n > 0) {
- ret = is_allocated_sectors(buf, n, pnum);
+ ret = is_allocated_sectors(buf, n, pnum, sector_num, alignment);
buf += BDRV_SECTOR_SIZE * *pnum;
n -= *pnum;
+ sector_num += *pnum;
num_checked += *pnum;
if (ret) {
num_used = num_checked;
@@ -1560,6 +1576,7 @@ typedef struct ImgConvertState {
bool wr_in_order;
bool copy_range;
int min_sparse;
+ int alignment;
size_t cluster_sectors;
size_t buf_sectors;
long num_coroutines;
@@ -1724,7 +1741,8 @@ static int coroutine_fn convert_co_write(ImgConvertState *s, int64_t sector_num,
* zeroed. */
if (!s->min_sparse ||
(!s->compressed &&
- is_allocated_sectors_min(buf, n, &n, s->min_sparse)) ||
+ is_allocated_sectors_min(buf, n, &n, s->min_sparse,
+ sector_num, s->alignment)) ||
(s->compressed &&
!buffer_is_zero(buf, n * BDRV_SECTOR_SIZE)))
{
@@ -2373,6 +2391,12 @@ static int img_convert(int argc, char **argv)
out_bs->bl.pdiscard_alignment >>
BDRV_SECTOR_BITS)));
+ /* try to align the write requests to the destination to avoid unnecessary
+ * RMW cycles. */
+ s.alignment = MAX(s.min_sparse,
+ DIV_ROUND_UP(out_bs->bl.request_alignment,
+ BDRV_SECTOR_SIZE));
+
if (skip_create) {
int64_t output_sectors = blk_nb_sectors(s.target);
if (output_sectors < 0) {
We currently don't enforce that the sparse segments we detect during convert are aligned. This leads to unnecessary and costly read-modify-write cycles either internally in QEMU or in the background on the storage device as nearly all modern filesystems or hardware have a 4k alignment internally. The number of RMW cycles when converting an example image [1] to a raw device that has 4k sector size is about 4600 4k read requests to perform a total of about 15000 write requests. With this patch the additional 4600 read requests are eliminated. [1] https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.vmdk Signed-off-by: Peter Lieven <pl@kamp.de> --- V1->V2: - take the current sector offset into account [Max] - try to figure out the target alignment [Max] qemu-img.c | 44 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-)