diff mbox series

[v4,07/11] blksnap: minimum data storage unit of the original block device

Message ID 20230609115858.4737-7-sergei.shtepa@veeam.com (mailing list archive)
State Superseded
Headers show
Series [v4,01/11] documentation: Block Device Filtering Mechanism | expand

Commit Message

Sergei Shtepa June 9, 2023, 11:58 a.m. UTC
The struct chunk describes the minimum data storage unit of the original
block device. Functions for working with these minimal blocks implement
algorithms for reading and writing blocks.

Co-developed-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Sergei Shtepa <sergei.shtepa@veeam.com>
---
 drivers/block/blksnap/chunk.c | 454 ++++++++++++++++++++++++++++++++++
 drivers/block/blksnap/chunk.h | 114 +++++++++
 2 files changed, 568 insertions(+)
 create mode 100644 drivers/block/blksnap/chunk.c
 create mode 100644 drivers/block/blksnap/chunk.h

--
2.20.1
diff mbox series

Patch

diff --git a/drivers/block/blksnap/chunk.c b/drivers/block/blksnap/chunk.c
new file mode 100644
index 000000000000..fe1e9b0e3323
--- /dev/null
+++ b/drivers/block/blksnap/chunk.c
@@ -0,0 +1,454 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Veeam Software Group GmbH */
+#define pr_fmt(fmt) KBUILD_MODNAME "-chunk: " fmt
+
+#include <linux/blkdev.h>
+#include <linux/slab.h>
+#include "chunk.h"
+#include "diff_buffer.h"
+#include "diff_storage.h"
+#include "params.h"
+
+struct chunk_bio {
+	struct work_struct work;
+	struct list_head chunks;
+	struct bio *orig_bio;
+	struct bvec_iter orig_iter;
+	struct bio bio;
+};
+
+static struct bio_set chunk_io_bioset;
+static struct bio_set chunk_clone_bioset;
+
+static inline sector_t chunk_sector(struct chunk *chunk)
+{
+	return (sector_t)(chunk->number)
+	       << (chunk->diff_area->chunk_shift - SECTOR_SHIFT);
+}
+
+void chunk_store_failed(struct chunk *chunk, int error)
+{
+	struct diff_area *diff_area = diff_area_get(chunk->diff_area);
+
+	WARN_ON_ONCE(chunk->state != CHUNK_ST_NEW &&
+		     chunk->state != CHUNK_ST_IN_MEMORY);
+	chunk->state = CHUNK_ST_FAILED;
+
+	if (likely(chunk->diff_buffer)) {
+		diff_buffer_release(diff_area, chunk->diff_buffer);
+		chunk->diff_buffer = NULL;
+	}
+	diff_storage_free_region(chunk->diff_region);
+	chunk->diff_region = NULL;
+
+	chunk_up(chunk);
+	if (error)
+		diff_area_set_corrupted(diff_area, error);
+	diff_area_put(diff_area);
+};
+
+static void chunk_schedule_storing(struct chunk *chunk)
+{
+	struct diff_area *diff_area = diff_area_get(chunk->diff_area);
+	int queue_count;
+
+	WARN_ON_ONCE(chunk->state != CHUNK_ST_NEW &&
+		     chunk->state != CHUNK_ST_STORED);
+	chunk->state = CHUNK_ST_IN_MEMORY;
+
+	spin_lock(&diff_area->store_queue_lock);
+	list_add_tail(&chunk->link, &diff_area->store_queue);
+	queue_count = atomic_inc_return(&diff_area->store_queue_count);
+	spin_unlock(&diff_area->store_queue_lock);
+
+	chunk_up(chunk);
+
+	/* Initiate the queue clearing process */
+	if (queue_count > get_chunk_maximum_in_queue())
+		queue_work(system_wq, &diff_area->store_queue_work);
+	diff_area_put(diff_area);
+}
+
+void chunk_copy_bio(struct chunk *chunk, struct bio *bio,
+		    struct bvec_iter *iter)
+{
+	unsigned int chunk_ofs, chunk_left;
+
+	chunk_ofs = (iter->bi_sector - chunk_sector(chunk)) << SECTOR_SHIFT;
+	chunk_left = chunk->diff_buffer->size - chunk_ofs;
+	while (chunk_left && iter->bi_size) {
+		struct bio_vec bvec = bio_iter_iovec(bio, *iter);
+		unsigned int page_ofs = offset_in_page(chunk_ofs);
+		struct page *page;
+		unsigned int len;
+
+		page = chunk->diff_buffer->pages[chunk_ofs >> PAGE_SHIFT];
+		len = min3(bvec.bv_len,
+			   chunk_left,
+			   (unsigned int)PAGE_SIZE - page_ofs);
+
+		if (op_is_write(bio_op(bio))) {
+			/* from bio to buffer */
+			memcpy_page(page, page_ofs,
+				    bvec.bv_page, bvec.bv_offset,
+				    len);
+		} else {
+			/* from buffer to bio */
+			memcpy_page(bvec.bv_page, bvec.bv_offset,
+				    page, page_ofs,
+				    len);
+		}
+
+		chunk_ofs += len;
+		chunk_left -= len;
+		bio_advance_iter_single(bio, iter, len);
+	}
+}
+
+static void chunk_clone_endio(struct bio *bio)
+{
+	struct bio *orig_bio = bio->bi_private;
+
+	if (unlikely(bio->bi_status != BLK_STS_OK))
+		bio_io_error(orig_bio);
+	else
+		bio_endio(orig_bio);
+}
+
+static inline sector_t chunk_offset(struct chunk *chunk, struct bio *bio)
+{
+	return bio->bi_iter.bi_sector - chunk_sector(chunk);
+}
+
+static inline void chunk_limit_iter(struct chunk *chunk, struct bio *bio,
+				    sector_t sector, struct bvec_iter *iter)
+{
+	sector_t chunk_ofs = chunk_offset(chunk, bio);
+
+	iter->bi_sector = sector + chunk_ofs;
+	iter->bi_size = min_t(unsigned int,
+			bio->bi_iter.bi_size,
+			(chunk->sector_count - chunk_ofs) << SECTOR_SHIFT);
+}
+
+static inline unsigned int chunk_limit(struct chunk *chunk, struct bio *bio)
+{
+	unsigned int chunk_ofs, chunk_left;
+
+	chunk_ofs = (unsigned int)chunk_offset(chunk, bio) << SECTOR_SHIFT;
+	chunk_left = chunk->diff_buffer->size - chunk_ofs;
+
+	return min(bio->bi_iter.bi_size, chunk_left);
+}
+
+struct bio *chunk_alloc_clone(struct block_device *bdev, struct bio *bio)
+{
+	return bio_alloc_clone(bdev, bio, GFP_NOIO, &chunk_clone_bioset);
+}
+
+void chunk_clone_bio(struct chunk *chunk, struct bio *bio)
+{
+	struct bio *new_bio;
+	struct block_device *bdev;
+	sector_t sector;
+
+	if (chunk->state == CHUNK_ST_STORED) {
+		bdev = chunk->diff_region->bdev;
+		sector = chunk->diff_region->sector;
+	} else {
+		bdev = chunk->diff_area->orig_bdev;
+		sector = chunk_sector(chunk);
+	}
+
+	new_bio = chunk_alloc_clone(bdev, bio);
+	WARN_ON(!new_bio);
+
+	chunk_limit_iter(chunk, bio, sector, &new_bio->bi_iter);
+	bio_set_flag(new_bio, BIO_FILTERED);
+	new_bio->bi_end_io = chunk_clone_endio;
+	new_bio->bi_private = bio;
+
+	bio_advance(bio, new_bio->bi_iter.bi_size);
+	bio_inc_remaining(bio);
+
+	submit_bio_noacct(new_bio);
+}
+
+static inline struct chunk *get_chunk_from_cbio(struct chunk_bio *cbio)
+{
+	struct chunk *chunk = list_first_entry_or_null(&cbio->chunks,
+						       struct chunk, link);
+
+	if (chunk)
+		list_del_init(&chunk->link);
+	return chunk;
+}
+
+static void notify_load_and_schedule_io(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	while ((chunk = get_chunk_from_cbio(cbio))) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK)) {
+			chunk_store_failed(chunk, -EIO);
+			continue;
+		}
+		if (chunk->state == CHUNK_ST_FAILED) {
+			chunk_up(chunk);
+			continue;
+		}
+
+		chunk_copy_bio(chunk, cbio->orig_bio, &cbio->orig_iter);
+		bio_endio(cbio->orig_bio);
+
+		chunk_schedule_storing(chunk);
+	}
+
+	bio_put(&cbio->bio);
+}
+
+static void notify_load_and_postpone_io(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	while ((chunk = get_chunk_from_cbio(cbio))) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK)) {
+			chunk_store_failed(chunk, -EIO);
+			continue;
+		}
+		if (chunk->state == CHUNK_ST_FAILED) {
+			chunk_up(chunk);
+			continue;
+		}
+
+		chunk_schedule_storing(chunk);
+	}
+
+	/* submit the original bio fed into the tracker */
+	submit_bio_noacct_nocheck(cbio->orig_bio);
+	bio_put(&cbio->bio);
+}
+
+static void chunk_notify_store(struct work_struct *work)
+{
+	struct chunk_bio *cbio = container_of(work, struct chunk_bio, work);
+	struct chunk *chunk;
+
+	while ((chunk = get_chunk_from_cbio(cbio))) {
+		if (unlikely(cbio->bio.bi_status != BLK_STS_OK)) {
+			chunk_store_failed(chunk, -EIO);
+			continue;
+		}
+
+		WARN_ON_ONCE(chunk->state != CHUNK_ST_IN_MEMORY);
+		chunk->state = CHUNK_ST_STORED;
+
+		if (chunk->diff_buffer) {
+			diff_buffer_release(chunk->diff_area,
+					    chunk->diff_buffer);
+			chunk->diff_buffer = NULL;
+		}
+		chunk_up(chunk);
+	}
+
+	bio_put(&cbio->bio);
+}
+
+static void chunk_io_endio(struct bio *bio)
+{
+	struct chunk_bio *cbio = container_of(bio, struct chunk_bio, bio);
+
+	queue_work(system_wq, &cbio->work);
+}
+
+static void chunk_submit_bio(struct bio *bio)
+{
+	bio->bi_end_io = chunk_io_endio;
+	submit_bio_noacct(bio);
+}
+
+static inline unsigned short calc_max_vecs(sector_t left)
+{
+	return bio_max_segs(round_up(left, PAGE_SECTORS) / PAGE_SECTORS);
+}
+
+void chunk_store(struct chunk *chunk)
+{
+	struct block_device *bdev = chunk->diff_region->bdev;
+	sector_t sector = chunk->diff_region->sector;
+	sector_t count = chunk->diff_region->count;
+	unsigned int page_idx = 0;
+	struct bio *bio;
+	struct chunk_bio *cbio;
+
+	bio = bio_alloc_bioset(bdev, calc_max_vecs(count),
+			       REQ_OP_WRITE | REQ_SYNC | REQ_FUA, GFP_NOIO,
+			       &chunk_io_bioset);
+	bio->bi_iter.bi_sector = sector;
+	bio_set_flag(bio, BIO_FILTERED);
+
+	while (count) {
+		struct bio *next;
+		sector_t portion = min_t(sector_t, count, PAGE_SECTORS);
+		unsigned int bytes = portion << SECTOR_SHIFT;
+
+		if (bio_add_page(bio, chunk->diff_buffer->pages[page_idx],
+				 bytes, 0) == bytes) {
+			page_idx++;
+			count -= portion;
+			continue;
+		}
+
+		/* Create next bio */
+		next = bio_alloc_bioset(bdev, calc_max_vecs(count),
+					REQ_OP_WRITE | REQ_SYNC | REQ_FUA,
+					GFP_NOIO, &chunk_io_bioset);
+		next->bi_iter.bi_sector = bio_end_sector(bio);
+		bio_set_flag(next, BIO_FILTERED);
+		bio_chain(bio, next);
+		submit_bio_noacct(bio);
+		bio = next;
+	}
+
+	cbio = container_of(bio, struct chunk_bio, bio);
+
+	INIT_WORK(&cbio->work, chunk_notify_store);
+	INIT_LIST_HEAD(&cbio->chunks);
+	list_add_tail(&chunk->link, &cbio->chunks);
+	cbio->orig_bio = NULL;
+	chunk_submit_bio(bio);
+}
+
+static struct bio *__chunk_load(struct chunk *chunk)
+{
+	struct diff_buffer *diff_buffer;
+	unsigned int page_idx = 0;
+	struct bio *bio;
+	struct block_device *bdev;
+	sector_t sector, count;
+
+	diff_buffer = diff_buffer_take(chunk->diff_area);
+	if (IS_ERR(diff_buffer))
+		return ERR_CAST(diff_buffer);
+	chunk->diff_buffer = diff_buffer;
+
+	if (chunk->state == CHUNK_ST_STORED) {
+		bdev = chunk->diff_region->bdev;
+		sector = chunk->diff_region->sector;
+		count = chunk->diff_region->count;
+	} else {
+		bdev = chunk->diff_area->orig_bdev;
+		sector = chunk_sector(chunk);
+		count = chunk->sector_count;
+	}
+
+	bio = bio_alloc_bioset(bdev, calc_max_vecs(count),
+			       REQ_OP_READ, GFP_NOIO, &chunk_io_bioset);
+	bio->bi_iter.bi_sector = sector;
+	bio_set_flag(bio, BIO_FILTERED);
+
+	while (count) {
+		struct bio *next;
+		sector_t portion = min_t(sector_t, count, PAGE_SECTORS);
+		unsigned int bytes = portion << SECTOR_SHIFT;
+
+		if (bio_add_page(bio, chunk->diff_buffer->pages[page_idx],
+				 bytes, 0) == bytes) {
+			page_idx++;
+			count -= portion;
+			continue;
+		}
+
+		/* Create next bio */
+		next = bio_alloc_bioset(bdev, calc_max_vecs(count),
+					REQ_OP_READ, GFP_NOIO,
+					&chunk_io_bioset);
+		next->bi_iter.bi_sector = bio_end_sector(bio);
+		bio_set_flag(next, BIO_FILTERED);
+		bio_chain(bio, next);
+		submit_bio_noacct(bio);
+		bio = next;
+	}
+	return bio;
+}
+
+int chunk_load_and_postpone_io(struct chunk *chunk, struct bio **chunk_bio)
+{
+	struct bio *prev = *chunk_bio, *bio;
+
+	bio = __chunk_load(chunk);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	if (prev) {
+		bio_chain(prev, bio);
+		submit_bio_noacct(prev);
+	}
+
+	*chunk_bio = bio;
+	return 0;
+}
+
+void chunk_load_and_postpone_io_finish(struct list_head *chunks,
+				struct bio *chunk_bio, struct bio *orig_bio)
+{
+	struct chunk_bio *cbio;
+
+	cbio = container_of(chunk_bio, struct chunk_bio, bio);
+	INIT_LIST_HEAD(&cbio->chunks);
+	while (!list_empty(chunks)) {
+		struct chunk *it;
+
+		it = list_first_entry(chunks, struct chunk, link);
+		list_del_init(&it->link);
+
+		list_add_tail(&it->link, &cbio->chunks);
+	}
+	INIT_WORK(&cbio->work, notify_load_and_postpone_io);
+	cbio->orig_bio = orig_bio;
+	chunk_submit_bio(chunk_bio);
+}
+
+int chunk_load_and_schedule_io(struct chunk *chunk, struct bio *orig_bio)
+{
+	struct chunk_bio *cbio;
+	struct bio *bio;
+
+	bio = __chunk_load(chunk);
+	if (IS_ERR(bio))
+		return PTR_ERR(bio);
+
+	cbio = container_of(bio, struct chunk_bio, bio);
+	INIT_LIST_HEAD(&cbio->chunks);
+	list_add_tail(&chunk->link, &cbio->chunks);
+	INIT_WORK(&cbio->work, notify_load_and_schedule_io);
+	cbio->orig_bio = orig_bio;
+	cbio->orig_iter = orig_bio->bi_iter;
+	bio_advance_iter_single(orig_bio, &orig_bio->bi_iter,
+				chunk_limit(chunk, orig_bio));
+	bio_inc_remaining(orig_bio);
+
+	chunk_submit_bio(bio);
+	return 0;
+}
+
+int __init chunk_init(void)
+{
+	int ret;
+
+	ret = bioset_init(&chunk_io_bioset, 64,
+			  offsetof(struct chunk_bio, bio),
+			  BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
+	if (!ret)
+		ret = bioset_init(&chunk_clone_bioset, 64, 0,
+				  BIOSET_NEED_BVECS | BIOSET_NEED_RESCUER);
+	return ret;
+}
+
+void chunk_done(void)
+{
+	bioset_exit(&chunk_io_bioset);
+	bioset_exit(&chunk_clone_bioset);
+}
diff --git a/drivers/block/blksnap/chunk.h b/drivers/block/blksnap/chunk.h
new file mode 100644
index 000000000000..cd119ac729df
--- /dev/null
+++ b/drivers/block/blksnap/chunk.h
@@ -0,0 +1,114 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Veeam Software Group GmbH */
+#ifndef __BLKSNAP_CHUNK_H
+#define __BLKSNAP_CHUNK_H
+
+#include <linux/blk_types.h>
+#include <linux/blkdev.h>
+#include <linux/rwsem.h>
+#include <linux/atomic.h>
+#include "diff_area.h"
+
+struct diff_area;
+struct diff_region;
+
+/**
+ * enum chunk_st - Possible states for a chunk.
+ *
+ * @CHUNK_ST_NEW:
+ *	No data is associated with the chunk.
+ * @CHUNK_ST_IN_MEMORY:
+ *	The data of the chunk is ready to be read from the RAM buffer.
+ *	The flag is removed when a chunk is removed from the store queue
+ *	and its buffer is released.
+ * @CHUNK_ST_STORED:
+ *	The data of the chunk has been written to the difference storage.
+ * @CHUNK_ST_FAILED:
+ *	An error occurred while processing the chunk data.
+ *
+ * Chunks life circle:
+ *	CHUNK_ST_NEW -> CHUNK_ST_IN_MEMORY <-> CHUNK_ST_STORED
+ */
+
+enum chunk_st {
+	CHUNK_ST_NEW,
+	CHUNK_ST_IN_MEMORY,
+	CHUNK_ST_STORED,
+	CHUNK_ST_FAILED,
+};
+
+/**
+ * struct chunk - Minimum data storage unit.
+ *
+ * @link:
+ *	The list header allows to create queue of chunks.
+ * @number:
+ *	Sequential number of the chunk.
+ * @sector_count:
+ *	Number of sectors in the current chunk. This is especially true
+ *	for the	last chunk.
+ * @lock:
+ *	Binary semaphore. Syncs access to the chunks fields: state,
+ *	diff_buffer and diff_region.
+ * @diff_area:
+ *	Pointer to the difference area - the difference storage area for a
+ *	specific device. This field is only available when the chunk is locked.
+ *	Allows to protect the difference area from early release.
+ * @state:
+ *	Defines the state of a chunk.
+ * @diff_buffer:
+ *	Pointer to &struct diff_buffer. Describes a buffer in the memory
+ *	for storing the chunk data.
+ * @diff_region:
+ *	Pointer to &struct diff_region. Describes a copy of the chunk data
+ *	on the difference storage.
+ *
+ * This structure describes the block of data that the module operates
+ * with when executing the copy-on-write algorithm and when performing I/O
+ * to snapshot images.
+ *
+ * If the data of the chunk has been changed or has just been read, then
+ * the chunk gets into store queue.
+ *
+ * The semaphore is blocked for writing if there is no actual data in the
+ * buffer, since a block of data is being read from the original device or
+ * from a diff storage. If data is being read from or written to the
+ * diff_buffer, the semaphore must be locked.
+ */
+struct chunk {
+	struct list_head link;
+	unsigned long number;
+	sector_t sector_count;
+
+	struct semaphore lock;
+	struct diff_area *diff_area;
+
+	enum chunk_st state;
+	struct diff_buffer *diff_buffer;
+	struct diff_region *diff_region;
+};
+
+static inline void chunk_up(struct chunk *chunk)
+{
+	struct diff_area *diff_area = chunk->diff_area;
+
+	chunk->diff_area = NULL;
+	up(&chunk->lock);
+	diff_area_put(diff_area);
+};
+
+void chunk_store_failed(struct chunk *chunk, int error);
+struct bio *chunk_alloc_clone(struct block_device *bdev, struct bio *bio);
+
+void chunk_copy_bio(struct chunk *chunk, struct bio *bio,
+		    struct bvec_iter *iter);
+void chunk_clone_bio(struct chunk *chunk, struct bio *bio);
+void chunk_store(struct chunk *chunk);
+int chunk_load_and_schedule_io(struct chunk *chunk, struct bio *orig_bio);
+int chunk_load_and_postpone_io(struct chunk *chunk, struct bio **chunk_bio);
+void chunk_load_and_postpone_io_finish(struct list_head *chunks,
+				struct bio *chunk_bio, struct bio *orig_bio);
+
+int __init chunk_init(void);
+void chunk_done(void);
+#endif /* __BLKSNAP_CHUNK_H */