@@ -1325,11 +1325,96 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
return ret;
}
+typedef struct VFIODirtyRanges {
+ hwaddr min32;
+ hwaddr max32;
+ hwaddr min64;
+ hwaddr max64;
+} VFIODirtyRanges;
+
+typedef struct VFIODirtyRangesListener {
+ VFIOContainer *container;
+ VFIODirtyRanges ranges;
+ MemoryListener listener;
+} VFIODirtyRangesListener;
+
+static void vfio_dirty_tracking_update(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ VFIODirtyRangesListener *dirty = container_of(listener,
+ VFIODirtyRangesListener,
+ listener);
+ VFIODirtyRanges *range = &dirty->ranges;
+ hwaddr iova, end, *min, *max;
+
+ if (!vfio_listener_valid_section(section, "tracking_update") ||
+ !vfio_get_section_iova_range(dirty->container, section,
+ &iova, &end, NULL)) {
+ return;
+ }
+
+ /*
+ * The address space passed to the dirty tracker is reduced to two ranges:
+ * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
+ * The underlying reports of dirty will query a sub-interval of each of
+ * these ranges.
+ *
+ * The purpose of the dual range handling is to handle known cases of big
+ * holes in the address space, like the x86 AMD 1T hole. The alternative
+ * would be an IOVATree but that has a much bigger runtime overhead and
+ * unnecessary complexity.
+ */
+ min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
+ max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
+
+ if (*min > iova) {
+ *min = iova;
+ }
+ if (*max < end) {
+ *max = end;
+ }
+
+ trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
+ return;
+}
+
+static const MemoryListener vfio_dirty_tracking_listener = {
+ .name = "vfio-tracking",
+ .region_add = vfio_dirty_tracking_update,
+};
+
+static void vfio_dirty_tracking_init(VFIOContainer *container,
+ VFIODirtyRanges *ranges)
+{
+ VFIODirtyRangesListener dirty;
+
+ memset(&dirty, 0, sizeof(dirty));
+ dirty.ranges.min32 = UINT32_MAX;
+ dirty.ranges.min64 = UINT64_MAX;
+ dirty.listener = vfio_dirty_tracking_listener;
+ dirty.container = container;
+
+ memory_listener_register(&dirty.listener,
+ container->space->as);
+
+ *ranges = dirty.ranges;
+
+    /*
+     * The memory listener is synchronous, and used only to calculate the
+     * IOVA ranges for dirty tracking. Unregister it after we are done, as
+     * we are not interested in any follow-up updates.
+     */
+ memory_listener_unregister(&dirty.listener);
+}
+
static void vfio_listener_log_global_start(MemoryListener *listener)
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+ VFIODirtyRanges ranges;
int ret;
+ vfio_dirty_tracking_init(container, &ranges);
+
ret = vfio_set_dirty_page_tracking(container, true);
if (ret) {
vfio_set_migration_error(ret);
@@ -103,6 +103,7 @@ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr
vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
vfio_disconnect_container(int fd) "close container->fd=%d"
vfio_put_group(int fd) "close group->fd=%d"
vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"