diff mbox series

[v5,08/13] hw/cxl/cxl-mailbox-utils: Add mailbox commands to support add/release dynamic capacity response

Message ID 20240304194331.1586191-9-nifan.cxl@gmail.com
State Superseded
Headers show
Series Enabling DCD emulation support in Qemu | expand

Commit Message

Fan Ni March 4, 2024, 7:34 p.m. UTC
From: Fan Ni <fan.ni@samsung.com>

Per CXL spec 3.1, two mailbox commands are implemented:
Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.

Signed-off-by: Fan Ni <fan.ni@samsung.com>
---
 hw/cxl/cxl-mailbox-utils.c  | 310 ++++++++++++++++++++++++++++++++++++
 hw/mem/cxl_type3.c          |  12 ++
 include/hw/cxl/cxl_device.h |   4 +
 3 files changed, 326 insertions(+)

Comments

Jonathan Cameron March 6, 2024, 5:28 p.m. UTC | #1
On Mon,  4 Mar 2024 11:34:03 -0800
nifan.cxl@gmail.com wrote:

> From: Fan Ni <fan.ni@samsung.com>
> 
> Per CXL spec 3.1, two mailbox commands are implemented:
> Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
> Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
> 
> Signed-off-by: Fan Ni <fan.ni@samsung.com>

Hmm. So I had a thought which would work for what you
have here. See include/qemu/range.h
I like the region merging stuff that is also in the list operators
but we shouldn't use that because we have other reasons not to
fuse ranges (sequence numbering etc)

We could make an extent a wrapper around a struct Range though
so that we can use the comparison stuff directly.
+ we can use the list manipulation in there as the basis for a future
extent merging infrastructure that is tag and sequence number (if
provided - so shared capacity or pmem) aware.

Jonathan


> ---
> +
> +/*
> + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
> + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
> + */
> +typedef struct CXLUpdateDCExtentListInPl {
> +    uint32_t num_entries_updated;
> +    uint8_t flags;
> +    uint8_t rsvd[3];
> +    /* CXL r3.1 Table 8-169: Updated Extent */
> +    struct {
> +        uint64_t start_dpa;
> +        uint64_t len;
> +        uint8_t rsvd[8];
> +    } QEMU_PACKED updated_entries[];
> +} QEMU_PACKED CXLUpdateDCExtentListInPl;
> +
> +/*
> + * For the extents in the extent list to operate, check whether they are valid
> + * 1. The extent should be in the range of a valid DC region;
> + * 2. The extent should not cross multiple regions;
> + * 3. The start DPA and the length of the extent should align with the block
> + * size of the region;
> + * 4. The address range of multiple extents in the list should not overlap.

Hmm. Interesting.  I was thinking a given add / remove command rather than
just the extents can't overlap a region.  However I can't find text on that
so I believe your interpretation is correct. It is only specified for the
event records, but that is good enough I think.  We might want to propose
tightening the spec on this to allow devices to say no to such complex
extent lists. Maybe a nice friendly Memory vendor should query this one if
it's a potential problem for real devices.  Might not be!

> + */
> +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
> +        const CXLUpdateDCExtentListInPl *in)
> +{
> +    uint64_t min_block_size = UINT64_MAX;
> +    CXLDCRegion *region = &ct3d->dc.regions[0];
> +    CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
> +    g_autofree unsigned long *blk_bitmap = NULL;
> +    uint64_t dpa, len;
> +    uint32_t i;
> +
> +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> +        region = &ct3d->dc.regions[i];
> +        min_block_size = MIN(min_block_size, region->block_size);
> +    }
> +
> +    blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
> +                             ct3d->dc.regions[0].base) / min_block_size);
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        region = cxl_find_dc_region(ct3d, dpa, len);
> +        if (!region) {
> +            return CXL_MBOX_INVALID_PA;
> +        }
> +
> +        dpa -= ct3d->dc.regions[0].base;
> +        if (dpa % region->block_size || len % region->block_size) {
> +            return CXL_MBOX_INVALID_EXTENT_LIST;
> +        }
> +        /* the dpa range already covered by some other extents in the list */
> +        if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
> +            len / min_block_size)) {
> +            return CXL_MBOX_INVALID_EXTENT_LIST;
> +        }
> +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> +   }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +
> +/*
> + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
> + * An extent is added to the extent list and becomes usable only after the
> + * response is processed successfully
> + */
> +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> +                                          uint8_t *payload_in,
> +                                          size_t len_in,
> +                                          uint8_t *payload_out,
> +                                          size_t *len_out,
> +                                          CXLCCI *cci)
> +{
> +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCExtent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        return CXL_MBOX_SUCCESS;
> +    }
> +
> +    /* Adding extents causes exceeding device's extent tracking ability. */
> +    if (in->num_entries_updated + ct3d->dc.total_extent_count >
> +        CXL_NUM_EXTENTS_SUPPORTED) {
> +        return CXL_MBOX_RESOURCES_EXHAUSTED;
> +    }
> +
> +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        /*
> +         * Check if the DPA range of the to-be-added extent overlaps with
> +         * existing extent list maintained by the device.
> +         */
> +        QTAILQ_FOREACH(ent, extent_list, node) {

There are too many checks in here for an overlapping test.

Conditions are

	|  Extent tested against |
|  Overlap entirely                 |
| overlap left edge |
                    | overlap right edge |
Think of it in the inverse condition and it is easier to reason about.

              | Extent tested against |
| to left |---                        ---| to right |

which I think is something like.

        if (!((dpa + len <= ent->start_dpa) || (dpa >= ent->start_dpa + ent->len)) {
		 return CXL_MBOX_INVALID_PA;
	}

Hmm. For internal tracking (not the exposed values) we should probably use
struct range from include/qemu/range.h.
Felt like there had to be something better than doing this ourselves so I went
looking.  Note it uses inclusive upper bound so be careful with that!

Advantage is we get this checks for free.
https://elixir.bootlin.com/qemu/latest/source/include/qemu/range.h#L152
range_overlaps_range()

There are functions to set them up nicely for us and by base and size
as well which should tidy that part up.

	    

> +            if (ent->start_dpa <= dpa &&
> +                    dpa + len <= ent->start_dpa + ent->len) {
> +                return CXL_MBOX_INVALID_PA;
> +            /* Overlapping one end of the other */
> +            } else if ((dpa < ent->start_dpa + ent->len &&
> +                        dpa + len > ent->start_dpa + ent->len) ||
> +                       (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> +                return CXL_MBOX_INVALID_PA;
> +            }
> +        }
> +
> +        /*
> +         * TODO: we will add a pending extent list based on event log record
> +         * and verify the input response; also, the "More" flag is not
> +         * considered at the moment.
> +         */
> +
> +        cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> +        ct3d->dc.total_extent_count += 1;
> +    }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +
> +/*
> + * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
> + */
> +static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
> +                                          uint8_t *payload_in,
> +                                          size_t len_in,
> +                                          uint8_t *payload_out,
> +                                          size_t *len_out,
> +                                          CXLCCI *cci)
> +{
> +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> +    CXLDCExtent *ent;
> +    uint32_t i;
> +    uint64_t dpa, len;
> +    CXLRetCode ret;
> +
> +    if (in->num_entries_updated == 0) {
> +        return CXL_MBOX_INVALID_INPUT;
> +    }
> +
> +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> +    if (ret != CXL_MBOX_SUCCESS) {
> +        return ret;
> +    }
> +
> +    for (i = 0; i < in->num_entries_updated; i++) {
> +        bool found = false;
> +
> +        dpa = in->updated_entries[i].start_dpa;
> +        len = in->updated_entries[i].len;
> +
> +        QTAILQ_FOREACH(ent, extent_list, node) {
> +            /* Found the extent overlapping with */
> +            if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
> +                if (dpa + len <= ent->start_dpa + ent->len) {
> +                    /*
> +                     * The incoming extent covers a portion of an extent
> +                     * in the device extent list, remove only the overlapping
> +                     * portion, meaning
> +                     * 1. the portions that are not covered by the incoming
> +                     *    extent at both end of the original extent will become
> +                     *    new extents and inserted to the extent list; and
> +                     * 2. the original extent is removed from the extent list;
> +                     * 3. DC extent count is updated accordingly.
> +                     */
> +                    uint64_t ent_start_dpa = ent->start_dpa;
> +                    uint64_t ent_len = ent->len;
> +                    uint64_t len1 = dpa - ent_start_dpa;
> +                    uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
> +
> +                    /*
> +                     * TODO: checking for possible extent overflow, will be
> +                     * moved into a dedicated function of detecting extent
> +                     * overflow.
> +                     */
> +                    if (len1 && len2 && ct3d->dc.total_extent_count ==
> +                        CXL_NUM_EXTENTS_SUPPORTED) {
> +                        return CXL_MBOX_RESOURCES_EXHAUSTED;
> +                    }
> +
> +                    found = true;
> +                    cxl_remove_extent_from_extent_list(extent_list, ent);
> +                    ct3d->dc.total_extent_count -= 1;
> +
> +                    if (len1) {
> +                        cxl_insert_extent_to_extent_list(extent_list,
> +                                                         ent_start_dpa, len1,
> +                                                         NULL, 0);
> +                        ct3d->dc.total_extent_count += 1;
> +                    }
> +                    if (len2) {
> +                        cxl_insert_extent_to_extent_list(extent_list, dpa + len,
> +                                                         len2, NULL, 0);
> +                        ct3d->dc.total_extent_count += 1;
> +                    }
> +                    break;
Maybe this makes sense after the support below is added, but at this
point in the series 
			return CXL_MBOX_SUCCESS;
then found isn't relevant so can drop that. Looks like you drop it later in the
series anyway.

> +                } else {
> +                    /*
> +                     * TODO: we reject the attempt to remove an extent that
> +                     * overlaps with multiple extents in the device for now,
> +                     * once the bitmap indicating whether a DPA range is
> +                     * covered by valid extents is introduced, will allow it.
> +                     */
> +                    return CXL_MBOX_INVALID_PA;
> +                }
> +            }
> +        }
> +
> +        if (!found) {
> +            /* Try to remove a non-existing extent. */
> +            return CXL_MBOX_INVALID_PA;
> +        }
> +    }
> +
> +    return CXL_MBOX_SUCCESS;
> +}
> +

>  static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> index 102fa8151e..dccfaaad3a 100644
> --- a/hw/mem/cxl_type3.c
> +++ b/hw/mem/cxl_type3.c
> @@ -678,6 +678,16 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
>      return true;
>  }
>  
> +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> +{
> +    CXLDCExtent *ent;
> +
> +    while (!QTAILQ_EMPTY(&ct3d->dc.extents)) {
> +        ent = QTAILQ_FIRST(&ct3d->dc.extents);
> +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);

Isn't this same a something like.
    QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node)) {
	cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
        //This wrapper is small enough I'd be tempted to just have the
        //code inline at the places it's called.

    }
> +    }
> +}
Fan Ni March 6, 2024, 9:39 p.m. UTC | #2
On Wed, Mar 06, 2024 at 05:28:27PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:03 -0800
> nifan.cxl@gmail.com wrote:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> > 
> > Per CXL spec 3.1, two mailbox commands are implemented:
> > Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
> > Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> Hmm. So I had a thought which would work for what you
> have here. See include/qemu/range.h
> I like the region merging stuff that is also in the list operators
> but we shouldn't use that because we have other reasons not to
> fuse ranges (sequence numbering etc)
> 
> We could make an extent a wrapper around a struct Range though
> so that we can use the comparison stuff directly.
> + we can use the list manipulation in there as the basis for a future
> extent merging infrastructure that is tag and sequence number (if
> provided - so shared capacity or pmem) aware.
> 
> Jonathan
> 
> 
> > ---
> > +
> > +/*
> > + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
> > + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
> > + */
> > +typedef struct CXLUpdateDCExtentListInPl {
> > +    uint32_t num_entries_updated;
> > +    uint8_t flags;
> > +    uint8_t rsvd[3];
> > +    /* CXL r3.1 Table 8-169: Updated Extent */
> > +    struct {
> > +        uint64_t start_dpa;
> > +        uint64_t len;
> > +        uint8_t rsvd[8];
> > +    } QEMU_PACKED updated_entries[];
> > +} QEMU_PACKED CXLUpdateDCExtentListInPl;
> > +
> > +/*
> > + * For the extents in the extent list to operate, check whether they are valid
> > + * 1. The extent should be in the range of a valid DC region;
> > + * 2. The extent should not cross multiple regions;
> > + * 3. The start DPA and the length of the extent should align with the block
> > + * size of the region;
> > + * 4. The address range of multiple extents in the list should not overlap.
> 
> Hmm. Interesting.  I was thinking a given add / remove command rather than
> just the extents can't overlap a region.  However I can't find text on that
> so I believe your interpretation is correct. It is only specified for the
> event records, but that is good enough I think.  We might want to propose
> tightening the spec on this to allow devices to say no to such complex
> extent lists. Maybe a nice friendly Memory vendor should query this one if
> it's a potential problem for real devices.  Might not be!
> 
> > + */
> > +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
> > +        const CXLUpdateDCExtentListInPl *in)
> > +{
> > +    uint64_t min_block_size = UINT64_MAX;
> > +    CXLDCRegion *region = &ct3d->dc.regions[0];
> > +    CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
> > +    g_autofree unsigned long *blk_bitmap = NULL;
> > +    uint64_t dpa, len;
> > +    uint32_t i;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        min_block_size = MIN(min_block_size, region->block_size);
> > +    }
> > +
> > +    blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
> > +                             ct3d->dc.regions[0].base) / min_block_size);
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        region = cxl_find_dc_region(ct3d, dpa, len);
> > +        if (!region) {
> > +            return CXL_MBOX_INVALID_PA;
> > +        }
> > +
> > +        dpa -= ct3d->dc.regions[0].base;
> > +        if (dpa % region->block_size || len % region->block_size) {
> > +            return CXL_MBOX_INVALID_EXTENT_LIST;
> > +        }
> > +        /* the dpa range already covered by some other extents in the list */
> > +        if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
> > +            len / min_block_size)) {
> > +            return CXL_MBOX_INVALID_EXTENT_LIST;
> > +        }
> > +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> > +   }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
> > + * An extent is added to the extent list and becomes usable only after the
> > + * response is processed successfully
> > + */
> > +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> > +                                          uint8_t *payload_in,
> > +                                          size_t len_in,
> > +                                          uint8_t *payload_out,
> > +                                          size_t *len_out,
> > +                                          CXLCCI *cci)
> > +{
> > +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCExtent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        return CXL_MBOX_SUCCESS;
> > +    }
> > +
> > +    /* Adding extents causes exceeding device's extent tracking ability. */
> > +    if (in->num_entries_updated + ct3d->dc.total_extent_count >
> > +        CXL_NUM_EXTENTS_SUPPORTED) {
> > +        return CXL_MBOX_RESOURCES_EXHAUSTED;
> > +    }
> > +
> > +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        return ret;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        /*
> > +         * Check if the DPA range of the to-be-added extent overlaps with
> > +         * existing extent list maintained by the device.
> > +         */
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> 
> There are too many checks in here for an overlapping test.
> 
> Conditions are
> 
> 	|  Extent tested against |
> |  Overlap entirely                 |
> | overlap left edge |
>                     | overlap right edge |
> Think of it in the inverse condition and it is easier to reason about.
> 
>               | Extent tested against |
> | to left |---                        ---| to right |
> 
> which I think is something like.
> 
>         if (!((dpa + len <= ent->start_dpa) || (dpa >= ent->start_dpa + ent->len)) {
> 		 return CXL_MBOX_INVALID_PA;
> 	}
> 
> Hmm. For internal tracking (not the exposed values) we should probably use
> struct range from include/qemu/range.h.
> Felt like there had to be something better than doing this ourselves so I went
> looking.  Note it uses inclusive upper bound so be careful with that!
> 
> Advantage is we get this checks for free.
> https://elixir.bootlin.com/qemu/latest/source/include/qemu/range.h#L152
> range_overlaps_range()
> 
> There are functions to set them up nicely for us and by base and size
> as well which should tidy that part up.
> 
> 	    
> 
> > +            if (ent->start_dpa <= dpa &&
> > +                    dpa + len <= ent->start_dpa + ent->len) {
> > +                return CXL_MBOX_INVALID_PA;
> > +            /* Overlapping one end of the other */
> > +            } else if ((dpa < ent->start_dpa + ent->len &&
> > +                        dpa + len > ent->start_dpa + ent->len) ||
> > +                       (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> > +                return CXL_MBOX_INVALID_PA;
> > +            }
> > +        }
> > +
> > +        /*
> > +         * TODO: we will add a pending extent list based on event log record
> > +         * and verify the input response; also, the "More" flag is not
> > +         * considered at the moment.
> > +         */
> > +
> > +        cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> > +        ct3d->dc.total_extent_count += 1;
> > +    }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
> > + */
> > +static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
> > +                                          uint8_t *payload_in,
> > +                                          size_t len_in,
> > +                                          uint8_t *payload_out,
> > +                                          size_t *len_out,
> > +                                          CXLCCI *cci)
> > +{
> > +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCExtent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        return CXL_MBOX_INVALID_INPUT;
> > +    }
> > +
> > +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        return ret;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        bool found = false;
> > +
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> > +            /* Found the extent overlapping with */
> > +            if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
> > +                if (dpa + len <= ent->start_dpa + ent->len) {
> > +                    /*
> > +                     * The incoming extent covers a portion of an extent
> > +                     * in the device extent list, remove only the overlapping
> > +                     * portion, meaning
> > +                     * 1. the portions that are not covered by the incoming
> > +                     *    extent at both end of the original extent will become
> > +                     *    new extents and inserted to the extent list; and
> > +                     * 2. the original extent is removed from the extent list;
> > +                     * 3. DC extent count is updated accordingly.
> > +                     */
> > +                    uint64_t ent_start_dpa = ent->start_dpa;
> > +                    uint64_t ent_len = ent->len;
> > +                    uint64_t len1 = dpa - ent_start_dpa;
> > +                    uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
> > +
> > +                    /*
> > +                     * TODO: checking for possible extent overflow, will be
> > +                     * moved into a dedicated function of detecting extent
> > +                     * overflow.
> > +                     */
> > +                    if (len1 && len2 && ct3d->dc.total_extent_count ==
> > +                        CXL_NUM_EXTENTS_SUPPORTED) {
> > +                        return CXL_MBOX_RESOURCES_EXHAUSTED;
> > +                    }
> > +
> > +                    found = true;
> > +                    cxl_remove_extent_from_extent_list(extent_list, ent);
> > +                    ct3d->dc.total_extent_count -= 1;
> > +
> > +                    if (len1) {
> > +                        cxl_insert_extent_to_extent_list(extent_list,
> > +                                                         ent_start_dpa, len1,
> > +                                                         NULL, 0);
> > +                        ct3d->dc.total_extent_count += 1;
> > +                    }
> > +                    if (len2) {
> > +                        cxl_insert_extent_to_extent_list(extent_list, dpa + len,
> > +                                                         len2, NULL, 0);
> > +                        ct3d->dc.total_extent_count += 1;
> > +                    }
> > +                    break;
> Maybe this makes sense after the support below is added, but at this
> point in the series 
> 			return CXL_MBOX_SUCCESS;
> then found isn't relevant so can drop that. Looks like you drop it later in the
> series anyway.

We cannot return directly as we have more extents to release.
One thing I think I need to add is a dry run to test if any extent in
the income list is not contained by an extent in the extent list and
return error before starting to do the real release. The spec just says
we need to return invalid PA but not specify whether we should update the list
until we found a "bad" extent or reject the request directly. Current code
leaves a situation where we may have updated the extent list until we found a
"bad" extent to release.

> 
> > +                } else {
> > +                    /*
> > +                     * TODO: we reject the attempt to remove an extent that
> > +                     * overlaps with multiple extents in the device for now,
> > +                     * once the bitmap indicating whether a DPA range is
> > +                     * covered by valid extents is introduced, will allow it.
> > +                     */
> > +                    return CXL_MBOX_INVALID_PA;
> > +                }
> > +            }
> > +        }
> > +
> > +        if (!found) {
> > +            /* Try to remove a non-existing extent. */
> > +            return CXL_MBOX_INVALID_PA;
> > +        }
> > +    }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> 
> >  static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 102fa8151e..dccfaaad3a 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -678,6 +678,16 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
> >      return true;
> >  }
> >  
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    CXLDCExtent *ent;
> > +
> > +    while (!QTAILQ_EMPTY(&ct3d->dc.extents)) {
> > +        ent = QTAILQ_FIRST(&ct3d->dc.extents);
> > +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
> 
> Isn't this same a something like.
>     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node)) {
> 	cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
>         //This wrapper is small enough I'd be tempted to just have the
>         //code inline at the places it's called.
Good point, will update.

Fan
> 
>     }
> > +    }
> > +}
>
Fan Ni March 6, 2024, 10:34 p.m. UTC | #3
On Wed, Mar 06, 2024 at 05:28:27PM +0000, Jonathan Cameron wrote:
> On Mon,  4 Mar 2024 11:34:03 -0800
> nifan.cxl@gmail.com wrote:
> 
> > From: Fan Ni <fan.ni@samsung.com>
> > 
> > Per CXL spec 3.1, two mailbox commands are implemented:
> > Add Dynamic Capacity Response (Opcode 4802h) 8.2.9.9.9.3, and
> > Release Dynamic Capacity (Opcode 4803h) 8.2.9.9.9.4.
> > 
> > Signed-off-by: Fan Ni <fan.ni@samsung.com>
> 
> Hmm. So I had a thought which would work for what you
> have here. See include/qemu/range.h
> I like the region merging stuff that is also in the list operators
> but we shouldn't use that because we have other reasons not to
> fuse ranges (sequence numbering etc)
> 
> We could make an extent a wrapper around a struct Range though
> so that we can use the comparison stuff directly.
> + we can use the list manipulation in there as the basis for a future
> extent merging infrastructure that is tag and sequence number (if
> provided - so shared capacity or pmem) aware.
> 
> Jonathan
> 
> 
> > ---
> > +
> > +/*
> > + * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
> > + * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
> > + */
> > +typedef struct CXLUpdateDCExtentListInPl {
> > +    uint32_t num_entries_updated;
> > +    uint8_t flags;
> > +    uint8_t rsvd[3];
> > +    /* CXL r3.1 Table 8-169: Updated Extent */
> > +    struct {
> > +        uint64_t start_dpa;
> > +        uint64_t len;
> > +        uint8_t rsvd[8];
> > +    } QEMU_PACKED updated_entries[];
> > +} QEMU_PACKED CXLUpdateDCExtentListInPl;
> > +
> > +/*
> > + * For the extents in the extent list to operate, check whether they are valid
> > + * 1. The extent should be in the range of a valid DC region;
> > + * 2. The extent should not cross multiple regions;
> > + * 3. The start DPA and the length of the extent should align with the block
> > + * size of the region;
> > + * 4. The address range of multiple extents in the list should not overlap.
> 
> Hmm. Interesting.  I was thinking a given add / remove command rather than
> just the extents can't overlap a region.  However I can't find text on that
> so I believe your interpretation is correct. It is only specified for the
> event records, but that is good enough I think.  We might want to propose
> tightening the spec on this to allow devices to say no to such complex
> extent lists. Maybe a nice friendly Memory vendor should query this one if
> it's a potential problem for real devices.  Might not be!
> 
> > + */
> > +static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
> > +        const CXLUpdateDCExtentListInPl *in)
> > +{
> > +    uint64_t min_block_size = UINT64_MAX;
> > +    CXLDCRegion *region = &ct3d->dc.regions[0];
> > +    CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
> > +    g_autofree unsigned long *blk_bitmap = NULL;
> > +    uint64_t dpa, len;
> > +    uint32_t i;
> > +
> > +    for (i = 0; i < ct3d->dc.num_regions; i++) {
> > +        region = &ct3d->dc.regions[i];
> > +        min_block_size = MIN(min_block_size, region->block_size);
> > +    }
> > +
> > +    blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
> > +                             ct3d->dc.regions[0].base) / min_block_size);
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        region = cxl_find_dc_region(ct3d, dpa, len);
> > +        if (!region) {
> > +            return CXL_MBOX_INVALID_PA;
> > +        }
> > +
> > +        dpa -= ct3d->dc.regions[0].base;
> > +        if (dpa % region->block_size || len % region->block_size) {
> > +            return CXL_MBOX_INVALID_EXTENT_LIST;
> > +        }
> > +        /* the dpa range already covered by some other extents in the list */
> > +        if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
> > +            len / min_block_size)) {
> > +            return CXL_MBOX_INVALID_EXTENT_LIST;
> > +        }
> > +        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
> > +   }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
> > + * An extent is added to the extent list and becomes usable only after the
> > + * response is processed successfully
> > + */
> > +static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
> > +                                          uint8_t *payload_in,
> > +                                          size_t len_in,
> > +                                          uint8_t *payload_out,
> > +                                          size_t *len_out,
> > +                                          CXLCCI *cci)
> > +{
> > +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCExtent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        return CXL_MBOX_SUCCESS;
> > +    }
> > +
> > +    /* Adding extents causes exceeding device's extent tracking ability. */
> > +    if (in->num_entries_updated + ct3d->dc.total_extent_count >
> > +        CXL_NUM_EXTENTS_SUPPORTED) {
> > +        return CXL_MBOX_RESOURCES_EXHAUSTED;
> > +    }
> > +
> > +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        return ret;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        /*
> > +         * Check if the DPA range of the to-be-added extent overlaps with
> > +         * existing extent list maintained by the device.
> > +         */
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> 
> There are too many checks in here for an overlapping test.
> 
> Conditions are
> 
> 	|  Extent tested against |
> |  Overlap entirely                 |
> | overlap left edge |
>                     | overlap right edge |
> Think of it in the inverse condition and it is easier to reason about.
> 
>               | Extent tested against |
> | to left |---                        ---| to right |
> 
> which I think is something like.
> 
>         if (!((dpa + len <= ent->start_dpa) || (dpa >= ent->start_dpa + ent->len)) {
> 		 return CXL_MBOX_INVALID_PA;
> 	}
> 
> Hmm. For internal tracking (not the exposed values) we should probably use
> struct range from include/qemu/range.h.
> Felt like there had to be something better than doing this ourselves so I went
> looking.  Note it uses inclusive upper bound so be careful with that!
> 
> Advantage is we get this checks for free.
> https://elixir.bootlin.com/qemu/latest/source/include/qemu/range.h#L152
> range_overlaps_range()
> 
> There are functions to set them up nicely for us and by base and size
> as well which should tidy that part up.
> 
> 	    
> 
> > +            if (ent->start_dpa <= dpa &&
> > +                    dpa + len <= ent->start_dpa + ent->len) {
> > +                return CXL_MBOX_INVALID_PA;
> > +            /* Overlapping one end of the other */
> > +            } else if ((dpa < ent->start_dpa + ent->len &&
> > +                        dpa + len > ent->start_dpa + ent->len) ||
> > +                       (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
> > +                return CXL_MBOX_INVALID_PA;
> > +            }
> > +        }
> > +
> > +        /*
> > +         * TODO: we will add a pending extent list based on event log record
> > +         * and verify the input response; also, the "More" flag is not
> > +         * considered at the moment.
> > +         */
> > +
> > +        cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
> > +        ct3d->dc.total_extent_count += 1;
> > +    }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> > +/*
> > + * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
> > + */
> > +static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
> > +                                          uint8_t *payload_in,
> > +                                          size_t len_in,
> > +                                          uint8_t *payload_out,
> > +                                          size_t *len_out,
> > +                                          CXLCCI *cci)
> > +{
> > +    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
> > +    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
> > +    CXLDCExtentList *extent_list = &ct3d->dc.extents;
> > +    CXLDCExtent *ent;
> > +    uint32_t i;
> > +    uint64_t dpa, len;
> > +    CXLRetCode ret;
> > +
> > +    if (in->num_entries_updated == 0) {
> > +        return CXL_MBOX_INVALID_INPUT;
> > +    }
> > +
> > +    ret = cxl_detect_malformed_extent_list(ct3d, in);
> > +    if (ret != CXL_MBOX_SUCCESS) {
> > +        return ret;
> > +    }
> > +
> > +    for (i = 0; i < in->num_entries_updated; i++) {
> > +        bool found = false;
> > +
> > +        dpa = in->updated_entries[i].start_dpa;
> > +        len = in->updated_entries[i].len;
> > +
> > +        QTAILQ_FOREACH(ent, extent_list, node) {
> > +            /* Found the extent overlapping with */
> > +            if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
> > +                if (dpa + len <= ent->start_dpa + ent->len) {
> > +                    /*
> > +                     * The incoming extent covers a portion of an extent
> > +                     * in the device extent list, remove only the overlapping
> > +                     * portion, meaning
> > +                     * 1. the portions that are not covered by the incoming
> > +                     *    extent at both end of the original extent will become
> > +                     *    new extents and inserted to the extent list; and
> > +                     * 2. the original extent is removed from the extent list;
> > +                     * 3. DC extent count is updated accordingly.
> > +                     */
> > +                    uint64_t ent_start_dpa = ent->start_dpa;
> > +                    uint64_t ent_len = ent->len;
> > +                    uint64_t len1 = dpa - ent_start_dpa;
> > +                    uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
> > +
> > +                    /*
> > +                     * TODO: checking for possible extent overflow, will be
> > +                     * moved into a dedicated function of detecting extent
> > +                     * overflow.
> > +                     */
> > +                    if (len1 && len2 && ct3d->dc.total_extent_count ==
> > +                        CXL_NUM_EXTENTS_SUPPORTED) {
> > +                        return CXL_MBOX_RESOURCES_EXHAUSTED;
> > +                    }
> > +
> > +                    found = true;
> > +                    cxl_remove_extent_from_extent_list(extent_list, ent);
> > +                    ct3d->dc.total_extent_count -= 1;
> > +
> > +                    if (len1) {
> > +                        cxl_insert_extent_to_extent_list(extent_list,
> > +                                                         ent_start_dpa, len1,
> > +                                                         NULL, 0);
> > +                        ct3d->dc.total_extent_count += 1;
> > +                    }
> > +                    if (len2) {
> > +                        cxl_insert_extent_to_extent_list(extent_list, dpa + len,
> > +                                                         len2, NULL, 0);
> > +                        ct3d->dc.total_extent_count += 1;
> > +                    }
> > +                    break;
> Maybe this makes sense after the support below is added, but at this
> point in the series 
> 			return CXL_MBOX_SUCCESS;
> then found isn't relevant so can drop that. Looks like you drop it later in the
> series anyway.
> 
> > +                } else {
> > +                    /*
> > +                     * TODO: we reject the attempt to remove an extent that
> > +                     * overlaps with multiple extents in the device for now,
> > +                     * once the bitmap indicating whether a DPA range is
> > +                     * covered by valid extents is introduced, will allow it.
> > +                     */
> > +                    return CXL_MBOX_INVALID_PA;
> > +                }
> > +            }
> > +        }
> > +
> > +        if (!found) {
> > +            /* Try to remove a non-existing extent. */
> > +            return CXL_MBOX_INVALID_PA;
> > +        }
> > +    }
> > +
> > +    return CXL_MBOX_SUCCESS;
> > +}
> > +
> 
> >  static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
> > diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
> > index 102fa8151e..dccfaaad3a 100644
> > --- a/hw/mem/cxl_type3.c
> > +++ b/hw/mem/cxl_type3.c
> > @@ -678,6 +678,16 @@ static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
> >      return true;
> >  }
> >  
> > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > +{
> > +    CXLDCExtent *ent;
> > +
> > +    while (!QTAILQ_EMPTY(&ct3d->dc.extents)) {
> > +        ent = QTAILQ_FIRST(&ct3d->dc.extents);
> > +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
> 
> Isn't this same a something like.
>     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node)) {
> 	cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
>         //This wrapper is small enough I'd be tempted to just have the
>         //code inline at the places it's called.
> 
We will have more to release after we introduce pending list as well as
bitmap. Keep it?

Fan

>     }
> > +    }
> > +}
>
Jonathan Cameron March 7, 2024, 12:20 p.m. UTC | #4
On Wed, 6 Mar 2024 13:39:50 -0800
fan <nifan.cxl@gmail.com> wrote:



> > > +                    }
> > > +                    if (len2) {
> > > +                        cxl_insert_extent_to_extent_list(extent_list, dpa + len,
> > > +                                                         len2, NULL, 0);
> > > +                        ct3d->dc.total_extent_count += 1;
> > > +                    }
> > > +                    break;  
> > Maybe this makes sense after the support below is added, but at this
> > point in the series 
> > 			return CXL_MBOX_SUCCESS;
> > then found isn't relevant so can drop that. Looks like you drop it later in the
> > series anyway.  
> 
> We cannot return directly as we have more extents to release.

Ah good point. I'd missed the double loop.

> One thing I think I need to add is a dry run to test if any extent in
> the income list is not contained by an extent in the extent list and
> return error before starting to do the real release. The spec just says
> we need to return invalid PA but not specify whether we should update the list
> until we found a "bad" extent or reject the request directly. Current code
> leaves a situation where we may have updated the extent list until we found a
> "bad" extent to release.

Yes, I'm not sure on the correct answer to this either. My assumption is that in
error cases there are no side effects, but I don't see a clear statement of that.

So I think we are in the world of best practice, not spec compliance.
If we wanted to recover from such an error case we'd have to verify the current
extent list.  I'll fire off a question to relevant folk in appropriate forum.

Jonathan
Jonathan Cameron March 7, 2024, 12:30 p.m. UTC | #5
> > > +static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
> > > +{
> > > +    CXLDCExtent *ent;
> > > +
> > > +    while (!QTAILQ_EMPTY(&ct3d->dc.extents)) {
> > > +        ent = QTAILQ_FIRST(&ct3d->dc.extents);
> > > +        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);  
> > 
> > Isn't this same a something like.
> >     QTAILQ_FOREACH_SAFE(ent, &ct3d->dc.extents, node)) {
> > 	cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
> >         //This wrapper is small enough I'd be tempted to just have the
> >         //code inline at the places it's called.
> >   
> We will have more to release after we introduce pending list as well as
> bitmap. Keep it?
ok. 
> 
> Fan
> 
> >     }  
> > > +    }
> > > +}  
> >
diff mbox series

Patch

diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c
index 425b378a2c..8c59635a9f 100644
--- a/hw/cxl/cxl-mailbox-utils.c
+++ b/hw/cxl/cxl-mailbox-utils.c
@@ -85,6 +85,8 @@  enum {
     DCD_CONFIG  = 0x48,
         #define GET_DC_CONFIG          0x0
         #define GET_DYN_CAP_EXT_LIST   0x1
+        #define ADD_DYN_CAP_RSP        0x2
+        #define RELEASE_DYN_CAP        0x3
     PHYSICAL_SWITCH = 0x51,
         #define IDENTIFY_SWITCH_DEVICE      0x0
         #define GET_PHYSICAL_PORT_STATE     0x1
@@ -1399,6 +1401,308 @@  static CXLRetCode cmd_dcd_get_dyn_cap_ext_list(const struct cxl_cmd *cmd,
     return CXL_MBOX_SUCCESS;
 }
 
+/*
+ * Check whether any bit between addr[nr, nr+size) is set,
+ * return true if any bit is set, otherwise return false
+ */
+static bool test_any_bits_set(const unsigned long *addr, unsigned long nr,
+                              unsigned long size)
+{
+    unsigned long res = find_next_bit(addr, size + nr, nr);
+
+    return res < nr + size;
+}
+
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len)
+{
+    int i;
+    CXLDCRegion *region = &ct3d->dc.regions[0];
+
+    if (dpa < region->base ||
+        dpa >= region->base + ct3d->dc.total_capacity) {
+        return NULL;
+    }
+
+    /*
+     * CXL r3.1 section 9.13.3: Dynamic Capacity Device (DCD)
+     *
+     * Regions are used in increasing-DPA order, with Region 0 being used for
+     * the lowest DPA of Dynamic Capacity and Region 7 for the highest DPA.
+     * So check from the last region to find where the dpa belongs. Extents that
+     * cross multiple regions are not allowed.
+     */
+    for (i = ct3d->dc.num_regions - 1; i >= 0; i--) {
+        region = &ct3d->dc.regions[i];
+        if (dpa >= region->base) {
+            if (dpa + len > region->base + region->len) {
+                return NULL;
+            }
+            return region;
+        }
+    }
+
+    return NULL;
+}
+
+static void cxl_insert_extent_to_extent_list(CXLDCExtentList *list,
+                                             uint64_t dpa,
+                                             uint64_t len,
+                                             uint8_t *tag,
+                                             uint16_t shared_seq)
+{
+    CXLDCExtent *extent;
+
+    extent = g_new0(CXLDCExtent, 1);
+    extent->start_dpa = dpa;
+    extent->len = len;
+    if (tag) {
+        memcpy(extent->tag, tag, 0x10);
+    }
+    extent->shared_seq = shared_seq;
+
+    QTAILQ_INSERT_TAIL(list, extent, node);
+}
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+                                        CXLDCExtent *extent)
+{
+    QTAILQ_REMOVE(list, extent, node);
+    g_free(extent);
+}
+
+/*
+ * CXL r3.1 Table 8-168: Add Dynamic Capacity Response Input Payload
+ * CXL r3.1 Table 8-170: Release Dynamic Capacity Input Payload
+ */
+typedef struct CXLUpdateDCExtentListInPl {
+    uint32_t num_entries_updated;
+    uint8_t flags;
+    uint8_t rsvd[3];
+    /* CXL r3.1 Table 8-169: Updated Extent */
+    struct {
+        uint64_t start_dpa;
+        uint64_t len;
+        uint8_t rsvd[8];
+    } QEMU_PACKED updated_entries[];
+} QEMU_PACKED CXLUpdateDCExtentListInPl;
+
+/*
+ * For the extents in the extent list to operate, check whether they are valid
+ * 1. The extent should be in the range of a valid DC region;
+ * 2. The extent should not cross multiple regions;
+ * 3. The start DPA and the length of the extent should align with the block
+ * size of the region;
+ * 4. The address range of multiple extents in the list should not overlap.
+ */
+static CXLRetCode cxl_detect_malformed_extent_list(CXLType3Dev *ct3d,
+        const CXLUpdateDCExtentListInPl *in)
+{
+    uint64_t min_block_size = UINT64_MAX;
+    CXLDCRegion *region = &ct3d->dc.regions[0];
+    CXLDCRegion *lastregion = &ct3d->dc.regions[ct3d->dc.num_regions - 1];
+    g_autofree unsigned long *blk_bitmap = NULL;
+    uint64_t dpa, len;
+    uint32_t i;
+
+    for (i = 0; i < ct3d->dc.num_regions; i++) {
+        region = &ct3d->dc.regions[i];
+        min_block_size = MIN(min_block_size, region->block_size);
+    }
+
+    blk_bitmap = bitmap_new((lastregion->base + lastregion->len -
+                             ct3d->dc.regions[0].base) / min_block_size);
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        region = cxl_find_dc_region(ct3d, dpa, len);
+        if (!region) {
+            return CXL_MBOX_INVALID_PA;
+        }
+
+        dpa -= ct3d->dc.regions[0].base;
+        if (dpa % region->block_size || len % region->block_size) {
+            return CXL_MBOX_INVALID_EXTENT_LIST;
+        }
+        /* the dpa range already covered by some other extents in the list */
+        if (test_any_bits_set(blk_bitmap, dpa / min_block_size,
+            len / min_block_size)) {
+            return CXL_MBOX_INVALID_EXTENT_LIST;
+        }
+        bitmap_set(blk_bitmap, dpa / min_block_size, len / min_block_size);
+   }
+
+    return CXL_MBOX_SUCCESS;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.9.3: Add Dynamic Capacity Response (Opcode 4802h)
+ * An extent is added to the extent list and becomes usable only after the
+ * response is processed successfully
+ */
+static CXLRetCode cmd_dcd_add_dyn_cap_rsp(const struct cxl_cmd *cmd,
+                                          uint8_t *payload_in,
+                                          size_t len_in,
+                                          uint8_t *payload_out,
+                                          size_t *len_out,
+                                          CXLCCI *cci)
+{
+    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    CXLDCExtentList *extent_list = &ct3d->dc.extents;
+    CXLDCExtent *ent;
+    uint32_t i;
+    uint64_t dpa, len;
+    CXLRetCode ret;
+
+    if (in->num_entries_updated == 0) {
+        return CXL_MBOX_SUCCESS;
+    }
+
+    /* Adding extents causes exceeding device's extent tracking ability. */
+    if (in->num_entries_updated + ct3d->dc.total_extent_count >
+        CXL_NUM_EXTENTS_SUPPORTED) {
+        return CXL_MBOX_RESOURCES_EXHAUSTED;
+    }
+
+    ret = cxl_detect_malformed_extent_list(ct3d, in);
+    if (ret != CXL_MBOX_SUCCESS) {
+        return ret;
+    }
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        /*
+         * Check if the DPA range of the to-be-added extent overlaps with
+         * existing extent list maintained by the device.
+         */
+        QTAILQ_FOREACH(ent, extent_list, node) {
+            if (ent->start_dpa <= dpa &&
+                    dpa + len <= ent->start_dpa + ent->len) {
+                return CXL_MBOX_INVALID_PA;
+            /* Overlapping one end of the other */
+            } else if ((dpa < ent->start_dpa + ent->len &&
+                        dpa + len > ent->start_dpa + ent->len) ||
+                       (dpa < ent->start_dpa && dpa + len > ent->start_dpa)) {
+                return CXL_MBOX_INVALID_PA;
+            }
+        }
+
+        /*
+         * TODO: we will add a pending extent list based on event log record
+         * and verify the input response; also, the "More" flag is not
+         * considered at the moment.
+         */
+
+        cxl_insert_extent_to_extent_list(extent_list, dpa, len, NULL, 0);
+        ct3d->dc.total_extent_count += 1;
+    }
+
+    return CXL_MBOX_SUCCESS;
+}
+
+/*
+ * CXL r3.1 section 8.2.9.9.9.4: Release Dynamic Capacity (Opcode 4803h)
+ */
+static CXLRetCode cmd_dcd_release_dyn_cap(const struct cxl_cmd *cmd,
+                                          uint8_t *payload_in,
+                                          size_t len_in,
+                                          uint8_t *payload_out,
+                                          size_t *len_out,
+                                          CXLCCI *cci)
+{
+    CXLUpdateDCExtentListInPl *in = (void *)payload_in;
+    CXLType3Dev *ct3d = CXL_TYPE3(cci->d);
+    CXLDCExtentList *extent_list = &ct3d->dc.extents;
+    CXLDCExtent *ent;
+    uint32_t i;
+    uint64_t dpa, len;
+    CXLRetCode ret;
+
+    if (in->num_entries_updated == 0) {
+        return CXL_MBOX_INVALID_INPUT;
+    }
+
+    ret = cxl_detect_malformed_extent_list(ct3d, in);
+    if (ret != CXL_MBOX_SUCCESS) {
+        return ret;
+    }
+
+    for (i = 0; i < in->num_entries_updated; i++) {
+        bool found = false;
+
+        dpa = in->updated_entries[i].start_dpa;
+        len = in->updated_entries[i].len;
+
+        QTAILQ_FOREACH(ent, extent_list, node) {
+            /* Found the extent overlapping with */
+            if (ent->start_dpa <= dpa && dpa < ent->start_dpa + ent->len) {
+                if (dpa + len <= ent->start_dpa + ent->len) {
+                    /*
+                     * The incoming extent covers a portion of an extent
+                     * in the device extent list, remove only the overlapping
+                     * portion, meaning
+                     * 1. the portions that are not covered by the incoming
+                     *    extent at both end of the original extent will become
+                     *    new extents and inserted to the extent list; and
+                     * 2. the original extent is removed from the extent list;
+                     * 3. DC extent count is updated accordingly.
+                     */
+                    uint64_t ent_start_dpa = ent->start_dpa;
+                    uint64_t ent_len = ent->len;
+                    uint64_t len1 = dpa - ent_start_dpa;
+                    uint64_t len2 = ent_start_dpa + ent_len - dpa - len;
+
+                    /*
+                     * TODO: checking for possible extent overflow, will be
+                     * moved into a dedicated function of detecting extent
+                     * overflow.
+                     */
+                    if (len1 && len2 && ct3d->dc.total_extent_count ==
+                        CXL_NUM_EXTENTS_SUPPORTED) {
+                        return CXL_MBOX_RESOURCES_EXHAUSTED;
+                    }
+
+                    found = true;
+                    cxl_remove_extent_from_extent_list(extent_list, ent);
+                    ct3d->dc.total_extent_count -= 1;
+
+                    if (len1) {
+                        cxl_insert_extent_to_extent_list(extent_list,
+                                                         ent_start_dpa, len1,
+                                                         NULL, 0);
+                        ct3d->dc.total_extent_count += 1;
+                    }
+                    if (len2) {
+                        cxl_insert_extent_to_extent_list(extent_list, dpa + len,
+                                                         len2, NULL, 0);
+                        ct3d->dc.total_extent_count += 1;
+                    }
+                    break;
+                } else {
+                    /*
+                     * TODO: we reject the attempt to remove an extent that
+                     * overlaps with multiple extents in the device for now,
+                     * once the bitmap indicating whether a DPA range is
+                     * covered by valid extents is introduced, will allow it.
+                     */
+                    return CXL_MBOX_INVALID_PA;
+                }
+            }
+        }
+
+        if (!found) {
+            /* Try to remove a non-existing extent. */
+            return CXL_MBOX_INVALID_PA;
+        }
+    }
+
+    return CXL_MBOX_SUCCESS;
+}
+
 #define IMMEDIATE_CONFIG_CHANGE (1 << 1)
 #define IMMEDIATE_DATA_CHANGE (1 << 2)
 #define IMMEDIATE_POLICY_CHANGE (1 << 3)
@@ -1449,6 +1753,12 @@  static const struct cxl_cmd cxl_cmd_set_dcd[256][256] = {
     [DCD_CONFIG][GET_DYN_CAP_EXT_LIST] = {
         "DCD_GET_DYNAMIC_CAPACITY_EXTENT_LIST", cmd_dcd_get_dyn_cap_ext_list,
         8, 0 },
+    [DCD_CONFIG][ADD_DYN_CAP_RSP] = {
+        "DCD_ADD_DYNAMIC_CAPACITY_RESPONSE", cmd_dcd_add_dyn_cap_rsp,
+        ~0, IMMEDIATE_DATA_CHANGE },
+    [DCD_CONFIG][RELEASE_DYN_CAP] = {
+        "DCD_RELEASE_DYNAMIC_CAPACITY", cmd_dcd_release_dyn_cap,
+        ~0, IMMEDIATE_DATA_CHANGE },
 };
 
 static const struct cxl_cmd cxl_cmd_set_sw[256][256] = {
diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c
index 102fa8151e..dccfaaad3a 100644
--- a/hw/mem/cxl_type3.c
+++ b/hw/mem/cxl_type3.c
@@ -678,6 +678,16 @@  static bool cxl_create_dc_regions(CXLType3Dev *ct3d, Error **errp)
     return true;
 }
 
+static void cxl_destroy_dc_regions(CXLType3Dev *ct3d)
+{
+    CXLDCExtent *ent;
+
+    while (!QTAILQ_EMPTY(&ct3d->dc.extents)) {
+        ent = QTAILQ_FIRST(&ct3d->dc.extents);
+        cxl_remove_extent_from_extent_list(&ct3d->dc.extents, ent);
+    }
+}
+
 static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp)
 {
     DeviceState *ds = DEVICE(ct3d);
@@ -874,6 +884,7 @@  err_free_special_ops:
     g_free(regs->special_ops);
 err_address_space_free:
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
@@ -895,6 +906,7 @@  static void ct3_exit(PCIDevice *pci_dev)
     cxl_doe_cdat_release(cxl_cstate);
     g_free(regs->special_ops);
     if (ct3d->dc.host_dc) {
+        cxl_destroy_dc_regions(ct3d);
         address_space_destroy(&ct3d->dc.host_dc_as);
     }
     if (ct3d->hostpmem) {
diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h
index 8148bcc34b..341260e6e4 100644
--- a/include/hw/cxl/cxl_device.h
+++ b/include/hw/cxl/cxl_device.h
@@ -547,4 +547,8 @@  void cxl_event_irq_assert(CXLType3Dev *ct3d);
 
 void cxl_set_poison_list_overflowed(CXLType3Dev *ct3d);
 
+CXLDCRegion *cxl_find_dc_region(CXLType3Dev *ct3d, uint64_t dpa, uint64_t len);
+
+void cxl_remove_extent_from_extent_list(CXLDCExtentList *list,
+                                        CXLDCExtent *extent);
 #endif