@@ -40,6 +40,9 @@
* sriov_vi_flexible=<N[optional]> \
* sriov_max_vi_per_vf=<N[optional]> \
* sriov_max_vq_per_vf=<N[optional]> \
+ * atomic.dn=<on|off[optional]>, \
+ * atomic.awun<N[optional]>, \
+ * atomic.awupf<N[optional]>, \
* subsys=<subsys_id>
* -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
* zoned=<true|false[optional]>, \
@@ -254,6 +257,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
[NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
[NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
[NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
+ [NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE,
[NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
[NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
[NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
@@ -6309,8 +6313,10 @@ defaults:
if (ret) {
return ret;
}
- goto out;
+ break;
+ case NVME_WRITE_ATOMICITY:
+ result = n->dn;
break;
default:
result = nvme_feature_default[fid];
@@ -6394,6 +6400,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
uint8_t save = NVME_SETFEAT_SAVE(dw10);
uint16_t status;
int i;
+ NvmeIdCtrl *id = &n->id_ctrl;
+ NvmeAtomic *atomic = &n->atomic;
trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
@@ -6546,6 +6554,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
return NVME_CMD_SEQ_ERROR | NVME_DNR;
case NVME_FDP_EVENTS:
return nvme_set_feature_fdp_events(n, ns, req);
+ case NVME_WRITE_ATOMICITY:
+
+ n->dn = 0x1 & dw11;
+
+ if (n->dn) {
+ atomic->atomic_max_write_size = le16_to_cpu(id->awupf) + 1;
+ } else {
+ atomic->atomic_max_write_size = le16_to_cpu(id->awun) + 1;
+ }
+
+ if (atomic->atomic_max_write_size == 1) {
+ atomic->atomic_writes = 0;
+ } else {
+ atomic->atomic_writes = 1;
+ }
+ break;
default:
return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
}
@@ -7243,6 +7267,81 @@ static void nvme_update_sq_tail(NvmeSQueue *sq)
trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
}
+#define NVME_ATOMIC_NO_START 0
+#define NVME_ATOMIC_START_ATOMIC 1
+#define NVME_ATOMIC_START_NONATOMIC 2
+
+static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,
+ NvmeAtomic *atomic)
+{
+ NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
+ uint64_t slba = le64_to_cpu(rw->slba);
+ uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb);
+ uint64_t elba = slba + nlb;
+ bool cmd_atomic_wr = true;
+ int i;
+
+ if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) &&
+ ((rw->nlb + 1) > atomic->atomic_max_write_size))) {
+ cmd_atomic_wr = false;
+ }
+
+ /*
+ * Walk the queues to see if there are any atomic conflicts.
+ */
+ for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
+ NvmeSQueue *sq;
+ NvmeRequest *req;
+ NvmeRwCmd *req_rw;
+ uint64_t req_slba;
+ uint32_t req_nlb;
+ uint64_t req_elba;
+
+ sq = n->sq[i];
+ if (!sq) {
+ continue;
+ }
+
+ /*
+ * Walk all the requests on a given queue.
+ */
+ QTAILQ_FOREACH(req, &sq->out_req_list, entry) {
+ req_rw = (NvmeRwCmd *)&req->cmd;
+
+ if (((req_rw->opcode == NVME_CMD_WRITE) ||
+ (req_rw->opcode == NVME_CMD_READ)) &&
+ (cmd->nsid == req->ns->params.nsid)) {
+ req_slba = le64_to_cpu(req_rw->slba);
+ req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb);
+ req_elba = req_slba + req_nlb;
+
+ if (cmd_atomic_wr) {
+ if ((elba >= req_slba) && (slba <= req_elba)) {
+ return NVME_ATOMIC_NO_START;
+ }
+ } else {
+ if (req->atomic_write && ((elba >= req_slba) &&
+ (slba <= req_elba))) {
+ return NVME_ATOMIC_NO_START;
+ }
+ }
+ }
+ }
+ }
+ if (cmd_atomic_wr) {
+ return NVME_ATOMIC_START_ATOMIC;
+ }
+ return NVME_ATOMIC_START_NONATOMIC;
+}
+
+static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd)
+{
+ if (n->atomic.atomic_writes) {
+ return &n->atomic;
+ }
+ return NULL;
+}
+
static void nvme_process_sq(void *opaque)
{
NvmeSQueue *sq = opaque;
@@ -7259,6 +7358,9 @@ static void nvme_process_sq(void *opaque)
}
while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
+ NvmeAtomic *atomic;
+ bool cmd_is_atomic;
+
addr = sq->dma_addr + (sq->head << NVME_SQES);
if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
trace_pci_nvme_err_addr_read(addr);
@@ -7266,6 +7368,26 @@ static void nvme_process_sq(void *opaque)
stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
break;
}
+
+ atomic = nvme_get_atomic(n, &cmd);
+
+ cmd_is_atomic = false;
+ if (sq->sqid && atomic) {
+ int ret;
+
+ ret = nvme_atomic_write_check(n, &cmd, atomic);
+ switch (ret) {
+ case NVME_ATOMIC_NO_START:
+ qemu_bh_schedule(sq->bh);
+ return;
+ case NVME_ATOMIC_START_ATOMIC:
+ cmd_is_atomic = true;
+ break;
+ case NVME_ATOMIC_START_NONATOMIC:
+ default:
+ break;
+ }
+ }
nvme_inc_sq_head(sq);
req = QTAILQ_FIRST(&sq->req_list);
@@ -7275,6 +7397,10 @@ static void nvme_process_sq(void *opaque)
req->cqe.cid = cmd.cid;
memcpy(&req->cmd, &cmd, sizeof(NvmeCmd));
+ if (sq->sqid && atomic) {
+ req->atomic_write = cmd_is_atomic;
+ }
+
status = sq->sqid ? nvme_io_cmd(n, req) :
nvme_admin_cmd(n, req);
if (status != NVME_NO_COMPLETE) {
@@ -7378,6 +7504,8 @@ static void nvme_ctrl_reset(NvmeCtrl *n, NvmeResetType rst)
n->outstanding_aers = 0;
n->qs_created = false;
+ n->dn = n->params.atomic_dn; /* Set Disable Normal */
+
nvme_update_msixcap_ts(pci_dev, n->conf_msix_qsize);
if (pci_is_vf(pci_dev)) {
@@ -8154,6 +8282,8 @@ static void nvme_init_state(NvmeCtrl *n)
NvmeSecCtrlEntry *list = n->sec_ctrl_list;
NvmeSecCtrlEntry *sctrl;
PCIDevice *pci = PCI_DEVICE(n);
+ NvmeAtomic *atomic = &n->atomic;
+ NvmeIdCtrl *id = &n->id_ctrl;
uint8_t max_vfs;
int i;
@@ -8211,6 +8341,29 @@ static void nvme_init_state(NvmeCtrl *n)
cpu_to_le16(n->params.sriov_max_vi_per_vf) :
cap->vifrt / MAX(max_vfs, 1);
}
+
+ /* Atomic Write */
+ id->awun = cpu_to_le16(n->params.atomic_awun);
+ id->awupf = cpu_to_le16(n->params.atomic_awupf);
+ n->dn = n->params.atomic_dn;
+
+ if (id->awun || id->awupf) {
+ if (id->awupf > id->awun) {
+ id->awupf = 0;
+ }
+
+ if (n->dn) {
+ atomic->atomic_max_write_size = id->awupf + 1;
+ } else {
+ atomic->atomic_max_write_size = id->awun + 1;
+ }
+
+ if (atomic->atomic_max_write_size == 1) {
+ atomic->atomic_writes = 0;
+ } else {
+ atomic->atomic_writes = 1;
+ }
+ }
}
static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -8756,6 +8909,9 @@ static Property nvme_props[] = {
DEFINE_PROP_UINT16("mqes", NvmeCtrl, params.mqes, 0x7ff),
DEFINE_PROP_UINT16("spdm_port", PCIDevice, spdm_port, 0),
DEFINE_PROP_BOOL("ctratt.mem", NvmeCtrl, params.ctratt.mem, false),
+ DEFINE_PROP_BOOL("atomic.dn", NvmeCtrl, params.atomic_dn, 0),
+ DEFINE_PROP_UINT16("atomic.awun", NvmeCtrl, params.atomic_awun, 0),
+ DEFINE_PROP_UINT16("atomic.awupf", NvmeCtrl, params.atomic_awupf, 0),
DEFINE_PROP_END_OF_LIST(),
};
@@ -220,6 +220,11 @@ typedef struct NvmeNamespaceParams {
} fdp;
} NvmeNamespaceParams;
+typedef struct NvmeAtomic {
+ uint32_t atomic_max_write_size;
+ bool atomic_writes;
+} NvmeAtomic;
+
typedef struct NvmeNamespace {
DeviceState parent_obj;
BlockConf blkconf;
@@ -421,6 +426,7 @@ typedef struct NvmeRequest {
NvmeCmd cmd;
BlockAcctCookie acct;
NvmeSg sg;
+ bool atomic_write;
QTAILQ_ENTRY(NvmeRequest)entry;
} NvmeRequest;
@@ -542,6 +548,10 @@ typedef struct NvmeParams {
struct {
bool mem;
} ctratt;
+
+ uint16_t atomic_awun;
+ uint16_t atomic_awupf;
+ bool atomic_dn;
} NvmeParams;
typedef struct NvmeCtrl {
@@ -623,6 +633,8 @@ typedef struct NvmeCtrl {
uint16_t vqrfap;
uint16_t virfap;
} next_pri_ctrl_cap; /* These override pri_ctrl_cap after reset */
+ uint32_t dn; /* Disable Normal */
+ NvmeAtomic atomic;
} NvmeCtrl;
typedef enum NvmeResetType {