* -device nvme,drive=<drive_id>,serial=<serial>,id=<id[optional]>, \
* cmb_size_mb=<cmb_size_mb[optional]>, \
* [pmrdev=<mem_backend_file_id>,] \
- * max_ioqpairs=<N[optional]>
+ * max_ioqpairs=<N[optional]>, \
+ * aerl=<N[optional]>, aer_max_queued=<N[optional]>
*
* Note cmb_size_mb denotes size of CMB in MB. CMB is assumed to be at
* offset 0 in BAR2 and supports only WDS, RDS and SQS for now.
* For example:
* -object memory-backend-file,id=<mem_id>,share=on,mem-path=<file_path>, \
* size=<size> .... -device nvme,...,pmrdev=<mem_id>
+ *
+ *
+ * nvme device parameters
+ * ~~~~~~~~~~~~~~~~~~~~~~
+ * - `aerl`
+ * The Asynchronous Event Request Limit (AERL). Indicates the maximum number
+ * of concurrently outstanding Asynchronous Event Request commands suppoert
+ * by the controller. This is a 0's based value.
+ *
+ * - `aer_max_queued`
+ * This is the maximum number of events that the device will enqueue for
+ * completion when there are no oustanding AERs. When the maximum number of
+ * enqueued events are reached, subsequent events will be dropped.
+ *
*/
#include "qemu/osdep.h"
timer_mod(cq->timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 500);
}
+static void nvme_process_aers(void *opaque)
+{
+ NvmeCtrl *n = opaque;
+ NvmeAsyncEvent *event, *next;
+
+ trace_pci_nvme_process_aers(n->aer_queued);
+
+ QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) {
+ NvmeRequest *req;
+ NvmeAerResult *result;
+
+ /* can't post cqe if there is nothing to complete */
+ if (!n->outstanding_aers) {
+ trace_pci_nvme_no_outstanding_aers();
+ break;
+ }
+
+ /* ignore if masked (cqe posted, but event not cleared) */
+ if (n->aer_mask & (1 << event->result.event_type)) {
+ trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask);
+ continue;
+ }
+
+ QTAILQ_REMOVE(&n->aer_queue, event, entry);
+ n->aer_queued--;
+
+ n->aer_mask |= 1 << event->result.event_type;
+ n->outstanding_aers--;
+
+ req = n->aer_reqs[n->outstanding_aers];
+
+ result = (NvmeAerResult *) &req->cqe.result;
+ result->event_type = event->result.event_type;
+ result->event_info = event->result.event_info;
+ result->log_page = event->result.log_page;
+ g_free(event);
+
+ req->status = NVME_SUCCESS;
+
+ trace_pci_nvme_aer_post_cqe(result->event_type, result->event_info,
+ result->log_page);
+
+ nvme_enqueue_req_completion(&n->admin_cq, req);
+ }
+}
+
+static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type,
+ uint8_t event_info, uint8_t log_page)
+{
+ NvmeAsyncEvent *event;
+
+ trace_pci_nvme_enqueue_event(event_type, event_info, log_page);
+
+ if (n->aer_queued == n->params.aer_max_queued) {
+ trace_pci_nvme_enqueue_event_noqueue(n->aer_queued);
+ return;
+ }
+
+ event = g_new(NvmeAsyncEvent, 1);
+ event->result = (NvmeAerResult) {
+ .event_type = event_type,
+ .event_info = event_info,
+ .log_page = log_page,
+ };
+
+ QTAILQ_INSERT_TAIL(&n->aer_queue, event, entry);
+ n->aer_queued++;
+
+ nvme_process_aers(n);
+}
+
+static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type)
+{
+ n->aer_mask &= ~(1 << event_type);
+ if (!QTAILQ_EMPTY(&n->aer_queue)) {
+ nvme_process_aers(n);
+ }
+}
+
static void nvme_rw_cb(void *opaque, int ret)
{
NvmeRequest *req = opaque;
return NVME_SUCCESS;
}
-static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
- uint64_t off, NvmeRequest *req)
+static uint16_t nvme_smart_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
{
uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
smart.power_on_hours[0] =
cpu_to_le64((((current_ms - n->starttime_ms) / 1000) / 60) / 60);
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_SMART);
+ }
+
return nvme_dma_read_prp(n, (uint8_t *) &smart + off, trans_len, prp1,
prp2);
}
prp2);
}
-static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint32_t buf_len,
- uint64_t off, NvmeRequest *req)
+static uint16_t nvme_error_info(NvmeCtrl *n, NvmeCmd *cmd, uint8_t rae,
+ uint32_t buf_len, uint64_t off,
+ NvmeRequest *req)
{
uint32_t trans_len;
uint64_t prp1 = le64_to_cpu(cmd->dptr.prp1);
uint64_t prp2 = le64_to_cpu(cmd->dptr.prp2);
NvmeErrorLog errlog;
+ if (!rae) {
+ nvme_clear_events(n, NVME_AER_TYPE_ERROR);
+ }
+
if (off > sizeof(errlog)) {
return NVME_INVALID_FIELD | NVME_DNR;
}
switch (lid) {
case NVME_LOG_ERROR_INFO:
- return nvme_error_info(n, cmd, len, off, req);
+ return nvme_error_info(n, cmd, rae, len, off, req);
case NVME_LOG_SMART_INFO:
- return nvme_smart_info(n, cmd, len, off, req);
+ return nvme_smart_info(n, cmd, rae, len, off, req);
case NVME_LOG_FW_SLOT_INFO:
return nvme_fw_log_info(n, cmd, len, off, req);
default:
((n->params.max_ioqpairs - 1) << 16);
trace_pci_nvme_getfeat_numq(result);
break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ result = n->features.async_config;
+ break;
case NVME_TIMESTAMP:
return nvme_get_feature_timestamp(n, cmd);
default:
return NVME_INVALID_FIELD | NVME_DNR;
}
+ if (((n->temperature >= n->features.temp_thresh_hi) ||
+ (n->temperature <= n->features.temp_thresh_low)) &&
+ NVME_AEC_SMART(n->features.async_config) & NVME_SMART_TEMPERATURE) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_SMART,
+ NVME_AER_INFO_SMART_TEMP_THRESH,
+ NVME_LOG_SMART_INFO);
+ }
+
break;
case NVME_VOLATILE_WRITE_CACHE:
blk_set_enable_write_cache(n->conf.blk, dw11 & 1);
req->cqe.result = cpu_to_le32((n->params.max_ioqpairs - 1) |
((n->params.max_ioqpairs - 1) << 16));
break;
+ case NVME_ASYNCHRONOUS_EVENT_CONF:
+ n->features.async_config = dw11;
+ break;
case NVME_TIMESTAMP:
return nvme_set_feature_timestamp(n, cmd);
default:
return NVME_SUCCESS;
}
+static uint16_t nvme_aer(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
+{
+ trace_pci_nvme_aer(nvme_cid(req));
+
+ if (n->outstanding_aers > n->params.aerl) {
+ trace_pci_nvme_aer_aerl_exceeded();
+ return NVME_AER_LIMIT_EXCEEDED;
+ }
+
+ n->aer_reqs[n->outstanding_aers] = req;
+ n->outstanding_aers++;
+
+ if (!QTAILQ_EMPTY(&n->aer_queue)) {
+ nvme_process_aers(n);
+ }
+
+ return NVME_NO_COMPLETE;
+}
+
static uint16_t nvme_admin_cmd(NvmeCtrl *n, NvmeCmd *cmd, NvmeRequest *req)
{
trace_pci_nvme_admin_cmd(nvme_cid(req), nvme_sqid(req), cmd->opcode);
return nvme_set_feature(n, cmd, req);
case NVME_ADM_CMD_GET_FEATURES:
return nvme_get_feature(n, cmd, req);
+ case NVME_ADM_CMD_ASYNC_EV_REQ:
+ return nvme_aer(n, cmd, req);
default:
trace_pci_nvme_err_invalid_admin_opc(cmd->opcode);
return NVME_INVALID_OPCODE | NVME_DNR;
}
}
+ while (!QTAILQ_EMPTY(&n->aer_queue)) {
+ NvmeAsyncEvent *event = QTAILQ_FIRST(&n->aer_queue);
+ QTAILQ_REMOVE(&n->aer_queue, event, entry);
+ g_free(event);
+ }
+
+ n->aer_queued = 0;
+ n->outstanding_aers = 0;
+
blk_flush(n->conf.blk);
n->bar.cc = 0;
}
nvme_set_timestamp(n, 0ULL);
+ QTAILQ_INIT(&n->aer_queue);
+
return 0;
}
"completion queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ /*
+ * NVM Express v1.3d, Section 4.1 state: "If host software writes
+ * an invalid value to the Submission Queue Tail Doorbell or
+ * Completion Queue Head Doorbell regiter and an Asynchronous Event
+ * Request command is outstanding, then an asynchronous event is
+ * posted to the Admin Completion Queue with a status code of
+ * Invalid Doorbell Write Value."
+ *
+ * Also note that the spec includes the "Invalid Doorbell Register"
+ * status code, but nowhere does it specify when to use it.
+ * However, it seems reasonable to use it here in a similar
+ * fashion.
+ */
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
" beyond queue size, sqid=%"PRIu32","
" new_head=%"PRIu16", ignoring",
qid, new_head);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
"submission queue doorbell write"
" for nonexistent queue,"
" sqid=%"PRIu32", ignoring", qid);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_REGISTER,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
" beyond queue size, sqid=%"PRIu32","
" new_tail=%"PRIu16", ignoring",
qid, new_tail);
+
+ if (n->outstanding_aers) {
+ nvme_enqueue_event(n, NVME_AER_TYPE_ERROR,
+ NVME_AER_INFO_ERR_INVALID_DB_VALUE,
+ NVME_LOG_ERROR_INFO);
+ }
+
return;
}
n->temperature = NVME_TEMPERATURE;
n->features.temp_thresh_hi = NVME_TEMPERATURE_WARNING;
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
+ n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
}
static void nvme_init_blk(NvmeCtrl *n, Error **errp)
* inconsequential.
*/
id->acl = 3;
+ id->aerl = n->params.aerl;
id->frmw = (NVME_NUM_FW_SLOTS << 1) | NVME_FRMW_SLOT1_RO;
id->lpa = NVME_LPA_EXTENDED;
g_free(n->namespaces);
g_free(n->cq);
g_free(n->sq);
+ g_free(n->aer_reqs);
if (n->params.cmb_size_mb) {
g_free(n->cmbuf);
DEFINE_PROP_UINT32("num_queues", NvmeCtrl, params.num_queues, 0),
DEFINE_PROP_UINT32("max_ioqpairs", NvmeCtrl, params.max_ioqpairs, 64),
DEFINE_PROP_UINT16("msix_qsize", NvmeCtrl, params.msix_qsize, 65),
+ DEFINE_PROP_UINT8("aerl", NvmeCtrl, params.aerl, 3),
+ DEFINE_PROP_UINT32("aer_max_queued", NvmeCtrl, params.aer_max_queued, 64),
DEFINE_PROP_END_OF_LIST(),
};