summaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/dev/ic/nvme.c367
-rw-r--r--sys/include/dev/ic/nvmevar.h221
2 files changed, 588 insertions, 0 deletions
diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
new file mode 100644
index 0000000..b4ab531
--- /dev/null
+++ b/sys/dev/ic/nvme.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/driver.h>
+#include <sys/cdefs.h>
+#include <sys/syslog.h>
+#include <sys/timer.h>
+#include <dev/pci/pci.h>
+#include <dev/ic/nvmevar.h>
+#include <vm/dynalloc.h>
+#include <vm/vm.h>
+#include <string.h>
+
+__MODULE_NAME("nvme");
+__KERNEL_META("$Hyra$: nvme.c, Ian Marco Moffett, "
+ "NVMe driver");
+
+#define COMMAND_SIZE 64 /* In bytes (defined by spec) */
+
+/*
+ * Field extractors for the 64-bit controller capabilities
+ * register (`caps' in struct nvme_bar).
+ *
+ * Every argument is parenthesized so that expressions such
+ * as `a | b' can be passed safely.
+ */
+#define CAP_MQES(caps) ((caps) & 0xFFFF)            /* Bits 15:0 */
+#define CAP_TIMEOUT(caps) (((caps) >> 24) & 0xFF)   /* Bits 31:24 */
+#define CAP_STRIDE(caps) (((caps) >> 32) & 0xF)     /* Bits 35:32 */
+/*
+ * XXX: CAP.CSS lives in bits 44:37, not in the low byte (which
+ *      is part of MQES). Bit 7 of the extracted value means
+ *      "no I/O command set", bit 6 means "I/O command sets
+ *      supported".
+ */
+#define CAP_CSS(caps) (((caps) >> 37) & 0xFF)       /* Bits 44:37 */
+#define CAP_MPSMIN(caps) (((caps) >> 48) & 0xF)     /* Bits 51:48 */
+#define CAP_MPSMAX(caps) (((caps) >> 52) & 0xF)     /* Bits 55:52 */
+
+/* CSTS.RDY (bit 0 of the status register) */
+#define STATUS_READY(status) ((status) & 1)
+
+/* Controller configuration: enable bit and command set field */
+#define CONFIG_EN __BIT(0)
+#define CONFIG_CSS_SHIFT 4
+
+/* PCI handle of the NVMe controller; assigned by nvme_init() */
+static struct pci_device *nvme_dev;
+/* General purpose timer; its msleep() hook paces the polling loops */
+static struct timer driver_tmr;
+
+/*
+ * Returns 1 if `ptr' lies on a 4 KiB boundary, otherwise 0.
+ */
+static inline int
+is_4k_aligned(void *ptr)
+{
+    const uintptr_t addr = (uintptr_t)ptr;
+
+    return (addr % 0x1000) == 0;
+}
+
+/*
+ * Wait until CSTS.RDY equals `val'.
+ *
+ * @bar: Controller register block.
+ * @val: Expected ready state (0 or 1).
+ *
+ * The ready bit is sampled up to CAP.TO + 1 times with a 500 ms
+ * sleep between samples.
+ *
+ * XXX: The spec states that CAP.TO (Timeout) is in 500
+ *      millisecond units.
+ *
+ * Returns `val' once the bit matches, or -1 if the timeout
+ * elapsed first.
+ */
+static int
+nvme_poll_ready(struct nvme_bar *bar, uint8_t val)
+{
+    const uint8_t max_ticks = CAP_TIMEOUT(bar->caps);
+    uint8_t ticks;
+
+    for (ticks = 0; ; ++ticks) {
+        if (STATUS_READY(bar->status) == val) {
+            /* Controller reached the requested state */
+            return val;
+        }
+        if (ticks >= max_ticks) {
+            /* Out of time */
+            return -1;
+        }
+
+        /* No change yet, give it another 500 ms */
+        driver_tmr.msleep(500);
+    }
+}
+
+/*
+ * Create an admin queue.
+ *
+ * Allocates zeroed, 4 KiB aligned submission and completion
+ * rings, resets the ring indices and computes the queue 0
+ * doorbell addresses.
+ *
+ * @s: Controller state; only `s->bar' is read.
+ * @queue: Queue descriptor to fill in.
+ *
+ * Returns 0 on success, or -1 if a ring could not be allocated
+ * (in which case nothing is left allocated).
+ */
+static int
+nvme_create_adminq(struct nvme_state *s, struct nvme_queue *queue)
+{
+    struct nvme_bar *bar = s->bar;
+    /* Doorbells start at register offset 0x1000 regardless of page size */
+    const uintptr_t DB_BASE = 0x1000;
+    const uint8_t DBSTRIDE = CAP_STRIDE(bar->caps);
+    size_t slots, sq_len, cq_len;
+
+    /*
+     * CAP.MQES is zero-based, and the admin queue size fields
+     * are only 12 bits wide (at most 4096 entries).
+     */
+    slots = (size_t)CAP_MQES(bar->caps) + 1;
+    if (slots > 4096) {
+        slots = 4096;
+    }
+
+    /* Size each ring by its entry type, not by pointer size */
+    sq_len = slots * sizeof(*queue->sq);
+    cq_len = slots * sizeof(*queue->cq);
+
+    queue->sq = dynalloc_memalign(sq_len, 0x1000);
+    if (queue->sq == NULL) {
+        return -1;
+    }
+
+    queue->cq = dynalloc_memalign(cq_len, 0x1000);
+    if (queue->cq == NULL) {
+        dynfree(queue->sq);
+        queue->sq = NULL;
+        return -1;
+    }
+
+    memset(queue->sq, 0, sq_len);
+    memset(queue->cq, 0, cq_len);
+
+    queue->sq_head = 0;
+    queue->sq_tail = 0;
+    queue->cq_head = 0;
+    queue->size = (uint16_t)slots;
+
+    /*
+     * Queue 0 doorbells: the submission tail doorbell is first,
+     * the completion head doorbell follows one stride later.
+     *
+     * NOTE(review): `bar' is dereferenced directly elsewhere but
+     * passed through PHYS_TO_VIRT() here - confirm the mapping.
+     */
+    queue->sq_db = PHYS_TO_VIRT((uintptr_t)bar + DB_BASE);
+    queue->cq_db = PHYS_TO_VIRT((uintptr_t)bar + DB_BASE + 1 * (4 << DBSTRIDE));
+
+    /* The rings are zeroed, so the first valid entry has phase 1 */
+    queue->cq_phase = 1;
+    return 0;
+}
+
+/*
+ * Place a command in the submission ring and ring the
+ * submission doorbell.
+ *
+ * @queue: Target queue.
+ * @cmd: Command to submit (copied into the ring).
+ */
+static void
+nvme_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
+{
+    const uint16_t slot = queue->sq_tail;
+
+    /* Copy the command into the current tail slot */
+    queue->sq[slot] = cmd;
+
+    /* Advance the tail, wrapping at the end of the ring */
+    queue->sq_tail = (slot + 1 >= queue->size) ? 0 : slot + 1;
+
+    /* Publish the new tail to the controller */
+    *(queue->sq_db) = queue->sq_tail;
+}
+
+/*
+ * Submit a command and poll for completion
+ *
+ * @queue: Target queue.
+ * @cmd: Command to submit
+ *
+ * The completion entry at `cq_head' is polled (150 ms between
+ * attempts) until its phase bit matches the expected phase.
+ *
+ * Returns 0 if the command completed with a zero status field,
+ * or -1 if it completed with an error or never completed. On a
+ * timeout the completion head is left untouched because no
+ * entry was consumed.
+ */
+static int
+nvme_poll_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
+{
+    /* The entry is written by the device; force a reload on every read */
+    volatile struct nvme_cq_entry *cqe;
+    uint16_t status;
+    size_t spins = 0;
+    int error = 0;
+
+    nvme_submit_cmd(queue, cmd);
+    cqe = &queue->cq[queue->cq_head];
+
+    for (;;) {
+        status = cqe->status;
+        if ((status & 1) == queue->cq_phase) {
+            /*
+             * The phase bit matches the expected phase, the
+             * controller has posted this entry.
+             */
+            break;
+        }
+        if (spins > 5) {
+            /* Attempts exhausted */
+            KERR("Hang on phase bit poll, giving up (cmd error)\n");
+            return -1;
+        }
+
+        /* Not done, give it some more time */
+        driver_tmr.msleep(150);
+        ++spins;
+    }
+
+    /*
+     * Only a posted entry carries a meaningful status; bits
+     * above the phase bit are nonzero on failure.
+     */
+    if ((status >> 1) != 0) {
+        KDEBUG("NVMe cmd error (bits=0x%x)\n", status >> 1);
+        error = -1;
+    }
+
+    /* Consume the entry; the expected phase flips on every wrap */
+    ++queue->cq_head;
+    if (queue->cq_head >= queue->size) {
+        queue->cq_head = 0;
+        queue->cq_phase = !queue->cq_phase;
+    }
+
+    /* Tell the controller that `head' updated */
+    *(queue->cq_db) = queue->cq_head;
+    return error;
+}
+
+/*
+ * Issue an identify command for the current
+ * controller.
+ *
+ * @state: Controller state; the command goes to its admin queue.
+ * @id: Buffer that receives the identify data.
+ *
+ * XXX: `id' must be aligned on a 4k byte boundary to avoid
+ *      crossing a page boundary. This keeps the implementation
+ *      as simple as possible here.
+ *
+ * Returns -1 if `id' is misaligned, otherwise the result of
+ * nvme_poll_submit_cmd().
+ */
+static int
+nvme_identify(struct nvme_state *state, struct nvme_id *id)
+{
+    struct nvme_cmd cmd = {0};
+    struct nvme_identify_cmd *identify = &cmd.identify;
+
+    /* Ensure `id' is aligned on a 4k byte boundary */
+    if (!is_4k_aligned(id)) {
+        return -1;
+    }
+
+    identify->opcode = NVME_OP_IDENTIFY;
+    identify->nsid = 0;
+    identify->cns = 1;      /* Identify controller */
+    identify->prp1 = VIRT_TO_PHYS(id);
+    identify->prp2 = 0;     /* No need, data address is 4k aligned */
+
+    /* Hand the command outcome to the caller instead of dropping it */
+    return nvme_poll_submit_cmd(&state->adminq, cmd);
+}
+
+/*
+ * Clear the enable bit (if set) and wait for the controller to
+ * report not-ready.
+ *
+ * Returns 0 on success, -1 if the ready bit never cleared.
+ */
+static int
+nvme_disable_controller(struct nvme_state *state)
+{
+    struct nvme_bar *bar = state->bar;
+    int error = 0;
+
+    /* Only touch the register when the controller is enabled */
+    if (__TEST(bar->config, CONFIG_EN)) {
+        bar->config &= ~CONFIG_EN;
+    }
+
+    if (nvme_poll_ready(bar, 0) < 0) {
+        KERR("Failed to disable controller\n");
+        error = -1;
+    }
+
+    return error;
+}
+
+/*
+ * Debug helper: prints the model number and firmware revision
+ * from the controller identify data.
+ *
+ * Both fields are copied into local buffers with room for a
+ * terminating NUL before being printed with `%s'.
+ */
+static void
+nvme_log_ctrl_id(struct nvme_id *id)
+{
+    char model[sizeof(id->mn) + 1];
+    char firmware[sizeof(id->fr) + 1];
+    size_t n;
+
+    for (n = 0; n < sizeof(id->mn); ++n) {
+        model[n] = id->mn[n];
+    }
+    model[n] = '\0';
+
+    for (n = 0; n < sizeof(id->fr); ++n) {
+        firmware[n] = id->fr[n];
+    }
+    firmware[n] = '\0';
+
+    KDEBUG("NVMe model: %s\n", model);
+    KDEBUG("NVMe firmware revision: %s\n", firmware);
+}
+
+/*
+ * Set the enable bit (if clear), wait for the controller to
+ * become ready, then identify it and log the result.
+ *
+ * Returns 0 on success. Returns -1 if the controller never
+ * became ready, if the identify buffer could not be allocated,
+ * or if the identify command failed.
+ */
+static int
+nvme_enable_controller(struct nvme_state *state)
+{
+    struct nvme_bar *bar = state->bar;
+    struct nvme_id *id;
+    int error;
+
+    if (!__TEST(bar->config, CONFIG_EN)) {
+        bar->config |= CONFIG_EN;
+    }
+
+    if (nvme_poll_ready(bar, 1) < 0) {
+        /* Commands cannot be processed by a controller that is not ready */
+        KERR("Failed to enable controller\n");
+        return -1;
+    }
+
+    id = dynalloc_memalign(sizeof(*id), 0x1000);
+    if (id == NULL) {
+        return -1;
+    }
+
+    /* Never log stale heap contents if the controller writes nothing */
+    memset(id, 0, sizeof(*id));
+
+    error = nvme_identify(state, id);
+    if (error == 0) {
+        nvme_log_ctrl_id(id);
+    }
+
+    dynfree(id);
+    return (error < 0) ? -1 : 0;
+}
+
+/*
+ * Bring the controller behind `bar' into an enabled state.
+ *
+ * Enables bus mastering and memory space decoding, disables the
+ * controller, creates the admin queue pair and programs it into
+ * the controller, selects a command set and re-enables the
+ * controller.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ *
+ * NOTE(review): `state' lives on this function's stack, so the
+ * admin queue pointers are lost on return while the controller
+ * keeps the ring addresses - confirm this is intended.
+ */
+static int
+nvme_init_controller(struct nvme_bar *bar)
+{
+    struct nvme_state state = { .bar = bar };
+    struct nvme_queue *adminq = &state.adminq;
+
+    uint8_t cap_css = CAP_CSS(bar->caps);
+    uint16_t cmdreg_bits = PCI_BUS_MASTERING |
+                           PCI_MEM_SPACE;
+    uint32_t config, last_slot;
+
+    pci_set_cmdreg(nvme_dev, cmdreg_bits);
+
+    /* The queue registers may only be changed while disabled */
+    if (nvme_disable_controller(&state) < 0) {
+        return -1;
+    }
+
+    if (nvme_create_adminq(&state, adminq) < 0) {
+        KERR("Failed to create admin queue\n");
+        return -1;
+    }
+
+    /*
+     * The admin queue size fields are zero-based and 12 bits
+     * wide: at least 2 and at most 4096 entries. Derive them
+     * from the ring size the driver wraps at so both sides
+     * agree.
+     */
+    if (adminq->size < 2) {
+        dynfree(adminq->sq);
+        dynfree(adminq->cq);
+        return -1;
+    }
+    if (adminq->size > 4096) {
+        adminq->size = 4096;
+    }
+    last_slot = (uint32_t)adminq->size - 1;
+
+    /* Setup admin submission and admin completion queues */
+    bar->aqa = (last_slot | last_slot << 16);
+    bar->asq = VIRT_TO_PHYS(adminq->sq);
+    bar->acq = VIRT_TO_PHYS(adminq->cq);
+
+    /*
+     * Set up supported command sets. The 3-bit field is cleared
+     * first so bits left over from earlier configuration cannot
+     * combine with the new value.
+     */
+    config = bar->config & ~((uint32_t)7 << CONFIG_CSS_SHIFT);
+    if (__TEST(cap_css, __BIT(7))) {
+        /* Admin command sets only */
+        config |= ((uint32_t)7 << CONFIG_CSS_SHIFT);
+    } else if (__TEST(cap_css, __BIT(6))) {
+        /* All supported I/O command sets */
+        config |= ((uint32_t)6 << CONFIG_CSS_SHIFT);
+    }
+    bar->config = config;
+
+    if (nvme_enable_controller(&state) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Driver entry point.
+ *
+ * Fetches a general purpose timer with msleep() support, looks
+ * up the first PCI device with class 1 / subclass 8 and
+ * initializes the controller behind its BAR0.
+ *
+ * Returns 0 on success, -1 if the timer is unusable, no device
+ * was found or controller initialization failed.
+ */
+static int
+nvme_init(void)
+{
+    struct nvme_bar *bar;
+    struct pci_lookup nvme_lookup = {
+        .pci_class = 1,
+        .pci_subclass = 8
+    };
+
+    if (req_timer(TIMER_GP, &driver_tmr) != 0) {
+        KERR("Failed to fetch general purpose timer\n");
+        return -1;
+    }
+
+    if (driver_tmr.msleep == NULL) {
+        KERR("Timer does not have msleep()\n");
+        return -1;
+    }
+
+    nvme_dev = pci_get_device(nvme_lookup, PCI_CLASS | PCI_SUBCLASS);
+    if (nvme_dev == NULL) {
+        return -1;
+    }
+
+    /*
+     * The low four bits of a memory BAR are flags (space
+     * indicator, type, prefetchable), not address bits.
+     *
+     * NOTE(review): only bar[0] is used; confirm whether the
+     * upper half of a 64-bit BAR needs to be merged in.
+     */
+    bar = (struct nvme_bar *)(nvme_dev->bar[0] & ~(uintptr_t)0xF);
+    KINFO("NVMe BAR0 @ 0x%p\n", bar);
+
+    if (nvme_init_controller(bar) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+DRIVER_EXPORT(nvme_init);
diff --git a/sys/include/dev/ic/nvmevar.h b/sys/include/dev/ic/nvmevar.h
new file mode 100644
index 0000000..a4a5db6
--- /dev/null
+++ b/sys/include/dev/ic/nvmevar.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IC_NVMEVAR_H_
+#define _IC_NVMEVAR_H_
+
+#include <sys/cdefs.h>
+
+#define NVME_OP_IDENTIFY 0x06
+
+/* I/O commands */
+#define NVME_OP_READ 0x02
+
+/*
+ * Generic view of a submission queue entry; the members add
+ * up to 64 bytes. Used as the `common' member of struct nvme_cmd.
+ */
+struct nvme_common_cmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint32_t cdw1[2];
+ uint64_t metadata;
+ uint64_t prp1;
+ uint64_t prp2;
+ uint32_t cdw2[6];
+};
+
+/*
+ * Identify command view of a submission queue entry; the
+ * members add up to 64 bytes. Used as the `identify' member
+ * of struct nvme_cmd.
+ */
+struct nvme_identify_cmd {
+ uint8_t opcode; /* NVME_OP_IDENTIFY */
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint64_t unused1;
+ uint64_t unused2;
+ uint64_t prp1; /* Physical address of the output buffer */
+ uint64_t prp2;
+ uint32_t cns; /* 1 selects the controller data structure */
+ uint32_t unused3[5];
+};
+
+/*
+ * Read/write command view of a submission queue entry; the
+ * members add up to 64 bytes.
+ *
+ * XXX: Not a member of struct nvme_cmd in this header.
+ */
+struct nvme_rw_cmd {
+ uint8_t opcode;
+ uint8_t flags;
+ uint16_t cid;
+ uint32_t nsid;
+ uint64_t unused;
+ uint64_t metadata;
+ uint64_t prp1;
+ uint64_t prp2;
+ uint64_t slba; /* presumably the starting LBA - TODO confirm */
+ uint16_t len;
+ uint16_t control;
+ uint32_t dsmgmt;
+ uint32_t ref;
+ uint16_t apptag;
+ uint16_t appmask;
+};
+
+/*
+ * A single submission queue entry. The anonymous union lets
+ * the same 64 bytes be filled in through a command specific
+ * view (`identify') or the generic view (`common').
+ */
+struct nvme_cmd {
+ union {
+ struct nvme_identify_cmd identify;
+ struct nvme_common_cmd common;
+ };
+};
+
+/*
+ * Identify controller data structure.
+ *
+ * The layout must be exactly 4096 bytes: `oacs' at offset 256,
+ * `sqes' at offset 512 and the vendor specific area `vs' at
+ * offset 3072. The reserved arrays exist only to pad members
+ * to their offsets.
+ */
+struct nvme_id {
+    uint16_t vid;
+    uint16_t ssvid;
+    char sn[20];
+    char mn[40];            /* Model number (not NUL terminated) */
+    char fr[8];             /* Firmware revision (not NUL terminated) */
+    uint8_t rab;
+    uint8_t ieee[3];
+    uint8_t mic;
+    uint8_t mdts;
+    uint16_t ctrlid;
+    uint32_t version;
+    uint32_t unused1[43];   /* Pads up to offset 256 */
+    uint16_t oacs;
+    uint8_t acl;
+    uint8_t aerl;
+    uint8_t fw;
+    uint8_t lpa;
+    uint8_t elpe;
+    uint8_t npss;
+    uint8_t avscc;
+    uint8_t apsta;
+    uint16_t wctemp;
+    uint16_t cctemp;
+    uint16_t unused2[121];  /* Pads up to offset 512 */
+    uint8_t sqes;
+    uint8_t cqes;
+    uint16_t unused3;
+    uint32_t nn;
+    uint16_t oncs;
+    uint16_t fuses;
+    uint8_t fna;
+    uint8_t vwc;
+    uint16_t awun;
+    uint16_t awupf;
+    uint8_t nvscc;
+    uint8_t unused4;
+    uint16_t acwu;
+    uint16_t unused5;
+    uint32_t sgls;
+    /*
+     * Pads from offset 540 up to offset 3072 (2532 bytes).
+     * A count of 1401 here would push `vs' to offset 6144 and
+     * grow the structure to 7168 bytes.
+     */
+    uint32_t unused6[633];
+    uint8_t vs[1024];       /* Vendor specific, offsets 3072-4095 */
+};
+
+/*
+ * Command completion queue entry (16 bytes).
+ *
+ * Bit 0 of `status' is the phase bit; the remaining bits are
+ * nonzero when the command failed.
+ */
+struct nvme_cq_entry {
+ uint32_t res;
+ uint32_t unused;
+ uint16_t sqhead;
+ uint16_t sqid;
+ uint16_t cid;
+ uint16_t status;
+};
+
+/*
+ * NVMe controller register block, as found behind PCI BAR0.
+ *
+ * Packed so that member offsets follow the declaration order
+ * exactly, without compiler inserted padding.
+ */
+struct __packed nvme_bar {
+ uint64_t caps; /* Controller capabilities */
+ uint32_t version;
+ uint32_t intms; /* Interrupt mask set */
+ uint32_t intmc; /* Interrupt mask clear */
+ uint32_t config; /* Controller configuration (bit 0: enable) */
+ uint32_t unused1;
+ uint32_t status; /* Controller status (bit 0: ready) */
+ uint32_t unused2;
+ uint32_t aqa; /* Admin queue attributes */
+ uint64_t asq; /* Admin submission queue */
+ uint64_t acq; /* Admin completion queue */
+};
+
+/*
+ * LBA format descriptor (4 bytes); struct nvme_id_ns embeds
+ * an array of 16 of these.
+ */
+struct nvme_lbaf {
+ uint16_t ms; /* Number of metadata bytes per LBA */
+ uint8_t ds; /* Data size */
+ uint8_t rp; /* presumably relative performance - TODO confirm */
+};
+
+/*
+ * Identify namespace data
+ *
+ * The members add up to 4096 bytes; the `unused' members and
+ * `unusued3' only pad later members to their offsets.
+ *
+ * NOTE(review): `unusued3' is misspelled, but it cannot simply
+ * be renamed to `unused3' since a later member already has
+ * that name.
+ */
+struct nvme_id_ns {
+ uint64_t size;
+ uint64_t capabilities;
+ uint64_t nuse;
+ uint8_t features;
+ uint8_t nlbaf;
+ uint8_t flbas;
+ uint8_t mc;
+ uint8_t dpc;
+ uint8_t dps;
+ uint8_t nmic;
+ uint8_t rescap;
+ uint8_t fpi;
+ uint8_t unused1;
+ uint16_t nawun;
+ uint16_t nawupf;
+ uint16_t nacwu;
+ uint16_t nabsn;
+ uint16_t nabo;
+ uint16_t nabspf;
+ uint16_t unused2;
+ uint64_t nvmcap[2];
+ uint64_t unusued3[5];
+ uint8_t nguid[16];
+ uint8_t eui64[8];
+ struct nvme_lbaf lbaf[16]; /* LBA format descriptors */
+ uint64_t unused3[24];
+ uint8_t vs[3712];
+};
+
+/*
+ * Driver side bookkeeping for one submission/completion
+ * queue pair. Both rings hold `size' entries.
+ */
+struct nvme_queue {
+ struct nvme_cmd *sq; /* Submission queue */
+ struct nvme_cq_entry *cq; /* Completion queue */
+ uint16_t sq_head; /* Submission queue head */
+ uint16_t sq_tail; /* Submission queue tail */
+ uint16_t cq_head; /* Completion queue head */
+ uint8_t cq_phase : 1; /* Completion queue phase bit */
+ uint16_t size; /* Size in elements */
+ volatile uint32_t *sq_db; /* Submission doorbell */
+ volatile uint32_t *cq_db; /* Completion doorbell */
+};
+
+/* Per-controller driver state */
+struct nvme_state {
+ struct nvme_queue adminq; /* Admin queue pair */
+ struct nvme_bar *bar; /* Controller register block */
+};
+
+/*
+ * NVMe namespace
+ *
+ * Ties a namespace to the controller state it belongs to.
+ */
+struct nvme_ns {
+ size_t nsid; /* Namespace ID */
+ size_t lba_bsize; /* LBA block size */
+ struct nvme_state *cntl; /* NVMe controller */
+};
+
+#endif