aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/ic/nvme.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/ic/nvme.c')
-rw-r--r-- sys/dev/ic/nvme.c | 389
1 files changed, 389 insertions, 0 deletions
diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
new file mode 100644
index 0000000..5f92d0e
--- /dev/null
+++ b/sys/dev/ic/nvme.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/driver.h>
+#include <sys/errno.h>
+#include <sys/sched.h>
+#include <sys/syslog.h>
+#include <sys/mmio.h>
+#include <dev/ic/nvmeregs.h>
+#include <dev/ic/nvmevar.h>
+#include <dev/pci/pci.h>
+#include <dev/pci/pciregs.h>
+#include <dev/timer.h>
+#include <vm/dynalloc.h>
+#include <vm/vm.h>
+#include <string.h>
+
+#define pr_trace(fmt, ...) kprintf("nvme: " fmt, ##__VA_ARGS__) /* kprintf() with an "nvme: " prefix on the format */
+#define pr_error(...) pr_trace(__VA_ARGS__) /* forwards unchanged to pr_trace() */
+
+static struct pci_device *nvme_dev; /* assigned from pci_get_device() in nvme_init() */
+static struct timer tmr; /* filled in by req_timer() in nvme_init() */
+
+/*
+ * Returns nonzero when 'ptr' lies on a 4 KiB (0x1000) boundary,
+ * zero otherwise.
+ */
+static inline int
+is_4k_aligned(void *ptr)
+{
+    const uintptr_t low_bits = (uintptr_t)ptr & (0x1000 - 1);
+
+    return low_bits == 0;
+}
+
+/*
+ * Poll register to have 'bits' set/unset.
+ *
+ * @bar: Controller register block; its capabilities register supplies
+ *       the timeout through CAP_TIMEOUT().
+ * @reg: Register to poll.
+ * @bits: Bits to be checked.
+ * @pollset: True to poll as set.
+ *
+ * Returns 0 once the condition holds, or -ETIME if it still does not
+ * hold after the timeout has elapsed.
+ */
+static int
+nvme_poll_reg(struct nvme_bar *bar, volatile uint32_t *reg, uint32_t bits,
+    bool pollset)
+{
+    size_t usec_start, elapsed_msec;
+    uint32_t val, caps;
+    bool done;
+
+    usec_start = tmr.get_time_usec();
+    caps = mmio_read32(&bar->caps);
+
+    for (;;) {
+        val = mmio_read32(reg);
+        done = (pollset) ? ISSET(val, bits) : !ISSET(val, bits);
+
+        if (done) {
+            /*
+             * Report plain success. Handing back the raw register value
+             * as an int would turn any value with bit 31 set into a
+             * negative number, which callers (they test '< 0') would
+             * mistake for a timeout.
+             */
+            return 0;
+        }
+
+        /*
+         * NOTE(review): elapsed time is compared in milliseconds; the
+         * NVMe CAP.TO field is specified in 500 ms units, so confirm
+         * that CAP_TIMEOUT() already converts to milliseconds.
+         */
+        elapsed_msec = (tmr.get_time_usec() - usec_start) / 1000;
+        if (elapsed_msec > CAP_TIMEOUT(caps)) {
+            return -ETIME;
+        }
+    }
+}
+
+/*
+ * Allocate and initialize a submission/completion queue pair.
+ *
+ * @bar: Controller register block; the doorbell addresses are computed
+ *       relative to it.
+ * @queue: Queue descriptor to fill in.
+ * @id: Queue ID, used to locate this queue's doorbell registers.
+ *
+ * Returns 0 on success or -ENOMEM if either ring could not be
+ * allocated (in which case nothing stays allocated).
+ */
+static int
+nvme_create_queue(struct nvme_bar *bar, struct nvme_queue *queue, size_t id)
+{
+    uint8_t dbstride;
+    uint16_t slots;
+    uint64_t caps;
+    size_t sq_bytes, cq_bytes;
+    uintptr_t db_base, db_step;
+
+    /*
+     * The capabilities register is 64 bits wide and the doorbell stride
+     * lives in its upper half, so a 32-bit read would always yield a
+     * stride of zero.
+     */
+    caps = mmio_read64(&bar->caps);
+    dbstride = CAP_STRIDE(caps);
+    slots = CAP_MQES(caps);
+
+    /* Size the rings by their entry types, not by pointer size */
+    sq_bytes = sizeof(*queue->sq) * slots;
+    cq_bytes = sizeof(*queue->cq) * slots;
+
+    queue->sq = dynalloc_memalign(sq_bytes, 0x1000);
+    queue->cq = dynalloc_memalign(cq_bytes, 0x1000);
+
+    if (queue->sq == NULL || queue->cq == NULL) {
+        /* Release whichever half did get allocated */
+        if (queue->sq != NULL) {
+            dynfree(queue->sq);
+        }
+        if (queue->cq != NULL) {
+            dynfree(queue->cq);
+        }
+        queue->sq = NULL;
+        queue->cq = NULL;
+        return -ENOMEM;
+    }
+
+    memset(queue->sq, 0, sq_bytes);
+    memset(queue->cq, 0, cq_bytes);
+
+    /*
+     * Doorbells start DEFAULT_PAGESIZE bytes into the register block;
+     * each queue owns two consecutive slots (submission tail first,
+     * then completion head), each (4 << dbstride) bytes apart.
+     */
+    db_base = (uintptr_t)bar + DEFAULT_PAGESIZE;
+    db_step = (uintptr_t)4 << dbstride;
+
+    queue->sq_head = 0;
+    queue->sq_tail = 0;
+    queue->cq_head = 0;
+
+    queue->size = slots;
+    /* The completion ring was zeroed, so the first valid phase is 1 */
+    queue->cq_phase = 1;
+    queue->sq_db = (void *)(db_base + (2 * id * db_step));
+    queue->cq_db = (void *)(db_base + ((2 * id + 1) * db_step));
+    return 0;
+}
+
+/*
+ * Disable (and thereby reset) the NVMe controller.
+ *
+ * Returns 0 when the controller is not ready afterwards, or -ETIME
+ * if the ready bit did not clear in time.
+ */
+static int
+nvme_stop_ctrl(struct nvme_bar *bar)
+{
+    uint32_t csts, cc;
+
+    csts = mmio_read32(&bar->status);
+
+    /* Only a controller that reports CSTS.RDY gets reset */
+    if (ISSET(csts, STATUS_RDY)) {
+        /* Dropping the enable bit starts the reset */
+        cc = mmio_read32(&bar->config);
+        cc &= ~CONFIG_EN;
+        mmio_write32(&bar->config, cc);
+
+        /* Wait until the ready bit has gone away */
+        if (nvme_poll_reg(bar, &bar->status, STATUS_RDY, false) < 0) {
+            pr_error("Controller reset timeout\n");
+            return -ETIME;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Enable the controller and wait for it to become ready.
+ *
+ * Returns 0 when the controller is ready (including when it already
+ * was), or -ETIME if the ready bit did not come up in time.
+ */
+static int
+nvme_start_ctrl(struct nvme_bar *bar)
+{
+    uint32_t csts, cc;
+
+    csts = mmio_read32(&bar->status);
+
+    /* Nothing to do for a controller that is already up */
+    if (!ISSET(csts, STATUS_RDY)) {
+        /* Raise the enable bit */
+        cc = mmio_read32(&bar->config);
+        cc |= CONFIG_EN;
+        mmio_write32(&bar->config, cc);
+
+        /* Wait for the ready bit to show up */
+        if (nvme_poll_reg(bar, &bar->status, STATUS_RDY, true) < 0) {
+            pr_error("Controller startup timeout\n");
+            return -ETIME;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Place 'cmd' in the next submission slot of 'q', advance the tail
+ * (wrapping to 0 at q->size) and write the new tail to the
+ * submission doorbell.
+ */
+static void
+nvme_submit_cmd(struct nvme_queue *q, struct nvme_cmd cmd)
+{
+    q->sq[q->sq_tail] = cmd;
+    ++q->sq_tail;
+
+    /* Wrap around once the end of the ring is reached */
+    if (!(q->sq_tail < q->size)) {
+        q->sq_tail = 0;
+    }
+
+    mmio_write32(q->sq_db, q->sq_tail);
+}
+
+/*
+ * Submit a command and poll for completion.
+ *
+ * @q: Queue pair to submit on.
+ * @cmd: Command to submit.
+ *
+ * Returns 0 when the command completed without error bits, -EIO when
+ * the completion entry carries a non-zero status, or -ETIME when no
+ * completion showed up while polling.
+ */
+static int
+nvme_poll_submit_cmd(struct nvme_queue *q, struct nvme_cmd cmd)
+{
+    uint16_t status;
+    uint8_t spins = 0;
+
+    nvme_submit_cmd(q, cmd);
+
+    for (;;) {
+        /*
+         * The entry at the completion head belongs to us once its
+         * phase bit (bit 0 of the status word) matches the phase we
+         * are currently expecting.
+         */
+        status = q->cq[q->cq_head].status;
+        if ((status & 1) == q->cq_phase) {
+            break;
+        }
+
+        /* Check for timeout */
+        if (spins > 5) {
+            pr_error("Hang while polling phase bit, giving up\n");
+            return -ETIME;
+        }
+
+        tmr.msleep(150);
+        ++spins;
+    }
+
+    /*
+     * Consume the entry: advance the head, flip the expected phase
+     * when the ring wraps, and tell the controller through the
+     * completion doorbell. Without this the next command would look
+     * at the same, already completed, entry again.
+     */
+    ++q->cq_head;
+    if (q->cq_head >= q->size) {
+        q->cq_head = 0;
+        q->cq_phase ^= 1;
+    }
+    mmio_write32(q->cq_db, q->cq_head);
+
+    /* Everything above the phase bit is status; non-zero means failure */
+    if ((status & ~1) != 0) {
+        pr_trace("Command error (bits=0x%x)\n", status >> 1);
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/*
+ * Issue an identify command (CNS 1, identify controller) on the admin
+ * queue and poll for its completion.
+ *
+ * @ctrl: Controller whose admin queue is used.
+ * @id: 4 KiB aligned buffer; its physical address is passed as PRP1.
+ *
+ * Returns 0 on success, -EINVAL for a NULL or misaligned buffer, or
+ * the error returned by nvme_poll_submit_cmd().
+ */
+static int
+nvme_identify(struct nvme_ctrl *ctrl, struct nvme_id *id)
+{
+    struct nvme_cmd cmd = {0};
+    struct nvme_identify_cmd *idcmd = &cmd.identify;
+
+    /* NULL would pass the alignment test, so reject it explicitly */
+    if (id == NULL || !is_4k_aligned(id)) {
+        return -EINVAL;
+    }
+
+    idcmd->opcode = NVME_OP_IDENTIFY;
+    idcmd->nsid = 0;
+    idcmd->cns = 1;     /* Identify controller */
+    idcmd->prp1 = VIRT_TO_PHYS(id);
+    idcmd->prp2 = 0;
+    return nvme_poll_submit_cmd(&ctrl->adminq, cmd);
+}
+
+/*
+ * Debug helper: prints the model number, serial number and firmware
+ * revision fields of the controller identify data in 'id'.
+ */
+static void
+nvme_log_ctrl_id(struct nvme_id *id)
+{
+    /*
+     * Zero-filled so the copied text stays NUL-terminated; this
+     * assumes each id field is smaller than its buffer here
+     * (TODO confirm against struct nvme_id).
+     */
+    char model[41] = {0};
+    char serial[21] = {0};
+    char firmware[9] = {0};
+    size_t idx;
+
+    idx = 0;
+    while (idx < sizeof(id->mn)) {
+        model[idx] = id->mn[idx];
+        ++idx;
+    }
+
+    idx = 0;
+    while (idx < sizeof(id->sn)) {
+        serial[idx] = id->sn[idx];
+        ++idx;
+    }
+
+    idx = 0;
+    while (idx < sizeof(id->fr)) {
+        firmware[idx] = id->fr[idx];
+        ++idx;
+    }
+
+    pr_trace("Model number: %s\n", model);
+    pr_trace("Serial number: %s\n", serial);
+    pr_trace("Firmware revision: %s\n", firmware);
+}
+
+/*
+ * PCI-side setup for the controller: turns on bus mastering and
+ * memory space access for 'nvme_dev'.
+ */
+static void
+nvme_init_pci(void)
+{
+    uint32_t cmdstatus = pci_readl(nvme_dev, PCIREG_CMDSTATUS);
+
+    /* Read-modify-write so the other bits keep their values */
+    cmdstatus |= PCI_BUS_MASTERING;
+    cmdstatus |= PCI_MEM_SPACE;
+    pci_writel(nvme_dev, PCIREG_CMDSTATUS, cmdstatus);
+}
+
+/*
+ * Reset the controller, set up its admin queues, start it again and
+ * log the controller identify data.
+ *
+ * @bar: Controller register block.
+ *
+ * Returns 0 on success or a negative errno value from the step that
+ * failed.
+ */
+static int
+nvme_init_ctrl(struct nvme_bar *bar)
+{
+    int error;
+    uint32_t aq_last;
+    const uint32_t aq_max_entries = 4096;
+    /*
+     * NOTE(review): 'ctrl' is a local, so the admin queue set up here
+     * is no longer reachable once this function returns.
+     */
+    struct nvme_ctrl ctrl = {0};
+    struct nvme_queue *adminq;
+    struct nvme_id *id;
+
+    /* Ensure the controller is stopped */
+    if ((error = nvme_stop_ctrl(bar)) != 0) {
+        return error;
+    }
+
+    /* Setup admin queues */
+    adminq = &ctrl.adminq;
+    if ((error = nvme_create_queue(bar, adminq, 0)) != 0) {
+        pr_error("Failed to create admin queue\n");
+        return error;
+    }
+
+    /* A queue needs at least two entries to be usable */
+    if (adminq->size < 2) {
+        pr_error("Unusable admin queue size\n");
+        dynfree(adminq->sq);
+        dynfree(adminq->cq);
+        return -EIO;
+    }
+
+    /*
+     * The admin queue size fields are 12 bits wide and hold the entry
+     * count minus one, so cap the ring at 4096 entries and program
+     * 'size - 1'. That way the controller wraps at the same index as
+     * nvme_submit_cmd() does.
+     */
+    if (adminq->size > aq_max_entries) {
+        adminq->size = aq_max_entries;
+    }
+    aq_last = adminq->size - 1;
+
+    mmio_write32(&bar->aqa, (aq_last | aq_last << 16));
+    mmio_write64(&bar->asq, VIRT_TO_PHYS(adminq->sq));
+    mmio_write64(&bar->acq, VIRT_TO_PHYS(adminq->cq));
+
+    /*
+     * From here on the controller knows the queue addresses, so the
+     * rings are deliberately left allocated on the error paths below.
+     */
+
+    /* Now bring the controller back up */
+    if ((error = nvme_start_ctrl(bar)) != 0) {
+        return error;
+    }
+
+    id = dynalloc_memalign(sizeof(*id), 0x1000);
+    if (id == NULL) {
+        return -ENOMEM;
+    }
+
+    /* Only log the identify data if the command actually succeeded */
+    if ((error = nvme_identify(&ctrl, id)) != 0) {
+        pr_error("Failed to identify controller\n");
+        dynfree(id);
+        return error;
+    }
+
+    nvme_log_ctrl_id(id);
+    dynfree(id);
+    return 0;
+}
+
+/*
+ * Driver entry point: locate the NVMe controller on the PCI bus,
+ * acquire a general purpose timer, map BAR 0 and initialize the
+ * controller.
+ *
+ * Returns 0 on success, -ENODEV when no matching device or no usable
+ * timer is available, or the error from pci_map_bar() /
+ * nvme_init_ctrl().
+ */
+static int
+nvme_init(void)
+{
+    /*
+     * Zero every field we do not set: the structure is passed by
+     * value and should not carry indeterminate members.
+     * Class 1 / subclass 8 are the PCI class codes for a mass storage
+     * controller of the non-volatile memory kind.
+     */
+    struct pci_lookup lookup = {
+        .pci_class = 1,
+        .pci_subclass = 8
+    };
+    struct nvme_bar *bar = NULL;
+    int error;
+
+    nvme_dev = pci_get_device(lookup, PCI_CLASS | PCI_SUBCLASS);
+
+    if (nvme_dev == NULL) {
+        return -ENODEV;
+    }
+
+    /* Try to request a general purpose timer */
+    if (req_timer(TIMER_GP, &tmr) != TMRR_SUCCESS) {
+        pr_error("Failed to fetch general purpose timer\n");
+        return -ENODEV;
+    }
+
+    /* Ensure it has get_time_usec(), used by nvme_poll_reg() */
+    if (tmr.get_time_usec == NULL) {
+        pr_error("General purpose timer has no get_time_usec()\n");
+        return -ENODEV;
+    }
+
+    /* We also need msleep(), used by nvme_poll_submit_cmd() */
+    if (tmr.msleep == NULL) {
+        pr_error("General purpose timer has no msleep()\n");
+        return -ENODEV;
+    }
+
+    nvme_init_pci();
+
+    if ((error = pci_map_bar(nvme_dev, 0, (void *)&bar)) != 0) {
+        return error;
+    }
+
+    return nvme_init_ctrl(bar);
+}
+
+DRIVER_EXPORT(nvme_init);