aboutsummaryrefslogtreecommitdiff
path: root/sys/dev/ic/nvme.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/dev/ic/nvme.c')
-rw-r--r--sys/dev/ic/nvme.c614
1 files changed, 0 insertions, 614 deletions
diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
deleted file mode 100644
index df533a3..0000000
--- a/sys/dev/ic/nvme.c
+++ /dev/null
@@ -1,614 +0,0 @@
-/*
- * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of Hyra nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <sys/driver.h>
-#include <sys/cdefs.h>
-#include <sys/syslog.h>
-#include <sys/timer.h>
-#include <sys/device.h>
-#include <dev/pci/pci.h>
-#include <dev/ic/nvmevar.h>
-#include <vm/dynalloc.h>
-#include <vm/vm.h>
-#include <fs/devfs.h>
-#include <string.h>
-
-__MODULE_NAME("nvme");
-__KERNEL_META("$Hyra$: nvme.c, Ian Marco Moffett, "
- "NVMe driver");
-
-/* Driver log helpers; every message is prefixed with "nvme: " */
-#define pr_trace(fmt, ...) kprintf("nvme: " fmt, ##__VA_ARGS__)
-#define pr_error(...) pr_trace(__VA_ARGS__)
-
-/* PCI device located by nvme_init() */
-static struct pci_device *nvme_dev;
-/* Timer requested by nvme_init(); its msleep() paces the polling loops */
-static struct timer driver_tmr;
-/* Namespaces registered by nvme_init_ns(), searched by nvme_get_ns() */
-static TAILQ_HEAD(,nvme_ns) namespaces;
-
-/*
- * Returns non-zero when `ptr' lies on a 4k (0x1000) byte
- * boundary, zero otherwise.
- */
-static inline int
-is_4k_aligned(void *ptr)
-{
-    const uintptr_t addr = (uintptr_t)ptr;
-
-    return (addr % 0x1000) == 0;
-}
-
-/*
- * Wait until CSTS.RDY reads back as `val'.
- *
- * The status register is re-read after every 500 millisecond sleep,
- * for at most CAP.TO sleeps.
- *
- * Returns `val' once the bit matches, or -1 when the timeout
- * has elapsed without a match.
- */
-static int
-nvme_poll_ready(struct nvme_bar *bar, uint8_t val)
-{
-    const uint8_t max_waits = CAP_TIMEOUT(bar->caps);
-
-    for (uint8_t waits = 0; ; ++waits) {
-        if (STATUS_READY(bar->status) == val) {
-            /* Done waiting */
-            return val;
-        }
-        if (waits >= max_waits) {
-            /* Every allowed 500 ms period has been used up */
-            return -1;
-        }
-
-        /*
-         * XXX: The spec states that CAP.TO (Timeout) is in 500
-         *      millisecond units.
-         */
-        driver_tmr.msleep(500);
-    }
-}
-
-/*
- * Create an NVMe queue.
- *
- * Allocates and zeroes the 4k-aligned submission and completion
- * rings of `queue', resets its indices and computes the addresses
- * of its doorbell registers.
- *
- * @s: Controller state (supplies the BAR).
- * @queue: Queue to set up.
- * @id: Queue ID, used to locate the doorbell registers.
- *
- * Returns 0 on success, or -1 if a ring could not be allocated
- * (in which case nothing is left allocated).
- */
-static int
-nvme_create_queue(struct nvme_state *s, struct nvme_queue *queue, size_t id)
-{
-    struct nvme_bar *bar = s->bar;
-    const size_t PAGESZ = vm_get_page_size();
-    const uint8_t DBSTRIDE = CAP_STRIDE(bar->caps);
-    /*
-     * NOTE(review): CAP.MQES is a 0's based value in the NVMe spec;
-     * confirm whether CAP_MQES() already accounts for that.
-     */
-    const uint16_t SLOTS = CAP_MQES(bar->caps);
-
-    /*
-     * Size each ring by its real entry type. Sizing them by
-     * sizeof(void *) under-allocates: nvme_submit_cmd() stores
-     * whole command structures into the submission ring.
-     */
-    const size_t sq_bytes = sizeof(*queue->sq) * SLOTS;
-    const size_t cq_bytes = sizeof(*queue->cq) * SLOTS;
-
-    queue->sq = dynalloc_memalign(sq_bytes, 0x1000);
-    if (queue->sq == NULL) {
-        return -1;
-    }
-
-    /* Allocate one ring at a time so a failure never leaks the other */
-    queue->cq = dynalloc_memalign(cq_bytes, 0x1000);
-    if (queue->cq == NULL) {
-        dynfree(queue->sq);
-        queue->sq = NULL;
-        return -1;
-    }
-
-    memset(queue->sq, 0, sq_bytes);
-    memset(queue->cq, 0, cq_bytes);
-
-    queue->sq_head = 0;
-    queue->sq_tail = 0;
-    /* nvme_poll_submit_cmd() indexes the completion ring with this */
-    queue->cq_head = 0;
-    queue->size = SLOTS;
-    /* Doorbells follow the first page of the BAR: SQ at 2*id, CQ at 2*id+1 */
-    queue->sq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + (2 * id * (4 << DBSTRIDE)));
-    queue->cq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + ((2 * id + 1) * (4 << DBSTRIDE)));
-    queue->cq_phase = 1;
-    return 0;
-}
-
-/*
- * Place a command in the submission ring and ring the doorbell.
- *
- * @queue: Queue whose submission ring receives the command.
- * @cmd: Command to copy into the ring.
- */
-static void
-nvme_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
-{
-    /* Copy the command into the slot the tail currently points at */
-    queue->sq[queue->sq_tail] = cmd;
-
-    /* Advance the tail, wrapping to the first slot at the end of the ring */
-    ++queue->sq_tail;
-    if (queue->sq_tail >= queue->size) {
-        queue->sq_tail = 0;
-    }
-
-    /* Publish the new tail through the submission doorbell */
-    *(queue->sq_db) = queue->sq_tail;
-}
-
-/*
- * Submit a command and poll for completion
- *
- * @queue: Target queue.
- * @cmd: Command to submit
- *
- * Returns 0 when the command completed with a zero status field,
- * or -1 when it completed with an error or no completion showed up
- * in time.
- */
-static int
-nvme_poll_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
-{
-    uint16_t status;
-    size_t spins = 0;
-    int retval = 0;
-
-    nvme_submit_cmd(queue, cmd);
-
-    /*
-     * Wait for the current command to complete by
-     * polling the phase bit.
-     */
-    while (1) {
-        status = queue->cq[queue->cq_head].status;
-        if ((status & 1) == queue->cq_phase) {
-            /*
-             * The phase bit matches the phase expected for a
-             * fresh entry, the command has completed.
-             */
-            break;
-        }
-        if (spins > 5) {
-            /*
-             * Attempts exhausted. Nothing was posted to the head
-             * slot, so the head is left where it is; consuming a
-             * slot here would desynchronize the ring.
-             */
-            pr_error("Hang on phase bit poll, giving up (cmd error)\n");
-            return -1;
-        }
-
-        /* Not done, give it some more time */
-        driver_tmr.msleep(150);
-        ++spins;
-    }
-
-    /*
-     * The status bits are only meaningful once the phase matched;
-     * before that the slot still holds a stale entry.
-     */
-    if ((status >> 1) != 0) {
-        pr_trace("NVMe cmd error (bits=0x%x)\n", status >> 1);
-        retval = -1;
-    }
-
-    /* Consume the entry; the expected phase flips on every wrap */
-    ++queue->cq_head;
-    if (queue->cq_head >= queue->size) {
-        queue->cq_head = 0;
-        queue->cq_phase = !queue->cq_phase;
-    }
-
-    /* Tell the controller that `head' updated */
-    *(queue->cq_db) = queue->cq_head;
-    return retval;
-}
-
-/*
- * Create an I/O queue for a specific namespace.
- *
- * Sets up the rings of `ns->ioq', then issues the create I/O
- * completion queue and create I/O submission queue admin commands.
- *
- * @ns: Namespace
- * @id: I/O queue ID
- *
- * Returns 0 on success, otherwise the non-zero status of the
- * step that failed.
- */
-static int
-nvme_create_ioq(struct nvme_ns *ns, size_t id)
-{
-    struct nvme_queue *ioq = &ns->ioq;
-    struct nvme_state *cntl = ns->cntl;
-
-    struct nvme_bar *bar = cntl->bar;
-    struct nvme_cmd cmd = {0};
-    size_t mqes = CAP_MQES(bar->caps);
-
-    struct nvme_create_iocq_cmd *create_iocq;
-    struct nvme_create_iosq_cmd *create_iosq;
-    int status;
-
-    if ((status = nvme_create_queue(cntl, ioq, id)) != 0) {
-        return status;
-    }
-
-    create_iocq = &cmd.create_iocq;
-    create_iocq->opcode = NVME_OP_CREATE_IOCQ;
-    create_iocq->qflags |= __BIT(0);    /* Physically contiguous */
-    create_iocq->qsize = mqes;
-    create_iocq->qid = id;
-    create_iocq->prp1 = VIRT_TO_PHYS(ioq->cq);
-
-    /*
-     * NOTE(review): the rings are not freed on the failure paths
-     * below, since the controller may already reference them.
-     */
-    if ((status = nvme_poll_submit_cmd(&cntl->adminq, cmd)) != 0) {
-        return status;
-    }
-
-    /*
-     * Start the second command from a clean slate so that no field
-     * of the completion queue command carries over into it.
-     */
-    memset(&cmd, 0, sizeof(cmd));
-
-    create_iosq = &cmd.create_iosq;
-    create_iosq->opcode = NVME_OP_CREATE_IOSQ;
-    create_iosq->qflags |= __BIT(0);    /* Physically contiguous */
-    create_iosq->qsize = mqes;
-    create_iosq->cqid = id;
-    create_iosq->sqid = id;
-    create_iosq->prp1 = VIRT_TO_PHYS(ioq->sq);
-    return nvme_poll_submit_cmd(&cntl->adminq, cmd);
-}
-
-/*
- * Send an identify command (CNS 1, identify controller) on the
- * admin queue; the result is written to `id'.
- *
- * XXX: `id' has to sit on a 4k byte boundary so the data never
- *      crosses a page boundary, which keeps this implementation
- *      simple. Returns -1 right away when it does not.
- */
-static int
-nvme_identify(struct nvme_state *state, struct nvme_id *id)
-{
-    struct nvme_cmd cmd = {0};
-    struct nvme_identify_cmd *idcmd;
-
-    /* Refuse buffers that are not aligned on a 4k byte boundary */
-    if (!is_4k_aligned(id)) {
-        return -1;
-    }
-
-    idcmd = &cmd.identify;
-    idcmd->opcode = NVME_OP_IDENTIFY;
-    idcmd->cns = 1;     /* Identify controller */
-    idcmd->nsid = 0;
-    idcmd->prp1 = VIRT_TO_PHYS(id);
-    idcmd->prp2 = 0;    /* No need, data address is 4k aligned */
-
-    return nvme_poll_submit_cmd(&state->adminq, cmd);
-}
-
-/*
- * Issue a read/write command for a specific
- * namespace.
- *
- * @ns: Target namespace.
- * @buf: Data buffer, must be 4k aligned.
- * @slba: Starting logical block address.
- * @count: Number of logical blocks to transfer, must be non-zero.
- * @write: True to write, false to read.
- *
- * Only PRP1 is filled in, so the whole transfer has to fit inside
- * the single 4k page `buf' starts; larger requests are rejected.
- *
- * Returns 0 on success, non-zero on failure.
- */
-static int
-nvme_rw(struct nvme_ns *ns, char *buf, off_t slba, size_t count, bool write)
-{
-    struct nvme_cmd cmd = {0};
-    struct nvme_rw_cmd *rw = &cmd.rw;
-    const size_t bsize = ns->lba_bsize;
-
-    if (!is_4k_aligned(buf)) {
-        return -1;
-    }
-
-    /* `count - 1' below would wrap around for an empty request */
-    if (count == 0 || bsize == 0) {
-        return -1;
-    }
-
-    /*
-     * Without a second PRP entry the controller has nowhere valid
-     * to put data past the first page, so refuse such transfers.
-     */
-    if (count > 0x1000 / bsize) {
-        return -1;
-    }
-
-    rw->opcode = write ? NVME_OP_WRITE : NVME_OP_READ;
-    rw->nsid = ns->nsid;
-    rw->slba = slba;
-    rw->len = count - 1;    /* The block count is 0's based */
-    rw->prp1 = VIRT_TO_PHYS(buf);
-    return nvme_poll_submit_cmd(&ns->ioq, cmd);
-}
-
-/*
- * Look up a registered namespace by its ID.
- *
- * @nsid: Namespace ID to search for.
- *
- * Returns the matching entry of the `namespaces' list, or NULL
- * when no entry carries that ID.
- */
-static struct nvme_ns *
-nvme_get_ns(size_t nsid)
-{
-    struct nvme_ns *cur;
-    struct nvme_ns *match = NULL;
-
-    TAILQ_FOREACH(cur, &namespaces, link) {
-        if (cur->nsid == nsid) {
-            match = cur;
-            break;
-        }
-    }
-
-    return match;
-}
-
-/*
- * Shared body of the device read and write hooks.
- *
- * Resolves the namespace from the device minor and forwards the
- * transaction's buffer, offset and length to nvme_rw().
- *
- * Returns -1 when `sio' or its buffer is NULL or when no namespace
- * matches, otherwise the result of nvme_rw().
- */
-static int
-nvme_dev_rw(struct device *dev, struct sio_txn *sio, bool write)
-{
-    struct nvme_ns *target;
-
-    if (sio == NULL) {
-        return -1;
-    }
-
-    target = nvme_get_ns(dev->minor);
-    if (target == NULL) {
-        return -1;
-    }
-    if (sio->buf == NULL) {
-        return -1;
-    }
-
-    return nvme_rw(target, sio->buf, sio->offset, sio->len, write);
-}
-
-/*
- * Device read hook: runs the transaction through nvme_dev_rw()
- * as a read.
- */
-static int
-nvme_dev_read(struct device *dev, struct sio_txn *sio)
-{
-    const bool is_write = false;
-
-    return nvme_dev_rw(dev, sio, is_write);
-}
-
-/*
- * Device write hook: runs the transaction through nvme_dev_rw()
- * as a write.
- */
-static int
-nvme_dev_write(struct device *dev, struct sio_txn *sio)
-{
-    const bool is_write = true;
-
-    return nvme_dev_rw(dev, sio, is_write);
-}
-
-/*
- * Device open hook: nothing to set up, always reports success.
- */
-static int
-nvme_dev_open(struct device *dev)
-{
-    (void)dev;
-
-    return 0;
-}
-
-/*
- * Fetch the identify data of a namespace (identify with CNS 0).
- *
- * @s: Controller state, the command goes through its admin queue.
- * @id_ns: Destination of the identify data, written via DMA.
- * @nsid: Namespace ID.
- *
- * XXX: `id_ns' must be 4k aligned, -1 is returned otherwise.
- */
-static int
-nvme_id_ns(struct nvme_state *s, struct nvme_id_ns *id_ns, uint16_t nsid)
-{
-    struct nvme_cmd cmd = {0};
-    struct nvme_identify_cmd *idcmd;
-
-    if (!is_4k_aligned(id_ns)) {
-        return -1;
-    }
-
-    idcmd = &cmd.identify;
-    idcmd->opcode = NVME_OP_IDENTIFY;
-    idcmd->cns = 0;
-    idcmd->nsid = nsid;
-    idcmd->prp1 = VIRT_TO_PHYS(id_ns);
-
-    return nvme_poll_submit_cmd(&s->adminq, cmd);
-}
-
-/*
- * Init a namespace.
- *
- * Identifies the namespace, creates its I/O queue, registers a
- * device for it and appends it to the `namespaces' list.
- *
- * @state: Controller state.
- * @nsid: Namespace ID
- *
- * Returns 0 on success, non-zero on failure. On failure the
- * namespace is neither registered nor left allocated.
- */
-static int
-nvme_init_ns(struct nvme_state *state, uint16_t nsid)
-{
-    char devname[128];
-    struct nvme_ns *ns = NULL;
-    struct nvme_id_ns *id_ns = NULL;
-    struct device *dev;
-    uint8_t lba_format;
-    int status = -1;
-
-    ns = dynalloc(sizeof(*ns));
-    if (ns == NULL) {
-        goto fail;
-    }
-
-    /* Start from a known state; not every field is assigned below */
-    memset(ns, 0, sizeof(*ns));
-
-    /*
-     * A NULL buffer would pass the alignment check of nvme_id_ns(),
-     * so the allocation has to be verified here.
-     */
-    id_ns = dynalloc_memalign(sizeof(*id_ns), 0x1000);
-    if (id_ns == NULL) {
-        goto fail;
-    }
-
-    if ((status = nvme_id_ns(state, id_ns, nsid)) != 0) {
-        goto fail;
-    }
-
-    /* The low four bits of FLBAS select the LBA format in use */
-    lba_format = id_ns->flbas & 0xF;
-    ns->lba_fmt = id_ns->lbaf[lba_format];
-    ns->nsid = nsid;
-    ns->lba_bsize = 1 << ns->lba_fmt.ds;
-    ns->size = id_ns->size;
-    ns->cntl = state;
-
-    /* Without an I/O queue the namespace cannot serve any request */
-    if ((status = nvme_create_ioq(ns, ns->nsid)) != 0) {
-        goto fail;
-    }
-
-    dev = device_alloc();
-    if (dev == NULL) {
-        status = -1;
-        goto fail;
-    }
-
-    dev->read = nvme_dev_read;
-    dev->write = nvme_dev_write;
-    dev->open = nvme_dev_open;
-    dev->blocksize = ns->lba_bsize;
-    dev->mmap = NULL;
-    ns->dev_id = device_create(dev, state->major, nsid);
-
-    /* A missing devfs node is logged but does not fail the init */
-    snprintf(devname, sizeof(devname), "nvme0n%d", nsid);
-    if (devfs_add_dev(devname, dev) != 0) {
-        pr_error("Failed to create /dev/%s\n", devname);
-    }
-
-    TAILQ_INSERT_TAIL(&namespaces, ns, link);
-    dynfree(id_ns);
-    return 0;
-
-fail:
-    if (id_ns != NULL) {
-        dynfree(id_ns);
-    }
-    if (ns != NULL) {
-        dynfree(ns);
-    }
-
-    return status;
-}
-
-/*
- * Clear CC.EN if it is set, then wait for CSTS.RDY to read 0.
- *
- * Returns 0 on success, or -1 when the controller did not become
- * not-ready in time.
- */
-static int
-nvme_disable_controller(struct nvme_state *state)
-{
-    struct nvme_bar *regs = state->bar;
-    int ready;
-
-    if (__TEST(regs->config, CONFIG_EN)) {
-        regs->config &= ~CONFIG_EN;
-    }
-
-    ready = nvme_poll_ready(regs, 0);
-    if (ready >= 0) {
-        return 0;
-    }
-
-    pr_error("Failed to disable controller\n");
-    return -1;
-}
-
-/*
- * Debugging aid: prints the model number and firmware revision
- * found in the controller identify data.
- *
- * Both fields are first copied into zero-initialized local
- * buffers, which are then printed as strings.
- */
-static void
-nvme_log_ctrl_id(struct nvme_id *id)
-{
-    char model[41] = {0};
-    char firmware[9] = {0};
-    size_t pos;
-
-    pos = 0;
-    while (pos < sizeof(id->mn)) {
-        model[pos] = id->mn[pos];
-        ++pos;
-    }
-
-    pos = 0;
-    while (pos < sizeof(id->fr)) {
-        firmware[pos] = id->fr[pos];
-        ++pos;
-    }
-
-    pr_trace("NVMe model: %s\n", model);
-    pr_trace("NVMe firmware revision: %s\n", firmware);
-}
-
-/*
- * Request the active namespace ID list (identify with CNS 2).
- *
- * @state: Controller state, the command goes through its admin queue.
- * @nsids_out: Destination of the NSID list, written via DMA.
- *
- * XXX: `nsids_out' must be 4k aligned, -1 is returned otherwise.
- */
-static int
-nvme_get_nsids(struct nvme_state *state, uint32_t *nsids_out)
-{
-    struct nvme_cmd cmd = {0};
-    struct nvme_identify_cmd *idcmd;
-
-    if (!is_4k_aligned(nsids_out)) {
-        return -1;
-    }
-
-    idcmd = &cmd.identify;
-    idcmd->opcode = NVME_OP_IDENTIFY;
-    idcmd->cns = 2;     /* Active NSID list */
-    idcmd->prp1 = VIRT_TO_PHYS(nsids_out);
-
-    return nvme_poll_submit_cmd(&state->adminq, cmd);
-}
-
-/*
- * Enable the controller and bring up its namespaces.
- *
- * Sets CC.EN, waits for CSTS.RDY, identifies the controller,
- * fetches the active NSID list, programs the I/O queue entry
- * sizes and initializes every namespace found in the list.
- *
- * Returns 0 on success, -1 on failure. A namespace that fails
- * to initialize is logged and skipped.
- */
-static int
-nvme_enable_controller(struct nvme_state *state)
-{
-    struct nvme_bar *bar = state->bar;
-    struct nvme_id *id = NULL;
-
-    uint32_t *nsids = NULL;
-    uint8_t max_sqes, max_cqes;
-    size_t nsid_count;
-    int status = -1;
-
-    /* The NSID list buffer is 0x1000 bytes, so it holds this many IDs */
-    const size_t NSID_LIST_LEN = 0x1000 / sizeof(*nsids);
-
-    if (!__TEST(bar->config, CONFIG_EN)) {
-        bar->config |= CONFIG_EN;
-    }
-
-    if (nvme_poll_ready(bar, 1) < 0) {
-        pr_error("Failed to enable controller\n");
-        return -1;
-    }
-
-    id = dynalloc_memalign(sizeof(*id), 0x1000);
-    if (id == NULL) {
-        goto done;
-    }
-
-    nsids = dynalloc_memalign(0x1000, 0x1000);
-    if (nsids == NULL) {
-        goto done;
-    }
-
-    /* Never act on leftover heap contents if a command writes nothing */
-    memset(id, 0, sizeof(*id));
-    memset(nsids, 0, 0x1000);
-
-    if (nvme_identify(state, id) != 0) {
-        pr_error("Failed to identify controller\n");
-        goto done;
-    }
-    nvme_log_ctrl_id(id);
-
-    if (nvme_get_nsids(state, nsids) != 0) {
-        pr_error("Failed to fetch active namespace list\n");
-        goto done;
-    }
-
-    /*
-     * Before creating any I/O queues we need to set CC.IOCQES
-     * and CC.IOSQES... Bits 3:0 is the minimum and bits 7:4
-     * is the maximum. We'll choose the maximum.
-     */
-    max_sqes = id->sqes >> 4;
-    max_cqes = id->cqes >> 4;
-    bar->config |= (max_sqes << CONFIG_IOSQES_SHIFT);
-    bar->config |= (max_cqes << CONFIG_IOCQES_SHIFT);
-
-    /* Do not walk past the end of the NSID buffer when `nn' is large */
-    nsid_count = id->nn;
-    if (nsid_count > NSID_LIST_LEN) {
-        nsid_count = NSID_LIST_LEN;
-    }
-
-    /* Init NVMe namespaces */
-    for (size_t i = 0; i < nsid_count; ++i) {
-        if (nsids[i] == 0) {
-            continue;
-        }
-
-        pr_trace("Found NVMe namespace (id=%d)\n", nsids[i]);
-        /* NOTE(review): nvme_init_ns() takes a 16-bit NSID, larger IDs truncate */
-        if (nvme_init_ns(state, nsids[i]) != 0) {
-            pr_error("Failed to init NVMe namespace\n");
-        }
-    }
-
-    status = 0;
-done:
-    if (nsids != NULL) {
-        dynfree(nsids);
-    }
-    if (id != NULL) {
-        dynfree(id);
-    }
-
-    return status;
-}
-
-/*
- * Bring up the controller behind `bar'.
- *
- * Enables bus mastering and memory space access, disables the
- * controller, sets up the admin queue pair, allocates a device
- * major and finally enables the controller.
- *
- * Returns 0 on success, non-zero on failure.
- */
-static int
-nvme_init_controller(struct nvme_bar *bar)
-{
-    /*
-     * Static storage: nvme_init_ns() saves a pointer to this state
-     * in each namespace it adds to the global list, so the state
-     * has to outlive this function.
-     */
-    static struct nvme_state state;
-    struct nvme_queue *adminq = &state.adminq;
-
-    uint16_t mqes = CAP_MQES(bar->caps);
-    uint16_t cmdreg_bits = PCI_BUS_MASTERING |
-                           PCI_MEM_SPACE;
-
-    memset(&state, 0, sizeof(state));
-    state.bar = bar;
-
-    pci_set_cmdreg(nvme_dev, cmdreg_bits);
-
-    /* The admin queue registers are programmed while disabled */
-    if (nvme_disable_controller(&state) != 0) {
-        return -1;
-    }
-
-    if (nvme_create_queue(&state, adminq, 0) != 0) {
-        pr_error("Failed to create admin queue\n");
-        return -1;
-    }
-
-    /*
-     * Setup admin submission and admin completion queues.
-     *
-     * The casts keep the shift out of (signed) int arithmetic.
-     *
-     * NOTE(review): the AQA size fields are 12 bits wide in the
-     * NVMe spec; confirm that `mqes' always fits.
-     */
-    bar->aqa = ((uint32_t)mqes | ((uint32_t)mqes << 16));
-    bar->asq = VIRT_TO_PHYS(adminq->sq);
-    bar->acq = VIRT_TO_PHYS(adminq->cq);
-
-    state.major = device_alloc_major();
-    return nvme_enable_controller(&state);
-}
-
-/*
- * Driver entry point.
- *
- * Requests a general purpose timer that provides msleep(), looks
- * up a PCI device of class 1, subclass 8, and initializes the
- * controller behind its BAR0.
- *
- * Returns 0 on success, -1 on failure.
- */
-static int
-nvme_init(void)
-{
-    struct nvme_bar *regs;
-    struct pci_lookup lookup = {
-        .pci_class = 1,
-        .pci_subclass = 8
-    };
-
-    /* The polling loops of this driver depend on msleep() */
-    if (req_timer(TIMER_GP, &driver_tmr) != 0) {
-        pr_error("Failed to fetch general purpose timer\n");
-        return -1;
-    }
-    if (driver_tmr.msleep == NULL) {
-        pr_error("Timer does not have msleep()\n");
-        return -1;
-    }
-
-    nvme_dev = pci_get_device(lookup, PCI_CLASS | PCI_SUBCLASS);
-    if (nvme_dev == NULL) {
-        return -1;
-    }
-
-    regs = PCI_BAR_MEMBASE(nvme_dev->bar[0]);
-    pr_trace("NVMe BAR0 @ 0x%p\n", regs);
-    TAILQ_INIT(&namespaces);
-
-    return (nvme_init_controller(regs) < 0) ? -1 : 0;
-}
-
-DRIVER_EXPORT(nvme_init);