From e819cf0ade273bcf5fb7756cfe5496caa875e8c1 Mon Sep 17 00:00:00 2001 From: Ian Moffett Date: Thu, 28 Mar 2024 22:45:43 -0400 Subject: kernel: nvme: Add initial NVMe driver code Signed-off-by: Ian Moffett --- sys/dev/ic/nvme.c | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 sys/dev/ic/nvme.c (limited to 'sys/dev/ic') diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c new file mode 100644 index 0000000..b4ab531 --- /dev/null +++ b/sys/dev/ic/nvme.c @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +__MODULE_NAME("nvme"); +__KERNEL_META("$Hyra$: nvme.c, Ian Marco Moffett, " + "NVMe driver"); + +#define COMMAND_SIZE 64 /* In bytes (defined by spec) */ + +#define CAP_MPSMIN(caps) ((caps >> 48) & 0xF) +#define CAP_MPSMAX(caps) ((caps >> 52) & 0xF) +#define CAP_TIMEOUT(caps) ((caps >> 24) & 0xFF) +#define CAP_STRIDE(caps) ((caps >> 32) & 0xF) +#define CAP_MQES(caps) (caps & 0xFFFF) +#define CAP_CSS(caps) (caps & 0xFF) + +#define STATUS_READY(status) (status & 1) + +#define CONFIG_EN __BIT(0) +#define CONFIG_CSS_SHIFT 4 + +static struct pci_device *nvme_dev; +static struct timer driver_tmr; + +static inline int +is_4k_aligned(void *ptr) +{ + return ((uintptr_t)ptr & (0x1000 - 1)) == 0; +} + +/* + * Poll CSTS.RDY to equal `val' + * + * Returns `val' on success, returns < 0 value + * upon failure. + */ +static int +nvme_poll_ready(struct nvme_bar *bar, uint8_t val) +{ + uint8_t timeout = CAP_TIMEOUT(bar->caps); + uint8_t time_waited = 0; + + do { + if (STATUS_READY(bar->status) == val) { + /* Done waiting */ + break; + } + + /* + * If CSTS.RDY hasn't changed, we can try to wait a + * little longer. + * + * XXX: The spec states that CAP.TO (Timeout) is in 500 + * millisecond units. + */ + if (time_waited < timeout) { + driver_tmr.msleep(500); + ++time_waited; + } else { + return -1; + } + } while (1); + + return val; +} + +/* + * Create an admin queue. + */ +static int +nvme_create_adminq(struct nvme_state *s, struct nvme_queue *queue) +{ + struct nvme_bar *bar = s->bar; + const size_t PAGESZ = vm_get_page_size(); + const uint8_t DBSTRIDE = CAP_STRIDE(bar->caps); + const uint16_t SLOTS = CAP_MQES(bar->caps); + + queue->sq = dynalloc_memalign(sizeof(void *) * SLOTS, 0x1000); + queue->cq = dynalloc_memalign(sizeof(void *) * SLOTS, 0x1000); + + if (queue->sq == NULL) { + return -1; + } + if (queue->cq == NULL) { + dynfree(queue->sq); + return -1; + } + + memset(queue->sq, 0, sizeof(void *) * SLOTS); + memset(queue->cq, 0, sizeof(void *) * SLOTS); + + queue->sq_head = 0; + queue->sq_tail = 0; + queue->size = SLOTS; + queue->sq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ); + queue->cq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + 1 * (4 << DBSTRIDE)); + queue->cq_phase = 1; + return 0; +} + +/* + * Submit a command + * + * @queue: Target queue. + * @cmd: Command to submit + */ +static void +nvme_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd) +{ + /* Submit the command to the queue */ + queue->sq[queue->sq_tail++] = cmd; + if (queue->sq_tail >= queue->size) { + queue->sq_tail = 0; + } + *(queue->sq_db) = queue->sq_tail; +} + +/* + * Submit a command and poll for completion + * + * @queue: Target queue. + * @cmd: Command to submit + */ +static int +nvme_poll_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd) +{ + uint16_t status; + size_t spins = 0; + + nvme_submit_cmd(queue, cmd); + + /* + * Wait for the current command to complete by + * polling the phase bit. + */ + while (1) { + status = queue->cq[queue->cq_head].status; + if ((status & 1) == queue->cq_phase) { + /* + * The phase bit matches the phase for the most + * recently submitted command, the command has completed. + */ + break; + } + if ((status & ~1) != 0) { + KDEBUG("NVMe cmd error (bits=0x%x)\n", status >> 1); + break; + } + if (spins > 5) { + /* Attempts exhausted */ + KERR("Hang on phase bit poll, giving up (cmd error)\n"); + break; + } + + /* Not done, give it some more time */ + driver_tmr.msleep(150); + ++spins; + } + + ++queue->cq_head; + if (queue->cq_head >= queue->size) { + queue->cq_head = 0; + queue->cq_phase = !queue->cq_phase; + } + + /* Tell the controller that `head' updated */ + *(queue->cq_db) = queue->cq_head; + return 0; +} + +/* + * Issue anidentify command for the current + * controller. + * + * XXX: `id' must be aligned on a 4k byte boundary to avoid + * crossing a page boundary. This keeps the implementation + * as simple as possible here. + */ +static int +nvme_identify(struct nvme_state *state, struct nvme_id *id) +{ + struct nvme_cmd cmd = {0}; + struct nvme_identify_cmd *identify = &cmd.identify; + + /* Ensure `id' is aligned on a 4k byte boundary */ + if (!is_4k_aligned(id)) { + return -1; + } + + identify->opcode = NVME_OP_IDENTIFY; + identify->nsid = 0; + identify->cns = 1; /* Identify controller */ + identify->prp1 = VIRT_TO_PHYS(id); + identify->prp2 = 0; /* No need, data address is 4k aligned */ + nvme_poll_submit_cmd(&state->adminq, cmd); + return 0; +} + +static int +nvme_disable_controller(struct nvme_state *state) +{ + struct nvme_bar *bar = state->bar; + + if (__TEST(bar->config, CONFIG_EN)) { + bar->config &= ~CONFIG_EN; + } + + if (nvme_poll_ready(bar, 0) < 0) { + KERR("Failed to disable controller\n"); + return -1; + } + + return 0; +} + +/* + * For debugging purposes, logs some information + * found within the controller identify data structure. + */ +static void +nvme_log_ctrl_id(struct nvme_id *id) +{ + char mn[41] = {0}; + char fr[9] = {0}; + + for (size_t i = 0; i < sizeof(id->mn); ++i) { + mn[i] = id->mn[i]; + } + for (size_t i = 0; i < sizeof(id->fr); ++i) { + fr[i] = id->fr[i]; + } + + KDEBUG("NVMe model: %s\n", mn); + KDEBUG("NVMe firmware revision: %s\n", fr); +} + +static int +nvme_enable_controller(struct nvme_state *state) +{ + struct nvme_bar *bar = state->bar; + struct nvme_id *id; + + if (!__TEST(bar->config, CONFIG_EN)) { + bar->config |= CONFIG_EN; + } + + if (nvme_poll_ready(bar, 1) < 0) { + KERR("Failed to enable controller\n"); + } + + id = dynalloc_memalign(sizeof(struct nvme_id), 0x1000); + + if (id == NULL) { + return -1; + } + + nvme_identify(state, id); + nvme_log_ctrl_id(id); + dynfree(id); + return 0; +} + +static int +nvme_init_controller(struct nvme_bar *bar) +{ + struct nvme_state state = { . bar = bar }; + struct nvme_queue *adminq = &state.adminq; + + uint8_t cap_css = CAP_CSS(bar->caps); + uint16_t mqes = CAP_MQES(bar->caps); + uint16_t cmdreg_bits = PCI_BUS_MASTERING | + PCI_MEM_SPACE; + + pci_set_cmdreg(nvme_dev, cmdreg_bits); + nvme_disable_controller(&state); + + nvme_create_adminq(&state, adminq); + + /* Setup admin submission and admin completion queues */ + bar->aqa = (mqes | mqes << 16); + bar->asq = VIRT_TO_PHYS(adminq->sq); + bar->acq = VIRT_TO_PHYS(adminq->cq); + + /* Set up supported command sets */ + if (__TEST(cap_css, __BIT(7))) { + /* Admin command sets only */ + bar->config |= (7UL << CONFIG_CSS_SHIFT); + } else if (__TEST(cap_css, __BIT(6))) { + /* All supported I/O command sets */ + bar->config |= (6UL << CONFIG_CSS_SHIFT); + } + + nvme_enable_controller(&state); + return 0; +} + +static int +nvme_init(void) +{ + struct nvme_bar *bar; + struct pci_lookup nvme_lookup = { + .pci_class = 1, + .pci_subclass = 8 + }; + + if (req_timer(TIMER_GP, &driver_tmr) != 0) { + KERR("Failed to fetch general purpose timer\n"); + return -1; + } + + if (driver_tmr.msleep == NULL) { + KERR("Timer does not have msleep()\n"); + return -1; + } + + nvme_dev = pci_get_device(nvme_lookup, PCI_CLASS | PCI_SUBCLASS); + if (nvme_dev == NULL) { + return -1; + } + + bar = (struct nvme_bar *)(nvme_dev->bar[0] & ~7); + KINFO("NVMe BAR0 @ 0x%p\n", bar); + + if (nvme_init_controller(bar) < 0) { + return -1; + } + + return 0; +} + +DRIVER_EXPORT(nvme_init); -- cgit v1.2.3