From e819cf0ade273bcf5fb7756cfe5496caa875e8c1 Mon Sep 17 00:00:00 2001
From: Ian Moffett
Date: Thu, 28 Mar 2024 22:45:43 -0400
Subject: kernel: nvme: Add initial NVMe driver code

Signed-off-by: Ian Moffett
---
 sys/dev/ic/nvme.c            | 407 +++++++++++++++++++++++++++++++++++++++++++
 sys/include/dev/ic/nvmevar.h | 221 ++++++++++++++++++++++++++
 2 files changed, 628 insertions(+)
 create mode 100644 sys/dev/ic/nvme.c
 create mode 100644 sys/include/dev/ic/nvmevar.h

(limited to 'sys')

diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c
new file mode 100644
index 0000000..b4ab531
--- /dev/null
+++ b/sys/dev/ic/nvme.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+__MODULE_NAME("nvme");
+__KERNEL_META("$Hyra$: nvme.c, Ian Marco Moffett, "
+              "NVMe driver");
+
+#define COMMAND_SIZE 64         /* In bytes (defined by spec) */
+#define ADMINQ_MAX_SLOTS 4096   /* AQA queue sizes are 12-bit, 0's based */
+
+#define CAP_MPSMIN(caps)    (((caps) >> 48) & 0xF)
+#define CAP_MPSMAX(caps)    (((caps) >> 52) & 0xF)
+#define CAP_TIMEOUT(caps)   (((caps) >> 24) & 0xFF)
+#define CAP_STRIDE(caps)    (((caps) >> 32) & 0xF)
+#define CAP_MQES(caps)      ((caps) & 0xFFFF)
+#define CAP_CSS(caps)       (((caps) >> 37) & 0xFF)  /* CAP.CSS is bits 44:37 */
+
+#define STATUS_READY(status) ((status) & 1)
+
+#define CONFIG_EN __BIT(0)
+#define CONFIG_CSS_SHIFT 4
+
+static struct pci_device *nvme_dev;
+static struct timer driver_tmr;
+
+/* Returns non-zero if `ptr' is aligned on a 4k byte boundary */
+static inline int
+is_4k_aligned(void *ptr)
+{
+    return ((uintptr_t)ptr & (0x1000 - 1)) == 0;
+}
+
+/*
+ * Poll CSTS.RDY to equal `val'
+ *
+ * Waits for at most CAP.TO intervals of 500 ms each.
+ *
+ * Returns `val' on success, returns < 0 value
+ * upon failure.
+ */
+static int
+nvme_poll_ready(struct nvme_bar *bar, uint8_t val)
+{
+    uint8_t timeout = CAP_TIMEOUT(bar->caps);
+    uint8_t time_waited = 0;
+
+    do {
+        if (STATUS_READY(bar->status) == val) {
+            /* Done waiting */
+            break;
+        }
+
+        /*
+         * If CSTS.RDY hasn't changed, we can try to wait a
+         * little longer.
+         *
+         * XXX: The spec states that CAP.TO (Timeout) is in 500
+         *      millisecond units.
+         */
+        if (time_waited < timeout) {
+            driver_tmr.msleep(500);
+            ++time_waited;
+        } else {
+            return -1;
+        }
+    } while (1);
+
+    return val;
+}
+
+/*
+ * Create an admin queue.
+ *
+ * Allocates and zeroes the submission and completion rings and
+ * initializes `queue'. The rings hold CAP.MQES + 1 entries, capped
+ * at ADMINQ_MAX_SLOTS.
+ *
+ * Returns 0 on success, returns < 0 value if an allocation fails.
+ */
+static int
+nvme_create_adminq(struct nvme_state *s, struct nvme_queue *queue)
+{
+    struct nvme_bar *bar = s->bar;
+    const size_t PAGESZ = vm_get_page_size();
+    const uint8_t DBSTRIDE = CAP_STRIDE(bar->caps);
+    uint32_t slots = CAP_MQES(bar->caps) + 1;   /* CAP.MQES is 0's based */
+
+    if (slots > ADMINQ_MAX_SLOTS) {
+        slots = ADMINQ_MAX_SLOTS;
+    }
+
+    /* Size each ring by its entry type, not by the size of a pointer */
+    queue->sq = dynalloc_memalign(sizeof(*queue->sq) * slots, 0x1000);
+    if (queue->sq == NULL) {
+        return -1;
+    }
+
+    queue->cq = dynalloc_memalign(sizeof(*queue->cq) * slots, 0x1000);
+    if (queue->cq == NULL) {
+        dynfree(queue->sq);
+        queue->sq = NULL;
+        return -1;
+    }
+
+    memset(queue->sq, 0, sizeof(*queue->sq) * slots);
+    memset(queue->cq, 0, sizeof(*queue->cq) * slots);
+
+    queue->sq_head = 0;
+    queue->sq_tail = 0;
+    queue->cq_head = 0;
+    queue->size = slots;
+    /*
+     * NOTE(review): `bar' is dereferenced directly above but is run
+     * through PHYS_TO_VIRT() here; confirm which address space it is in.
+     */
+    queue->sq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ);
+    queue->cq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + 1 * (4 << DBSTRIDE));
+    queue->cq_phase = 1;
+    return 0;
+}
+
+/*
+ * Submit a command
+ *
+ * @queue: Target queue.
+ * @cmd: Command to submit
+ */
+static void
+nvme_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
+{
+    /* Submit the command to the queue */
+    queue->sq[queue->sq_tail++] = cmd;
+    if (queue->sq_tail >= queue->size) {
+        queue->sq_tail = 0;
+    }
+    *(queue->sq_db) = queue->sq_tail;
+}
+
+/*
+ * Submit a command and poll for completion
+ *
+ * @queue: Target queue.
+ * @cmd: Command to submit
+ *
+ * Returns 0 if the command completed successfully, returns < 0
+ * value if it completed with an error status or never completed.
+ */
+static int
+nvme_poll_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd)
+{
+    uint16_t status;
+    size_t spins = 0;
+    int retval = 0;
+
+    nvme_submit_cmd(queue, cmd);
+
+    /*
+     * Wait for the current command to complete by
+     * polling the phase bit.
+     */
+    while (1) {
+        status = queue->cq[queue->cq_head].status;
+        if ((status & 1) == queue->cq_phase) {
+            /*
+             * The phase bit matches the phase for the most
+             * recently submitted command, the command has completed.
+             */
+            break;
+        }
+        if (spins > 5) {
+            /* Attempts exhausted, there is no completion entry to consume */
+            KERR("Hang on phase bit poll, giving up (cmd error)\n");
+            return -1;
+        }
+
+        /* Not done, give it some more time */
+        driver_tmr.msleep(150);
+        ++spins;
+    }
+
+    /* The remaining status bits are only meaningful once the entry is posted */
+    if ((status & ~1) != 0) {
+        KDEBUG("NVMe cmd error (bits=0x%x)\n", status >> 1);
+        retval = -1;
+    }
+
+    ++queue->cq_head;
+    if (queue->cq_head >= queue->size) {
+        queue->cq_head = 0;
+        queue->cq_phase = !queue->cq_phase;
+    }
+
+    /* Tell the controller that `head' updated */
+    *(queue->cq_db) = queue->cq_head;
+    return retval;
+}
+
+/*
+ * Issue an identify command for the current
+ * controller.
+ *
+ * XXX: `id' must be aligned on a 4k byte boundary to avoid
+ *      crossing a page boundary. This keeps the implementation
+ *      as simple as possible here.
+ *
+ * Returns 0 on success, returns < 0 value upon failure.
+ */
+static int
+nvme_identify(struct nvme_state *state, struct nvme_id *id)
+{
+    struct nvme_cmd cmd = {0};
+    struct nvme_identify_cmd *identify = &cmd.identify;
+
+    /* Ensure `id' is aligned on a 4k byte boundary */
+    if (!is_4k_aligned(id)) {
+        return -1;
+    }
+
+    identify->opcode = NVME_OP_IDENTIFY;
+    identify->nsid = 0;
+    identify->cns = 1;      /* Identify controller */
+    identify->prp1 = VIRT_TO_PHYS(id);
+    identify->prp2 = 0;     /* No need, data address is 4k aligned */
+    return nvme_poll_submit_cmd(&state->adminq, cmd);
+}
+
+/* Clear CC.EN (if set) and wait for CSTS.RDY to become 0 */
+static int
+nvme_disable_controller(struct nvme_state *state)
+{
+    struct nvme_bar *bar = state->bar;
+
+    if (__TEST(bar->config, CONFIG_EN)) {
+        bar->config &= ~CONFIG_EN;
+    }
+
+    if (nvme_poll_ready(bar, 0) < 0) {
+        KERR("Failed to disable controller\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * For debugging purposes, logs some information
+ * found within the controller identify data structure.
+ */
+static void
+nvme_log_ctrl_id(struct nvme_id *id)
+{
+    char mn[41] = {0};
+    char fr[9] = {0};
+
+    for (size_t i = 0; i < sizeof(id->mn); ++i) {
+        mn[i] = id->mn[i];
+    }
+    for (size_t i = 0; i < sizeof(id->fr); ++i) {
+        fr[i] = id->fr[i];
+    }
+
+    KDEBUG("NVMe model: %s\n", mn);
+    KDEBUG("NVMe firmware revision: %s\n", fr);
+}
+
+/* Set CC.EN (if clear), wait for CSTS.RDY and identify the controller */
+static int
+nvme_enable_controller(struct nvme_state *state)
+{
+    struct nvme_bar *bar = state->bar;
+    struct nvme_id *id;
+    int retval;
+
+    if (!__TEST(bar->config, CONFIG_EN)) {
+        bar->config |= CONFIG_EN;
+    }
+
+    if (nvme_poll_ready(bar, 1) < 0) {
+        KERR("Failed to enable controller\n");
+        return -1;
+    }
+
+    id = dynalloc_memalign(sizeof(struct nvme_id), 0x1000);
+
+    if (id == NULL) {
+        return -1;
+    }
+
+    retval = nvme_identify(state, id);
+    if (retval == 0) {
+        nvme_log_ctrl_id(id);
+    }
+    dynfree(id);
+    return retval;
+}
+
+/* Disable the controller, set up its admin queue and enable it again */
+static int
+nvme_init_controller(struct nvme_bar *bar)
+{
+    struct nvme_state state = { .bar = bar };
+    struct nvme_queue *adminq = &state.adminq;
+
+    uint8_t cap_css = CAP_CSS(bar->caps);
+    uint32_t aq_last;
+    uint16_t cmdreg_bits = PCI_BUS_MASTERING |
+                           PCI_MEM_SPACE;
+
+    pci_set_cmdreg(nvme_dev, cmdreg_bits);
+    if (nvme_disable_controller(&state) < 0) {
+        return -1;
+    }
+
+    if (nvme_create_adminq(&state, adminq) < 0) {
+        return -1;
+    }
+
+    /* Setup admin submission and admin completion queues */
+    aq_last = adminq->size - 1;     /* AQA sizes are 0's based */
+    bar->aqa = (aq_last | aq_last << 16);
+    bar->asq = VIRT_TO_PHYS(adminq->sq);
+    bar->acq = VIRT_TO_PHYS(adminq->cq);
+
+    /* Set up supported command sets */
+    if (__TEST(cap_css, __BIT(7))) {
+        /* Admin command sets only */
+        bar->config |= (7UL << CONFIG_CSS_SHIFT);
+    } else if (__TEST(cap_css, __BIT(6))) {
+        /* All supported I/O command sets */
+        bar->config |= (6UL << CONFIG_CSS_SHIFT);
+    }
+
+    return nvme_enable_controller(&state);
+}
+
+/* Look up a PCI class 1 / subclass 8 device and initialize it */
+static int
+nvme_init(void)
+{
+    struct nvme_bar *bar;
+    struct pci_lookup nvme_lookup = {
+        .pci_class = 1,
+        .pci_subclass = 8
+    };
+
+    if (req_timer(TIMER_GP, &driver_tmr) != 0) {
+        KERR("Failed to fetch general purpose timer\n");
+        return -1;
+    }
+
+    if (driver_tmr.msleep == NULL) {
+        KERR("Timer does not have msleep()\n");
+        return -1;
+    }
+
+    nvme_dev = pci_get_device(nvme_lookup, PCI_CLASS | PCI_SUBCLASS);
+    if (nvme_dev == NULL) {
+        return -1;
+    }
+
+    bar = (struct nvme_bar *)(nvme_dev->bar[0] & ~0xF);  /* Bits 3:0 are flags */
+    KINFO("NVMe BAR0 @ 0x%p\n", bar);
+
+    if (nvme_init_controller(bar) < 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+DRIVER_EXPORT(nvme_init);
diff --git a/sys/include/dev/ic/nvmevar.h b/sys/include/dev/ic/nvmevar.h
new file mode 100644
index 0000000..a4a5db6
--- /dev/null
+++ b/sys/include/dev/ic/nvmevar.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of Hyra nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IC_NVMEVAR_H_
+#define _IC_NVMEVAR_H_
+
+#include
+
+#define NVME_OP_IDENTIFY 0x06
+
+/* I/O commands */
+#define NVME_OP_READ 0x02
+
+struct nvme_common_cmd {
+    uint8_t opcode;
+    uint8_t flags;
+    uint16_t cid;
+    uint32_t nsid;
+    uint32_t cdw1[2];
+    uint64_t metadata;
+    uint64_t prp1;
+    uint64_t prp2;
+    uint32_t cdw2[6];
+};
+
+struct nvme_identify_cmd {
+    uint8_t opcode;
+    uint8_t flags;
+    uint16_t cid;
+    uint32_t nsid;
+    uint64_t unused1;
+    uint64_t unused2;
+    uint64_t prp1;
+    uint64_t prp2;
+    uint32_t cns;
+    uint32_t unused3[5];
+};
+
+/* Read/write */
+struct nvme_rw_cmd {
+    uint8_t opcode;
+    uint8_t flags;
+    uint16_t cid;
+    uint32_t nsid;
+    uint64_t unused;
+    uint64_t metadata;
+    uint64_t prp1;
+    uint64_t prp2;
+    uint64_t slba;
+    uint16_t len;
+    uint16_t control;
+    uint32_t dsmgmt;
+    uint32_t ref;
+    uint16_t apptag;
+    uint16_t appmask;
+};
+
+struct nvme_cmd {
+    union {
+        struct nvme_identify_cmd identify;
+        struct nvme_common_cmd common;
+    };
+};
+
+struct nvme_id {
+    uint16_t vid;
+    uint16_t ssvid;
+    char sn[20];
+    char mn[40];
+    char fr[8];
+    uint8_t rab;
+    uint8_t ieee[3];
+    uint8_t mic;
+    uint8_t mdts;
+    uint16_t ctrlid;
+    uint32_t version;
+    uint32_t unused1[43];
+    uint16_t oacs;
+    uint8_t acl;
+    uint8_t aerl;
+    uint8_t fw;
+    uint8_t lpa;
+    uint8_t elpe;
+    uint8_t npss;
+    uint8_t avscc;
+    uint8_t apsta;
+    uint16_t wctemp;
+    uint16_t cctemp;
+    uint16_t unused2[121];
+    uint8_t sqes;
+    uint8_t cqes;
+    uint16_t unused3;
+    uint32_t nn;
+    uint16_t oncs;
+    uint16_t fuses;
+    uint8_t fna;
+    uint8_t vwc;
+    uint16_t awun;
+    uint16_t awupf;
+    uint8_t nvscc;
+    uint8_t unused4;
+    uint16_t acwu;
+    uint16_t unused5;
+    uint32_t sgls;
+    uint32_t unused6[633];      /* Pads bytes 540..3071 so the struct is 4096 bytes */
+    uint8_t vs[1024];
+};
+
+/* Command completion queue entry */
+struct nvme_cq_entry {
+    uint32_t res;
+    uint32_t unused;
+    uint16_t sqhead;
+    uint16_t sqid;
+    uint16_t cid;
+    uint16_t status;            /* Bit 0 is the phase bit */
+};
+
+/* NVMe controller */
+struct __packed nvme_bar {
+    uint64_t caps;
+    uint32_t version;
+    uint32_t intms;             /* Interrupt mask set */
+    uint32_t intmc;             /* Interrupt mask clear */
+    uint32_t config;
+    uint32_t unused1;
+    uint32_t status;
+    uint32_t unused2;
+    uint32_t aqa;               /* Admin queue attributes */
+    uint64_t asq;               /* Admin submission queue */
+    uint64_t acq;               /* Admin completion queue */
+};
+
+struct nvme_lbaf {
+    uint16_t ms;                /* Number of metadata bytes per LBA */
+    uint8_t ds;                 /* Data size */
+    uint8_t rp;
+};
+
+/* Identify namespace data */
+struct nvme_id_ns {
+    uint64_t size;
+    uint64_t capabilities;
+    uint64_t nuse;
+    uint8_t features;
+    uint8_t nlbaf;
+    uint8_t flbas;
+    uint8_t mc;
+    uint8_t dpc;
+    uint8_t dps;
+    uint8_t nmic;
+    uint8_t rescap;
+    uint8_t fpi;
+    uint8_t unused1;
+    uint16_t nawun;
+    uint16_t nawupf;
+    uint16_t nacwu;
+    uint16_t nabsn;
+    uint16_t nabo;
+    uint16_t nabspf;
+    uint16_t unused2;
+    uint64_t nvmcap[2];
+    uint64_t unusued3[5];
+    uint8_t nguid[16];
+    uint8_t eui64[8];
+    struct nvme_lbaf lbaf[16];
+    uint64_t unused3[24];
+    uint8_t vs[3712];
+};
+
+struct nvme_queue {
+    struct nvme_cmd *sq;            /* Submission queue */
+    struct nvme_cq_entry *cq;       /* Completion queue */
+    uint16_t sq_head;               /* Submission queue head */
+    uint16_t sq_tail;               /* Submission queue tail */
+    uint16_t cq_head;               /* Completion queue head */
+    uint8_t cq_phase : 1;           /* Completion queue phase bit */
+    uint16_t size;                  /* Size in elements */
+    volatile uint32_t *sq_db;       /* Submission doorbell */
+    volatile uint32_t *cq_db;       /* Completion doorbell */
+};
+
+struct nvme_state {
+    struct nvme_queue adminq;
+    struct nvme_bar *bar;
+};
+
+/* NVMe namespace */
+struct nvme_ns {
+    size_t nsid;                    /* Namespace ID */
+    size_t lba_bsize;               /* LBA block size */
+    struct nvme_state *cntl;        /* NVMe controller */
+};
+
+#endif
-- 
cgit v1.2.3