diff options
-rw-r--r-- | sys/dev/ic/nvme.c | 389 | ||||
-rw-r--r-- | sys/include/dev/ic/nvmeregs.h | 74 | ||||
-rw-r--r-- | sys/include/dev/ic/nvmevar.h | 129 |
3 files changed, 592 insertions, 0 deletions
diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c new file mode 100644 index 0000000..5f92d0e --- /dev/null +++ b/sys/dev/ic/nvme.c @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/param.h> +#include <sys/driver.h> +#include <sys/errno.h> +#include <sys/sched.h> +#include <sys/syslog.h> +#include <sys/mmio.h> +#include <dev/ic/nvmeregs.h> +#include <dev/ic/nvmevar.h> +#include <dev/pci/pci.h> +#include <dev/pci/pciregs.h> +#include <dev/timer.h> +#include <vm/dynalloc.h> +#include <vm/vm.h> +#include <string.h> + +#define pr_trace(fmt, ...) kprintf("nvme: " fmt, ##__VA_ARGS__) +#define pr_error(...) pr_trace(__VA_ARGS__) + +static struct pci_device *nvme_dev; +static struct timer tmr; + +static inline int +is_4k_aligned(void *ptr) +{ + return ((uintptr_t)ptr & (0x1000 - 1)) == 0; +} + +/* + * Poll register to have 'bits' set/unset. + * + * @reg: Register to poll. + * @bits: Bits to be checked. + * @pollset: True to poll as set. + */ +static int +nvme_poll_reg(struct nvme_bar *bar, volatile uint32_t *reg, uint32_t bits, + bool pollset) +{ + size_t usec_start, usec; + size_t elapsed_msec; + uint32_t val, caps; + bool tmp; + + usec_start = tmr.get_time_usec(); + caps = mmio_read32(&bar->caps); + + for (;;) { + val = mmio_read32(reg); + tmp = (pollset) ? ISSET(val, bits) : !ISSET(val, bits); + + usec = tmr.get_time_usec(); + elapsed_msec = (usec - usec_start) / 1000; + + /* If tmp is set, the register updated in time */ + if (tmp) { + break; + } + + /* Exit with an error if we timeout */ + if (elapsed_msec > CAP_TIMEOUT(caps)) { + return -ETIME; + } + } + + return val; +} + +static int +nvme_create_queue(struct nvme_bar *bar, struct nvme_queue *queue, size_t id) +{ + uint8_t dbstride; + uint16_t slots; + uint64_t caps; + uintptr_t sq_db, cq_db; + + caps = mmio_read32(&bar->caps); + dbstride = CAP_STRIDE(caps); + slots = CAP_MQES(caps); + + queue->sq = dynalloc_memalign(sizeof(void *) * slots, 0x1000); + queue->cq = dynalloc_memalign(sizeof(void *) * slots, 0x1000); + + if (queue->sq == NULL) { + return -ENOMEM; + } + + if (queue->cq == NULL) { + dynfree(queue->sq); + return -ENOMEM; + } + + memset(queue->sq, 0, sizeof(void *) * slots); + memset(queue->cq, 0, sizeof(void *) * slots); + + sq_db = (uintptr_t)bar + DEFAULT_PAGESIZE + (2 * id * (4 << dbstride)); + cq_db = (uintptr_t)bar + DEFAULT_PAGESIZE + ((2 * id + 1) * (4 << dbstride)); + + queue->sq_head = 0; + queue->sq_tail = 0; + + queue->size = slots; + queue->cq_phase = 1; + queue->sq_db = (void *)sq_db; + queue->cq_db = (void *)cq_db; + return 0; +} + +/* + * Stop and reset the NVMe controller. + */ +static int +nvme_stop_ctrl(struct nvme_bar *bar) +{ + uint32_t config, status; + + /* Do not reset if CSTS.RDY is 0 */ + status = mmio_read32(&bar->status); + if (!ISSET(status, STATUS_RDY)) { + return 0; + } + + /* Clear the enable bit to begin the reset */ + config = mmio_read32(&bar->config); + config &= ~CONFIG_EN; + mmio_write32(&bar->config, config); + + if (nvme_poll_reg(bar, &bar->status, STATUS_RDY, false) < 0) { + pr_error("Controller reset timeout\n"); + return -ETIME; + } + + return 0; +} + +/* + * Start up the controller. + */ +static int +nvme_start_ctrl(struct nvme_bar *bar) +{ + uint32_t config, status; + + /* Cannot start if already started */ + status = mmio_read32(&bar->status); + if (ISSET(status, STATUS_RDY)) { + return 0; + } + + /* Enable the controller */ + config = mmio_read32(&bar->config); + config |= CONFIG_EN; + mmio_write32(&bar->config, config); + + if (nvme_poll_reg(bar, &bar->status, STATUS_RDY, true) < 0) { + pr_error("Controller startup timeout\n"); + return -ETIME; + } + + return 0; +} + +/* + * Submit a command. + */ +static void +nvme_submit_cmd(struct nvme_queue *q, struct nvme_cmd cmd) +{ + q->sq[q->sq_tail++] = cmd; + if (q->sq_tail >= q->size) { + q->sq_tail = 0; + } + + mmio_write32(q->sq_db, q->sq_tail); +} + +/* + * Submit a command and poll for completion. + */ +static int +nvme_poll_submit_cmd(struct nvme_queue *q, struct nvme_cmd cmd) +{ + uint16_t status; + uint8_t spins = 0; + + nvme_submit_cmd(q, cmd); + + for (;;) { + /* + * If the phase bit matches the most recently submitted + * command then the command has completed + */ + status = q->cq[q->cq_head].status; + if ((status & 1) == q->cq_phase) { + break; + } + + /* Are any error bits set? */ + if ((status & ~1) != 0) { + pr_trace("Command error (bits=0x%x)\n", status >> 1); + return -EIO; + } + + /* Check for timeout */ + if (spins > 5) { + pr_error("Hang while polling phase bit, giving up\n"); + return -ETIME; + } + + tmr.msleep(150); + ++spins; + } + + return 0; +} + +static int +nvme_identify(struct nvme_ctrl *ctrl, struct nvme_id *id) +{ + struct nvme_cmd cmd = {0}; + struct nvme_identify_cmd *idcmd = &cmd.identify; + + if (!is_4k_aligned(id)) { + return -1; + } + + idcmd->opcode = NVME_OP_IDENTIFY; + idcmd->nsid = 0; + idcmd->cns = 1; /* Identify controller */ + idcmd->prp1 = VIRT_TO_PHYS(id); + idcmd->prp2 = 0; + return nvme_poll_submit_cmd(&ctrl->adminq, cmd); +} + +/* + * For debugging purposes, logs some information + * found within the controller identify data structure. + */ +static void +nvme_log_ctrl_id(struct nvme_id *id) +{ + char mn[41] = {0}; + char sn[21] = {0}; + char fr[9] = {0}; + + for (size_t i = 0; i < sizeof(id->mn); ++i) { + mn[i] = id->mn[i]; + } + + for (size_t i = 0; i < sizeof(id->fr); ++i) { + fr[i] = id->fr[i]; + } + + for (size_t i = 0; i < sizeof(id->sn); ++i) { + sn[i] = id->sn[i]; + } + + pr_trace("Model number: %s\n", mn); + pr_trace("Serial number: %s\n", sn); + pr_trace("Firmware revision: %s\n", fr); +} + +/* + * Init PCI related controller bits + */ +static void +nvme_init_pci(void) +{ + uint32_t tmp; + + /* Enable bus mastering and MMIO */ + tmp = pci_readl(nvme_dev, PCIREG_CMDSTATUS); + tmp |= (PCI_BUS_MASTERING | PCI_MEM_SPACE); + pci_writel(nvme_dev, PCIREG_CMDSTATUS, tmp); +} + +static int +nvme_init_ctrl(struct nvme_bar *bar) +{ + int error; + uint64_t caps; + uint16_t mqes; + struct nvme_ctrl ctrl = {0}; + struct nvme_queue *adminq; + struct nvme_id *id; + + /* Ensure the controller is stopped */ + if ((error = nvme_stop_ctrl(bar)) != 0) { + return error; + } + + adminq = &ctrl.adminq; + caps = mmio_read64(&bar->caps); + mqes = CAP_MQES(caps); + + /* Setup admin queues */ + nvme_create_queue(bar, adminq, 0); + mmio_write32(&bar->aqa, (mqes | mqes << 16)); + mmio_write64(&bar->asq, VIRT_TO_PHYS(adminq->sq)); + mmio_write64(&bar->acq, VIRT_TO_PHYS(adminq->cq)); + + /* Now bring the controller back up */ + if ((error = nvme_start_ctrl(bar)) != 0) { + return error; + } + + id = dynalloc_memalign(sizeof(*id), 0x1000); + if (id == NULL) { + return -ENOMEM; + } + + nvme_identify(&ctrl, id); + nvme_log_ctrl_id(id); + dynfree(id); + return 0; +} + +static int +nvme_init(void) +{ + struct pci_lookup lookup; + struct nvme_bar *bar; + int error; + + lookup.pci_class = 1; + lookup.pci_subclass = 8; + nvme_dev = pci_get_device(lookup, PCI_CLASS | PCI_SUBCLASS); + + if (nvme_dev == NULL) { + return -ENODEV; + } + + /* Try to request a general purpose timer */ + if (req_timer(TIMER_GP, &tmr) != TMRR_SUCCESS) { + pr_error("Failed to fetch general purpose timer\n"); + return -ENODEV; + } + + /* Ensure it has get_time_usec() */ + if (tmr.get_time_usec == NULL) { + pr_error("General purpose timer has no get_time_usec()\n"); + return -ENODEV; + } + + /* We also need msleep() */ + if (tmr.msleep == NULL) { + pr_error("General purpose timer has no msleep()\n"); + return -ENODEV; + } + + nvme_init_pci(); + + if ((error = pci_map_bar(nvme_dev, 0, (void *)&bar)) != 0) { + return error; + } + + return nvme_init_ctrl(bar); +} + +DRIVER_EXPORT(nvme_init); diff --git a/sys/include/dev/ic/nvmeregs.h b/sys/include/dev/ic/nvmeregs.h new file mode 100644 index 0000000..6b18e4d --- /dev/null +++ b/sys/include/dev/ic/nvmeregs.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IC_NVMEREGS_H_ +#define _IC_NVMEREGS_H_ + +#include <sys/types.h> +#include <sys/param.h> + +/* Controller Capabilities */ +#define CAP_MPSMIN(caps) ((caps >> 48) & 0xF) +#define CAP_MPSMAX(caps) ((caps >> 52) & 0xF) +#define CAP_TIMEOUT(caps) ((caps >> 24) & 0xFF) +#define CAP_STRIDE(caps) ((caps >> 32) & 0xF) +#define CAP_MQES(caps) (caps & 0xFFFF) +#define CAP_CSS(caps) (caps & 0xFF) + +/* Controller Configuration */ +#define CONFIG_EN BIT(0) +#define CONFIG_CSS_SHIFT 4 +#define CONFIG_IOSQES_SHIFT 16 +#define CONFIG_IOCQES_SHIFT 20 + +/* Controller status */ +#define STATUS_RDY BIT(0) +#define STATUS_CFS BIT(1) + +/* Command sets supported */ +#define CSS_NVM BIT(0) +#define CSS_IO BIT(6) +#define CSS_NO_IO BIT(7) + +/* NVMe controller */ +struct nvme_bar { + volatile uint64_t caps; + volatile uint32_t version; + volatile uint32_t intms; /* Interrupt mask set */ + volatile uint32_t intmc; /* Interrupt mask clear */ + volatile uint32_t config; + volatile uint32_t unused1; + volatile uint32_t status; + volatile uint32_t unused2; + volatile uint32_t aqa; /* Admin queue attributes */ + volatile uint64_t asq; /* Admin submission queue */ + volatile uint64_t acq; /* Admin completion queue */ +}; + +#endif /* !_IC_NVMEREGS_H_ */ diff --git a/sys/include/dev/ic/nvmevar.h b/sys/include/dev/ic/nvmevar.h new file mode 100644 index 0000000..aa68331 --- /dev/null +++ b/sys/include/dev/ic/nvmevar.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Hyra nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IC_NVMEVAR_H_ +#define _IC_NVMEVAR_H_ + +#include <sys/types.h> + +/* Admin commands */ +#define NVME_OP_CREATE_IOSQ 0x01 +#define NVME_OP_CREATE_IOCQ 0x05 +#define NVME_OP_IDENTIFY 0x06 + +struct nvme_identify_cmd { + uint8_t opcode; + uint8_t flags; + uint16_t cid; + uint32_t nsid; + uint64_t unused1; + uint64_t unused2; + uint64_t prp1; + uint64_t prp2; + uint32_t cns; + uint32_t unused3[5]; +}; + +/* Command completion queue entry */ +struct nvme_cq_entry { + uint32_t res; + uint32_t unused; + uint16_t sqhead; + uint16_t sqid; + uint16_t cid; + uint16_t status; +}; + +struct nvme_cmd { + union { + struct nvme_identify_cmd identify; + }; +}; + +struct nvme_queue { + struct nvme_cmd *sq; /* Submission queue */ + struct nvme_cq_entry *cq; /* Completion queue */ + uint16_t sq_head; /* Submission queue head */ + uint16_t sq_tail; /* Submission queue tail */ + uint16_t cq_head; /* Completion queue head */ + uint8_t cq_phase : 1; /* Completion queue phase bit */ + uint16_t size; /* Size in elements */ + volatile uint32_t *sq_db; /* Submission doorbell */ + volatile uint32_t *cq_db; /* Completion doorbell */ +}; + +struct nvme_id { + uint16_t vid; + uint16_t ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + uint8_t rab; + uint8_t ieee[3]; + uint8_t mic; + uint8_t mdts; + uint16_t ctrlid; + uint32_t version; + uint32_t unused1[43]; + uint16_t oacs; + uint8_t acl; + uint8_t aerl; + uint8_t fw; + uint8_t lpa; + uint8_t elpe; + uint8_t npss; + uint8_t avscc; + uint8_t apsta; + uint16_t wctemp; + uint16_t cctemp; + uint16_t unused2[121]; + uint8_t sqes; + uint8_t cqes; + uint16_t unused3; + uint32_t nn; + uint16_t oncs; + uint16_t fuses; + uint8_t fna; + uint8_t vwc; + uint16_t awun; + uint16_t awupf; + uint8_t nvscc; + uint8_t unused4; + uint16_t acwu; + uint16_t unused5; + uint32_t sgls; + uint32_t unused6[1401]; + uint8_t vs[1024]; +}; + +struct nvme_ctrl { + struct nvme_queue adminq; +}; + +#endif /* !_IC_NVMEVAR_H_ */ |