diff options
Diffstat (limited to 'sys/dev/ic')
-rw-r--r-- | sys/dev/ic/ahci.c | 771 | ||||
-rw-r--r-- | sys/dev/ic/nvme.c | 614 |
2 files changed, 0 insertions, 1385 deletions
diff --git a/sys/dev/ic/ahci.c b/sys/dev/ic/ahci.c deleted file mode 100644 index 257fd00..0000000 --- a/sys/dev/ic/ahci.c +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Hyra nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/driver.h> -#include <sys/timer.h> -#include <sys/types.h> -#include <sys/cdefs.h> -#include <sys/syslog.h> -#include <sys/mmio.h> -#include <sys/errno.h> -#include <sys/device.h> -#include <fs/devfs.h> -#include <dev/pci/pci.h> -#include <dev/ic/ahciregs.h> -#include <dev/ic/ahcivar.h> -#include <machine/bus.h> -#include <vm/vm.h> -#include <string.h> - -__KERNEL_META("$Hyra$: ahci.c, Ian Marco Moffett, " - "AHCI driver"); - -#define pr_trace(fmt, ...) kprintf("ahci: " fmt, ##__VA_ARGS__) -#define pr_error(...) pr_trace(__VA_ARGS__) - -static TAILQ_HEAD(, ahci_device) sata_devs; - -static struct pci_device *dev; -static struct timer driver_tmr; -static struct mutex io_lock; -static size_t dev_count = 0; - -static bool -is_word_aligned(void *ptr) -{ - return (((uintptr_t)ptr) & 1) == 0; -} - -/* - * Fetch a SATA device with a SATA device - * minor. - * - * @dev_minor: SATA device minor. - */ -static struct ahci_device * -ahci_get_sata(dev_t dev_minor) -{ - struct ahci_device *dev; - - TAILQ_FOREACH(dev, &sata_devs, link) { - if (dev->minor == dev_minor) { - return dev; - } - } - - return NULL; -} - -/* - * Poll register to have `bits' set/unset. - * - * @reg: Register to poll. - * @bits: Bits expected to be set/unset. - * @pollset: True to poll as set. - */ -static int -ahci_poll_reg32(volatile uint32_t *reg, uint32_t bits, bool pollset) -{ - uint32_t time_waited = 0; - uint32_t val; - bool tmp; - - for (;;) { - val = mmio_read32(reg); - tmp = (pollset) ? __TEST(val, bits) : !__TEST(val, bits); - if (tmp) - break; - if (time_waited >= AHCI_TIMEOUT) - /* Timeout */ - return -1; - - driver_tmr.msleep(10); - time_waited += 10; - } - - return 0; -} - -/* - * Put the HBA in AHCI mode. - */ -static inline void -ahci_set_ahci(struct ahci_hba *hba) -{ - struct hba_memspace *abar = hba->abar; - uint32_t ghc; - - /* Enable AHCI mode */ - ghc = mmio_read32(&abar->ghc); - ghc |= AHCI_GHC_AE; - mmio_write32(&abar->ghc, ghc); -} - -/* - * Reset the HBA with GHC.HR - * - * XXX: The spec states that all port registers *except* - * PxFB, PxFBU, PxCLB, PxCLBU are reset. - */ -static int -ahci_hba_reset(struct ahci_hba *hba) -{ - struct hba_memspace *abar = hba->abar; - uint32_t ghc; - uint8_t attempts = 0; - int status; - - ghc = mmio_read32(&abar->ghc); - ghc |= AHCI_GHC_HR; - mmio_write32(&abar->ghc, ghc); - - /* - * Poll the GHC.HR bit. The HBA is supposed to flip - * it back to zero once the reset is complete. If - * the HBA does not do this, something is screwed - * up. - * - * XXX: We do this twice in case of slow hardware... - */ - while ((attempts++) < 2) { - status = ahci_poll_reg32(&abar->ghc, AHCI_GHC_HR, false); - if (status == 0) { - break; - } - } - - /* We hope this doesn't happen */ - if (status != 0) { - pr_error("HBA reset failure: GHC.HR stuck (HBA hung)\n"); - return status; - } - - ahci_set_ahci(hba); - return 0; -} - -/* - * Stop port and put it in an idle state. - */ -static int -ahci_stop_port(struct hba_port *port) -{ - const uint32_t RUN_MASK = (AHCI_PXCMD_FR | AHCI_PXCMD_CR); - uint32_t cmd = mmio_read32(&port->cmd); - - /* Check if it is already stopped */ - if (!__TEST(cmd, RUN_MASK)) - return 0; - - /* - * Stop the FIS receive and disable proessing - * of the command list. - */ - cmd &= ~(AHCI_PXCMD_ST | AHCI_PXCMD_FRE); - mmio_write32(&port->cmd, cmd); - return ahci_poll_reg32(&port->cmd, RUN_MASK, false); -} - -/* - * Put a port in a running state. - */ -static int -ahci_start_port(struct hba_port *port) -{ - const uint32_t RUN_MASK = (AHCI_PXCMD_FR | AHCI_PXCMD_CR); - uint32_t cmd = mmio_read32(&port->cmd); - - /* Check if it is already running */ - if (__TEST(cmd, RUN_MASK)) - return 0; - - /* Start everything up */ - cmd |= (AHCI_PXCMD_ST | AHCI_PXCMD_FRE); - mmio_write32(&port->cmd, cmd); - return ahci_poll_reg32(&port->cmd, RUN_MASK, true); -} - -/* - * Check if a port is active. - * - * @port: Port to check. - */ -static bool -ahci_port_active(struct hba_port *port) -{ - uint32_t ssts; - uint8_t det, ipm; - - ssts = mmio_read32(&port->ssts); - det = AHCI_PXSSTS_DET(ssts); - ipm = AHCI_PXSSTS_IPM(ssts); - return (det == AHCI_DET_COMM && ipm == AHCI_IPM_ACTIVE); -} - -/* - * Dump identify structure for debugging - * purposes. - */ -static void -ahci_dump_identity(struct ata_identity *identity) -{ - char serial_number[20]; - char model_number[40]; - char tmp; - - memcpy(serial_number, identity->serial_number, sizeof(serial_number)); - memcpy(model_number, identity->model_number, sizeof(model_number)); - - serial_number[sizeof(serial_number) - 1] = '\0'; - model_number[sizeof(model_number) - 1] = '\0'; - - /* Fixup endianess for serial number */ - for (size_t i = 0; i < sizeof(serial_number); i += 2) { - tmp = serial_number[i]; - serial_number[i] = serial_number[i + 1]; - serial_number[i + 1] = tmp; - } - - /* Fixup endianess for model number */ - for (size_t i = 0; i < sizeof(model_number); i += 2) { - tmp = model_number[i]; - model_number[i] = model_number[i + 1]; - model_number[i + 1] = tmp; - } - - pr_trace("DRIVE MODEL NUMBER: %s\n", model_number); - pr_trace("DRIVE SERIAL NUMBER: %s\n", serial_number); -} - -/* - * Allocate a command slot. - */ -static int -ahci_alloc_cmdslot(struct ahci_hba *hba, struct hba_port *port) -{ - uint32_t slotlist = (port->ci | port->sact); - - for (uint16_t i = 0; i < hba->ncmdslots; ++i) { - if (!__TEST(slotlist, i)) - return i; - } - - return -1; -} - -/* - * Submit a command to a device - * - * @port: Port of device to submit command to - * @cmdslot: Command slot. - */ -static int -ahci_submit_cmd(struct ahci_hba *hba, struct hba_port *port, uint8_t cmdslot) -{ - const uint32_t BUSY_BITS = (AHCI_PXTFD_BSY | AHCI_PXTFD_DRQ); - const uint8_t MAX_ATTEMPTS = 3; - uint8_t attempts = 0; - int status = 0; - - /* - * Ensure the port isn't busy before we try to send - * any commands. Spin on BSY and DRQ bits until they - * become unset or we timeout. - */ - if (ahci_poll_reg32(&port->tfd, BUSY_BITS, false) < 0) { - pr_error("Command failed: Port is busy! (slot=%d)\n", cmdslot); - return -EBUSY; - } - - /* Activate the command slot */ - mutex_acquire(&io_lock); - mmio_write32(&port->ci, __BIT(cmdslot)); - - /* - * Wait for completion. since this might take a bit, we - * give it a few attempts in case it doesn't finish - * right away. - */ - while ((attempts++) < MAX_ATTEMPTS) { - status = ahci_poll_reg32(&port->ci, __BIT(cmdslot), false); - if (status == 0) { - break; - } - } - - /* Did we timeout? */ - if (status != 0) { - pr_error("IDENTIFY timeout: slot %d still set!\n", cmdslot); - } - - mutex_release(&io_lock); - return status; -} - -static int -ahci_sata_rw(struct ahci_hba *hba, struct hba_port *port, struct sio_txn *sio, - bool write) -{ - paddr_t buf_phys; - struct ahci_cmd_hdr *cmdhdr; - struct ahci_cmdtab *cmdtbl; - struct ahci_fis_h2d *fis; - int cmdslot, status; - - if (sio->buf == NULL || !is_word_aligned(sio->buf)) - return -EINVAL; - if (sio->len == 0) - return -EINVAL; - - buf_phys = VIRT_TO_PHYS(sio->buf); - cmdslot = ahci_alloc_cmdslot(hba, port); - - /* Setup command header */ - cmdhdr = PHYS_TO_VIRT(port->clb + cmdslot * sizeof(struct ahci_cmd_hdr)); - cmdhdr->w = 0; - cmdhdr->cfl = sizeof(struct ahci_fis_h2d) / 4; - cmdhdr->prdtl = 1; - - /* Setup physical region descriptor */ - cmdtbl = PHYS_TO_VIRT(cmdhdr->ctba); - cmdtbl->prdt[0].dba = buf_phys; - cmdtbl->prdt[0].dbc = (sio->len << 9) - 1; - cmdtbl->prdt[0].i = 0; - - /* Setup command FIS */ - fis = (void *)&cmdtbl->cfis; - fis->type = FIS_TYPE_H2D; - fis->command = write ? ATA_CMD_WRITE_DMA : ATA_CMD_READ_DMA; - fis->c = 1; - fis->device = (1 << 6); - - /* Setup LBA */ - fis->lba0 = sio->offset & 0xFF; - fis->lba1 = (sio->offset >> 8) & 0xFF; - fis->lba2 = (sio->offset >> 16) & 0xFF; - fis->lba3 = (sio->offset >> 24) & 0xFF; - fis->lba4 = (sio->offset >> 32) & 0xFF; - fis->lba5 = (sio->offset >> 40) & 0xFF; - - /* Setup count */ - fis->countl = sio->len & 0xFF; - fis->counth = (sio->len >> 8) & 0xFF; - - if ((status = ahci_submit_cmd(hba, port, cmdslot)) != 0) { - return status; - } - - return 0; -} - -/* - * Send the IDENTIFY command to a device and - * log info for debugging purposes. - */ -static int -ahci_identify(struct ahci_hba *hba, struct hba_port *port) -{ - paddr_t buf_phys; - struct ahci_cmd_hdr *cmdhdr; - struct ahci_cmdtab *cmdtbl; - struct ahci_fis_h2d *fis; - int cmdslot; - void *buf; - int status = 0; - - cmdslot = ahci_alloc_cmdslot(hba, port); - buf_phys = vm_alloc_pageframe(1); - buf = PHYS_TO_VIRT(buf_phys); - - if (buf_phys == 0) { - status = -ENOMEM; - goto done; - } - - if (cmdslot < 0) { - status = cmdslot; - goto done; - } - - memset(buf, 0, vm_get_page_size()); - cmdhdr = PHYS_TO_VIRT(port->clb + cmdslot * sizeof(struct ahci_cmd_hdr)); - cmdhdr->w = 0; - cmdhdr->cfl = sizeof(struct ahci_fis_h2d) / 4; - cmdhdr->prdtl = 1; - - cmdtbl = PHYS_TO_VIRT(cmdhdr->ctba); - cmdtbl->prdt[0].dba = VIRT_TO_PHYS(buf); - cmdtbl->prdt[0].dbc = 511; - cmdtbl->prdt[0].i = 0; - - fis = (void *)&cmdtbl->cfis; - fis->command = ATA_CMD_IDENTIFY; - fis->c = 1; - fis->type = FIS_TYPE_H2D; - - if ((status = ahci_submit_cmd(hba, port, cmdslot)) != 0) { - goto done; - } - - ahci_dump_identity(buf); -done: - vm_free_pageframe(VIRT_TO_PHYS(buf), 1); - return status; -} - -/* - * Device interface read/write helper - */ -static int -sata_dev_rw(struct device *dev, struct sio_txn *sio, bool write) -{ - struct ahci_device *sata; - - if (sio == NULL) - return -1; - if (sio->buf == NULL) - return -1; - - sata = ahci_get_sata(dev->minor); - - if (sata == NULL) - return -1; - - return ahci_sata_rw(sata->hba, sata->port, sio, write); -} - -/* - * Device interface read - */ -static int -sata_dev_read(struct device *dev, struct sio_txn *sio) -{ - return sata_dev_rw(dev, sio, false); -} - -/* - * Device interface write - */ -static int -sata_dev_write(struct device *dev, struct sio_txn *sio) -{ - return sata_dev_rw(dev, sio, true); -} - -/* - * Device interface open - */ -static int -sata_dev_open(struct device *dev) -{ - return 0; -} - -/* - * Device interface close - */ -static int -sata_dev_close(struct device *dev) -{ - return 0; -} - -/* - * Register a SATA device to the rest of the system - * and expose to userland as a device file. - */ -static int -ahci_sata_register(struct ahci_hba *hba, struct hba_port *port) -{ - char devname[128]; - struct device *dev = NULL; - struct ahci_device *sata = NULL; - dev_t dev_id; - dev_t major; - - sata = dynalloc(sizeof(struct ahci_device)); - if (sata == NULL) { - return -ENOMEM; - } - - dev_id = ++dev_count; - major = device_alloc_major(); - - dev = device_alloc(); - dev->open = sata_dev_open; - dev->close = sata_dev_close; - dev->read = sata_dev_read; - dev->write = sata_dev_write; - dev->blocksize = 512; - device_create(dev, dev_id, major); - - sata->port = port; - sata->hba = hba; - sata->minor = dev->minor; - - snprintf(devname, sizeof(devname), "sd%d", dev_id); - devfs_add_dev(devname, dev); - TAILQ_INSERT_TAIL(&sata_devs, sata, link); - return 0; -} - -/* - * Init a single port. - * - * @port: Port to init. - */ -static int -ahci_init_port(struct ahci_hba *hba, struct hba_port *port, size_t portno) -{ - paddr_t tmp; - struct ahci_cmd_hdr *cmdlist; - void *fis; - size_t cmdlist_size, pagesize; - uint32_t sig; - uint8_t ncmdslots; - int status = 0; - - sig = mmio_read32(&port->sig); - status = ahci_stop_port(port); - if (status != 0) { - pr_trace("Failed to stop port %d\n", portno); - return status; - } - - /* Try to report device type based on signature */ - switch (sig) { - case AHCI_SIG_PM: - pr_trace("Port %d has port multiplier signature\n", portno); - return 0; /* TODO */ - case AHCI_SIG_ATA: - pr_trace("Port %d has ATA signature (SATA drive)\n", portno); - break; - default: - return 0; /* TODO */ - } - - ncmdslots = hba->ncmdslots; - pagesize = vm_get_page_size(); - - /* Allocate our command list */ - cmdlist_size = __ALIGN_UP(ncmdslots * AHCI_CMDENTRY_SIZE, pagesize); - tmp = vm_alloc_pageframe(cmdlist_size / pagesize); - cmdlist = PHYS_TO_VIRT(tmp); - if (tmp == 0) { - pr_trace("Failed to allocate cmdlist\n"); - status = -ENOMEM; - goto done; - } - - tmp = vm_alloc_pageframe(1); - fis = PHYS_TO_VIRT(tmp); - if (tmp == 0) { - pr_trace("Failed to allocate FIS\n"); - status = -ENOMEM; - goto done; - } - - memset(cmdlist, 0, cmdlist_size); - memset(fis, 0, AHCI_FIS_SIZE); - hba->cmdlist = cmdlist; - - /* Set the registers */ - port->clb = VIRT_TO_PHYS(cmdlist); - port->fb = VIRT_TO_PHYS(fis); - - for (int i = 0; i < ncmdslots; ++i) { - cmdlist[i].prdtl = 1; - cmdlist[i].ctba = vm_alloc_pageframe(1); - } - - /* Now try to start up the port */ - if ((status = ahci_start_port(port)) != 0) { - pr_trace("Failed to start port %d\n", portno); - goto done; - } - - ahci_identify(hba, port); - ahci_sata_register(hba, port); -done: - if (status != 0 && cmdlist != NULL) - vm_free_pageframe(port->clb, cmdlist_size / pagesize); - if (status != 0 && fis != NULL) - vm_free_pageframe(port->fb, 1); - - return status; -} - -/* - * Hard reset port and reinitialize - * link. - */ -static int -ahci_reset_port(struct hba_port *port) -{ - uint32_t sctl, ssts; - - /* - * Some odd behaviour may occur if a COMRESET is sent - * to the port while it is in an idle state... - * A workaround to this is to bring the port up - * then immediately transmit the COMRESET to the device. - */ - ahci_start_port(port); - sctl = mmio_read32(&port->sctl); - - /* Transmit COMRESET for ~2ms */ - sctl = (sctl & ~0xF) | AHCI_DET_COMRESET; - mmio_write32(&port->sctl, sctl); - driver_tmr.msleep(2); - - /* Stop transmission of COMRESET */ - sctl &= ~AHCI_DET_COMRESET; - mmio_write32(&port->sctl, sctl); - - /* - * Give around ~150ms for the link to become - * reestablished. Then make sure that it is - * actually established by checking PxSSTS.DET - */ - driver_tmr.msleep(150); - ssts = mmio_read32(&port->ssts); - if (AHCI_PXSSTS_DET(ssts) != AHCI_DET_COMM) { - return -1; - } - - return 0; -} - -/* - * Sets up devices connected to the physical ports - * on the HBA. - * - * XXX: Since this is called after ahci_init_hba() which also - * resets the HBA, we'll need to reestablish the link - * between the devices and the HBA. - */ -static int -ahci_init_ports(struct ahci_hba *hba) -{ - struct hba_memspace *abar = hba->abar; - uint32_t ports_impl; - struct hba_port *port; - - pr_trace("HBA supports max %d port(s)\n", hba->nports); - ports_impl = mmio_read32(&abar->pi); - - /* Initialize active ports */ - for (size_t i = 0; i < sizeof(abar->pi) * 8; ++i) { - if (!__TEST(ports_impl, __BIT(i))) { - continue; - } - - port = &abar->ports[i]; - if (ahci_reset_port(port) != 0) { - continue; - } - - if (ahci_port_active(port)) { - ahci_init_port(hba, port, i); - } - } - - return 0; -} - -/* - * Sets up the HBA - */ -static int -ahci_init_hba(struct ahci_hba *hba) -{ - struct hba_memspace *abar = hba->abar; - uint32_t cap; - - /* Reset the HBA to ensure it is a known state */ - ahci_hba_reset(hba); - - /* Setup HBA structure and save some state */ - cap = mmio_read32(&abar->cap); - hba->ncmdslots = AHCI_CAP_NCS(cap) + 1; - hba->nports = AHCI_CAP_NP(cap) + 1; - - ahci_init_ports(hba); - return 0; -} - -static int -ahci_init(void) -{ - int status; - uint16_t cmdreg_bits; - uint32_t bar_size; - struct ahci_hba hba = {0}; - struct pci_lookup ahci_lookup = { - .pci_class = 0x01, - .pci_subclass = 0x06 - }; - - dev = pci_get_device(ahci_lookup, PCI_CLASS | PCI_SUBCLASS); - - if (dev == NULL) { - return -1; - } - - cmdreg_bits = PCI_BUS_MASTERING | PCI_MEM_SPACE; - pci_set_cmdreg(dev, cmdreg_bits); - - if (req_timer(TIMER_GP, &driver_tmr) != TMRR_SUCCESS) { - pr_error("Failed to fetch general purpose timer\n"); - return -1; - } - - if (driver_tmr.msleep == NULL) { - pr_error("Timer does not have msleep()\n"); - return -1; - } - - if ((bar_size = pci_bar_size(dev, 5)) == 0) { - pr_error("Failed to fetch BAR size\n"); - return -1; - } - - status = bus_map(dev->bar[5], bar_size, 0, (void *)&hba.abar); - if (status != 0) { - pr_error("Failed to map BAR into higher half\n"); - return -1; - } - - pr_trace("AHCI HBA memspace @ 0x%p\n", hba.abar); - TAILQ_INIT(&sata_devs); - ahci_init_hba(&hba); - return 0; -} - -DRIVER_EXPORT(ahci_init); diff --git a/sys/dev/ic/nvme.c b/sys/dev/ic/nvme.c deleted file mode 100644 index df533a3..0000000 --- a/sys/dev/ic/nvme.c +++ /dev/null @@ -1,614 +0,0 @@ -/* - * Copyright (c) 2023-2024 Ian Marco Moffett and the Osmora Team. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of Hyra nor the names of its - * contributors may be used to endorse or promote products derived from - * this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include <sys/driver.h> -#include <sys/cdefs.h> -#include <sys/syslog.h> -#include <sys/timer.h> -#include <sys/device.h> -#include <dev/pci/pci.h> -#include <dev/ic/nvmevar.h> -#include <vm/dynalloc.h> -#include <vm/vm.h> -#include <fs/devfs.h> -#include <string.h> - -__MODULE_NAME("nvme"); -__KERNEL_META("$Hyra$: nvme.c, Ian Marco Moffett, " - "NVMe driver"); - -#define pr_trace(fmt, ...) kprintf("nvme: " fmt, ##__VA_ARGS__) -#define pr_error(...) pr_trace(__VA_ARGS__) - -static struct pci_device *nvme_dev; -static struct timer driver_tmr; -static TAILQ_HEAD(,nvme_ns) namespaces; - -static inline int -is_4k_aligned(void *ptr) -{ - return ((uintptr_t)ptr & (0x1000 - 1)) == 0; -} - -/* - * Poll CSTS.RDY to equal `val' - * - * Returns `val' on success, returns < 0 value - * upon failure. - */ -static int -nvme_poll_ready(struct nvme_bar *bar, uint8_t val) -{ - uint8_t timeout = CAP_TIMEOUT(bar->caps); - uint8_t time_waited = 0; - - do { - if (STATUS_READY(bar->status) == val) { - /* Done waiting */ - break; - } - - /* - * If CSTS.RDY hasn't changed, we can try to wait a - * little longer. - * - * XXX: The spec states that CAP.TO (Timeout) is in 500 - * millisecond units. - */ - if (time_waited < timeout) { - driver_tmr.msleep(500); - ++time_waited; - } else { - return -1; - } - } while (1); - - return val; -} - -/* - * Create an NVMe queue. - */ -static int -nvme_create_queue(struct nvme_state *s, struct nvme_queue *queue, size_t id) -{ - struct nvme_bar *bar = s->bar; - const size_t PAGESZ = vm_get_page_size(); - const uint8_t DBSTRIDE = CAP_STRIDE(bar->caps); - const uint16_t SLOTS = CAP_MQES(bar->caps); - - queue->sq = dynalloc_memalign(sizeof(void *) * SLOTS, 0x1000); - queue->cq = dynalloc_memalign(sizeof(void *) * SLOTS, 0x1000); - - if (queue->sq == NULL) { - return -1; - } - if (queue->cq == NULL) { - dynfree(queue->sq); - return -1; - } - - memset(queue->sq, 0, sizeof(void *) * SLOTS); - memset(queue->cq, 0, sizeof(void *) * SLOTS); - - queue->sq_head = 0; - queue->sq_tail = 0; - queue->size = SLOTS; - queue->sq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + (2 * id * (4 << DBSTRIDE))); - queue->cq_db = PHYS_TO_VIRT((uintptr_t)bar + PAGESZ + ((2 * id + 1) * (4 << DBSTRIDE))); - queue->cq_phase = 1; - return 0; -} - -/* - * Submit a command - * - * @queue: Target queue. - * @cmd: Command to submit - */ -static void -nvme_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd) -{ - /* Submit the command to the queue */ - queue->sq[queue->sq_tail++] = cmd; - if (queue->sq_tail >= queue->size) { - queue->sq_tail = 0; - } - *(queue->sq_db) = queue->sq_tail; -} - -/* - * Submit a command and poll for completion - * - * @queue: Target queue. - * @cmd: Command to submit - */ -static int -nvme_poll_submit_cmd(struct nvme_queue *queue, struct nvme_cmd cmd) -{ - uint16_t status; - size_t spins = 0; - - nvme_submit_cmd(queue, cmd); - - /* - * Wait for the current command to complete by - * polling the phase bit. - */ - while (1) { - status = queue->cq[queue->cq_head].status; - if ((status & 1) == queue->cq_phase) { - /* - * The phase bit matches the phase for the most - * recently submitted command, the command has completed. - */ - break; - } - if ((status & ~1) != 0) { - pr_trace("NVMe cmd error (bits=0x%x)\n", status >> 1); - break; - } - if (spins > 5) { - /* Attempts exhausted */ - pr_error("Hang on phase bit poll, giving up (cmd error)\n"); - break; - } - - /* Not done, give it some more time */ - driver_tmr.msleep(150); - ++spins; - } - - ++queue->cq_head; - if (queue->cq_head >= queue->size) { - queue->cq_head = 0; - queue->cq_phase = !queue->cq_phase; - } - - /* Tell the controller that `head' updated */ - *(queue->cq_db) = queue->cq_head; - return 0; -} - -/* - * Create an I/O queue for a specific namespace. - * - * @ns: Namespace - * @id: I/O queue ID - */ -static int -nvme_create_ioq(struct nvme_ns *ns, size_t id) -{ - struct nvme_queue *ioq = &ns->ioq; - struct nvme_state *cntl = ns->cntl; - - struct nvme_bar *bar = cntl->bar; - struct nvme_cmd cmd = {0}; - size_t mqes = CAP_MQES(bar->caps); - - struct nvme_create_iocq_cmd *create_iocq; - struct nvme_create_iosq_cmd *create_iosq; - int status; - - if ((status = nvme_create_queue(ns->cntl, ioq, id)) != 0) { - return status; - } - - create_iocq = &cmd.create_iocq; - create_iocq->opcode = NVME_OP_CREATE_IOCQ; - create_iocq->qflags |= __BIT(0); /* Physically contiguous */ - create_iocq->qsize = mqes; - create_iocq->qid = id; - create_iocq->prp1 = VIRT_TO_PHYS(ns->ioq.cq); - - if ((status = nvme_poll_submit_cmd(&cntl->adminq, cmd)) != 0) { - return status; - } - - create_iosq = &cmd.create_iosq; - create_iosq->opcode = NVME_OP_CREATE_IOSQ; - create_iosq->qflags |= __BIT(0); /* Physically contiguous */ - create_iosq->qsize = mqes; - create_iosq->cqid = id; - create_iosq->sqid = id; - create_iosq->prp1 = VIRT_TO_PHYS(ns->ioq.sq); - return nvme_poll_submit_cmd(&cntl->adminq, cmd); -} - -/* - * Issue an identify command for the current - * controller. - * - * XXX: `id' must be aligned on a 4k byte boundary to avoid - * crossing a page boundary. This keeps the implementation - * as simple as possible here. - */ -static int -nvme_identify(struct nvme_state *state, struct nvme_id *id) -{ - struct nvme_cmd cmd = {0}; - struct nvme_identify_cmd *identify = &cmd.identify; - - /* Ensure `id' is aligned on a 4k byte boundary */ - if (!is_4k_aligned(id)) { - return -1; - } - - identify->opcode = NVME_OP_IDENTIFY; - identify->nsid = 0; - identify->cns = 1; /* Identify controller */ - identify->prp1 = VIRT_TO_PHYS(id); - identify->prp2 = 0; /* No need, data address is 4k aligned */ - return nvme_poll_submit_cmd(&state->adminq, cmd); -} - -/* - * Issue a read/write command for a specific - * namespace. - * - * `buf' must be 4k aligned. - */ -static int -nvme_rw(struct nvme_ns *ns, char *buf, off_t slba, size_t count, bool write) -{ - struct nvme_cmd cmd = {0}; - struct nvme_rw_cmd *rw = &cmd.rw; - - if (!is_4k_aligned(buf)) { - return -1; - } - - rw->opcode = write ? NVME_OP_WRITE : NVME_OP_READ; - rw->nsid = ns->nsid; - rw->slba = slba; - rw->len = count - 1; - rw->prp1 = VIRT_TO_PHYS(buf); - return nvme_poll_submit_cmd(&ns->ioq, cmd); -} - -/* - * Fetch a namespace from its ID - * - * @nsid: Namespace ID of namespace to fetch - */ -static struct nvme_ns * -nvme_get_ns(size_t nsid) -{ - struct nvme_ns *ns; - - TAILQ_FOREACH(ns, &namespaces, link) { - if (ns->nsid == nsid) { - return ns; - } - } - - return NULL; -} - -/* - * Device interface read/write helper - */ -static int -nvme_dev_rw(struct device *dev, struct sio_txn *sio, bool write) -{ - struct nvme_ns *ns; - - if (sio == NULL) { - return -1; - } - - ns = nvme_get_ns(dev->minor); - if (ns == NULL || sio->buf == NULL) { - return -1; - } - - return nvme_rw(ns, sio->buf, sio->offset, sio->len, write); -} - -/* - * Device interface read - */ -static int -nvme_dev_read(struct device *dev, struct sio_txn *sio) -{ - return nvme_dev_rw(dev, sio, false); -} - -/* - * Device interface write - */ -static int -nvme_dev_write(struct device *dev, struct sio_txn *sio) -{ - return nvme_dev_rw(dev, sio, true); -} - -static int -nvme_dev_open(struct device *dev) -{ - return 0; -} - -/* - * Get identify data for namespace - * - * @id_ns: Data will be written to this pointer via DMA. - * @nsid: Namespace ID. - * - * XXX: `id_ns' must be 4k aligned. - */ -static int -nvme_id_ns(struct nvme_state *s, struct nvme_id_ns *id_ns, uint16_t nsid) -{ - struct nvme_cmd cmd = {0}; - struct nvme_identify_cmd *identify = &cmd.identify; - - if (!is_4k_aligned(id_ns)) { - return -1; - } - - identify->opcode = NVME_OP_IDENTIFY; - identify->nsid = nsid; - identify->cns = 0; - identify->prp1 = VIRT_TO_PHYS(id_ns); - return nvme_poll_submit_cmd(&s->adminq, cmd); -} - -/* - * Init a namespace. - * - * @nsid: Namespace ID - */ -static int -nvme_init_ns(struct nvme_state *state, uint16_t nsid) -{ - char devname[128]; - struct nvme_ns *ns = NULL; - struct nvme_id_ns *id_ns = NULL; - struct device *dev; - uint8_t lba_format; - int status = 0; - - ns = dynalloc(sizeof(struct nvme_ns)); - if (ns == NULL) { - status = -1; - goto done; - } - - id_ns = dynalloc_memalign(sizeof(struct nvme_id_ns), 0x1000); - if ((status = nvme_id_ns(state, id_ns, nsid)) != 0) { - dynfree(ns); - goto done; - } - - lba_format = id_ns->flbas & 0xF; - ns->lba_fmt = id_ns->lbaf[lba_format]; - ns->nsid = nsid; - ns->lba_bsize = 1 << ns->lba_fmt.ds; - ns->size = id_ns->size; - ns->cntl = state; - nvme_create_ioq(ns, ns->nsid); - - dev = device_alloc(); - dev->read = nvme_dev_read; - dev->write = nvme_dev_write; - dev->open = nvme_dev_open; - dev->blocksize = ns->lba_bsize; - dev->mmap = NULL; - ns->dev_id = device_create(dev, state->major, nsid); - - snprintf(devname, sizeof(devname), "nvme0n%d", nsid); - if (devfs_add_dev(devname, dev) != 0) { - pr_error("Failed to create /dev/%s\n", devname); - } - - TAILQ_INSERT_TAIL(&namespaces, ns, link); -done: - if (id_ns != NULL) - dynfree(id_ns); - - return status; -} - -static int -nvme_disable_controller(struct nvme_state *state) -{ - struct nvme_bar *bar = state->bar; - - if (__TEST(bar->config, CONFIG_EN)) { - bar->config &= ~CONFIG_EN; - } - - if (nvme_poll_ready(bar, 0) < 0) { - pr_error("Failed to disable controller\n"); - return -1; - } - - return 0; -} - -/* - * For debugging purposes, logs some information - * found within the controller identify data structure. - */ -static void -nvme_log_ctrl_id(struct nvme_id *id) -{ - char mn[41] = {0}; - char fr[9] = {0}; - - for (size_t i = 0; i < sizeof(id->mn); ++i) { - mn[i] = id->mn[i]; - } - for (size_t i = 0; i < sizeof(id->fr); ++i) { - fr[i] = id->fr[i]; - } - - pr_trace("NVMe model: %s\n", mn); - pr_trace("NVMe firmware revision: %s\n", fr); -} - -/* - * Fetch the list of namespace IDs - * - * @nsids_out: NSIDs will be written here via DMA. - * - * XXX: `nsids_out' must be 4k aligned. - */ -static int -nvme_get_nsids(struct nvme_state *state, uint32_t *nsids_out) -{ - struct nvme_cmd cmd = {0}; - struct nvme_identify_cmd *identify = &cmd.identify; - - if (!is_4k_aligned(nsids_out)) { - return -1; - } - - identify->opcode = NVME_OP_IDENTIFY; - identify->cns = 2; /* Active NSID list */ - identify->prp1 = VIRT_TO_PHYS(nsids_out); - return nvme_poll_submit_cmd(&state->adminq, cmd); -} - -static int -nvme_enable_controller(struct nvme_state *state) -{ - struct nvme_bar *bar = state->bar; - struct nvme_id *id; - - uint32_t *nsids; - uint8_t max_sqes, max_cqes; - - if (!__TEST(bar->config, CONFIG_EN)) { - bar->config |= CONFIG_EN; - } - - if (nvme_poll_ready(bar, 1) < 0) { - pr_error("Failed to enable controller\n"); - return -1; - } - - id = dynalloc_memalign(sizeof(struct nvme_id), 0x1000); - if (id == NULL) { - return -1; - } - - nsids = dynalloc_memalign(0x1000, 0x1000); - if (nsids == NULL) { - return -1; - } - - nvme_identify(state, id); - nvme_log_ctrl_id(id); - nvme_get_nsids(state, nsids); - - /* - * Before creating any I/O queues we need to set CC.IOCQES - * and CC.IOSQES... Bits 3:0 is the minimum and bits 7:4 - * is the maximum. We'll choose the maximum. - */ - max_sqes = id->sqes >> 4; - max_cqes = id->cqes >> 4; - bar->config |= (max_sqes << CONFIG_IOSQES_SHIFT); - bar->config |= (max_cqes << CONFIG_IOCQES_SHIFT); - - /* Init NVMe namespaces */ - for (size_t i = 0; i < id->nn; ++i) { - if (nsids[i] != 0) { - pr_trace("Found NVMe namespace (id=%d)\n", nsids[i]); - nvme_init_ns(state, nsids[i]); - } - } - - dynfree(nsids); - dynfree(id); - return 0; -} - -static int -nvme_init_controller(struct nvme_bar *bar) -{ - struct nvme_state state = { . bar = bar }; - struct nvme_queue *adminq = &state.adminq; - - uint16_t mqes = CAP_MQES(bar->caps); - uint16_t cmdreg_bits = PCI_BUS_MASTERING | - PCI_MEM_SPACE; - - pci_set_cmdreg(nvme_dev, cmdreg_bits); - nvme_disable_controller(&state); - - nvme_create_queue(&state, adminq, 0); - - /* Setup admin submission and admin completion queues */ - bar->aqa = (mqes | mqes << 16); - bar->asq = VIRT_TO_PHYS(adminq->sq); - bar->acq = VIRT_TO_PHYS(adminq->cq); - - state.major = device_alloc_major(); - return nvme_enable_controller(&state); -} - -static int -nvme_init(void) -{ - struct nvme_bar *bar; - struct pci_lookup nvme_lookup = { - .pci_class = 1, - .pci_subclass = 8 - }; - - if (req_timer(TIMER_GP, &driver_tmr) != 0) { - pr_error("Failed to fetch general purpose timer\n"); - return -1; - } - - if (driver_tmr.msleep == NULL) { - pr_error("Timer does not have msleep()\n"); - return -1; - } - - nvme_dev = pci_get_device(nvme_lookup, PCI_CLASS | PCI_SUBCLASS); - if (nvme_dev == NULL) { - return -1; - } - - bar = PCI_BAR_MEMBASE(nvme_dev->bar[0]); - pr_trace("NVMe BAR0 @ 0x%p\n", bar); - TAILQ_INIT(&namespaces); - - if (nvme_init_controller(bar) < 0) { - return -1; - } - - return 0; -} - -DRIVER_EXPORT(nvme_init); |