aboutsummaryrefslogtreecommitdiff
path: root/lib/mlibc/options/internal/generic
diff options
context:
space:
mode:
authorIan Moffett <ian@osmora.org>2024-03-07 17:28:00 -0500
committerIan Moffett <ian@osmora.org>2024-03-07 17:28:32 -0500
commitbd5969fc876a10b18613302db7087ef3c40f18e1 (patch)
tree7c2b8619afe902abf99570df2873fbdf40a4d1a1 /lib/mlibc/options/internal/generic
parenta95b38b1b92b172e6cc4e8e56a88a30cc65907b0 (diff)
lib: Add mlibc
Signed-off-by: Ian Moffett <ian@osmora.org>
Diffstat (limited to 'lib/mlibc/options/internal/generic')
-rw-r--r--lib/mlibc/options/internal/generic/allocator.cpp196
-rw-r--r--lib/mlibc/options/internal/generic/charcode.cpp244
-rw-r--r--lib/mlibc/options/internal/generic/charset.cpp144
-rw-r--r--lib/mlibc/options/internal/generic/debug.cpp22
-rw-r--r--lib/mlibc/options/internal/generic/ensure.cpp18
-rw-r--r--lib/mlibc/options/internal/generic/essential.cpp217
-rw-r--r--lib/mlibc/options/internal/generic/frigg.cpp14
-rw-r--r--lib/mlibc/options/internal/generic/global-config.cpp27
-rw-r--r--lib/mlibc/options/internal/generic/inline-emitter.cpp16
-rw-r--r--lib/mlibc/options/internal/generic/locale.cpp87
-rw-r--r--lib/mlibc/options/internal/generic/sigset.cpp37
-rw-r--r--lib/mlibc/options/internal/generic/strings.cpp22
-rw-r--r--lib/mlibc/options/internal/generic/threads.cpp342
-rw-r--r--lib/mlibc/options/internal/generic/ubsan.cpp254
14 files changed, 1640 insertions, 0 deletions
diff --git a/lib/mlibc/options/internal/generic/allocator.cpp b/lib/mlibc/options/internal/generic/allocator.cpp
new file mode 100644
index 0000000..d738212
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/allocator.cpp
@@ -0,0 +1,196 @@
+
+#include <string.h>
+
+#include <bits/ensure.h>
+#include <frg/eternal.hpp>
+#include <mlibc/allocator.hpp>
+#include <mlibc/internal-sysdeps.hpp>
+#include <internal-config.h>
+
+#if !MLIBC_DEBUG_ALLOCATOR
+
+// --------------------------------------------------------
+// Globals
+// --------------------------------------------------------
+
+MemoryAllocator &getAllocator() {
+ // Use frg::eternal to prevent a call to __cxa_atexit().
+ // This is necessary because __cxa_atexit() itself calls this function.
+ static frg::eternal<VirtualAllocator> virtualAllocator; // maps/unmaps memory (see VirtualAllocator::map/unmap)
+ static frg::eternal<MemoryPool> heap{virtualAllocator.get()}; // pool constructed on top of virtualAllocator
+ static frg::eternal<MemoryAllocator> singleton{&heap.get()}; // the allocator object returned to every caller
+ return singleton.get();
+}
+
+// --------------------------------------------------------
+// VirtualAllocator
+// --------------------------------------------------------
+
+uintptr_t VirtualAllocator::map(size_t length) { // returns the address of a new mapping of `length` bytes
+ void *ptr; // filled in by sys_anon_allocate()
+ __ensure(!mlibc::sys_anon_allocate(length, &ptr)); // a non-zero sysdep result fails the __ensure
+ return (uintptr_t)ptr; // handed back as an integer address
+}
+
+void VirtualAllocator::unmap(uintptr_t address, size_t length) { // counterpart of map(): gives `length` bytes at `address` back
+ __ensure(!mlibc::sys_anon_free((void *)address, length)); // a non-zero sysdep result fails the __ensure
+}
+
+#else
+
+namespace {
+ struct AllocatorMeta { // header copied to the start of the metadata page by allocate()
+ size_t allocatedSize; // byte count that was passed to allocate()
+ size_t pagesSize; // allocatedSize rounded up to a multiple of pageSize
+ frg::array<uint64_t, 4> magic; // must equal allocatorMagic; checked by free/deallocate/reallocate
+ };
+
+ constexpr frg::array<uint64_t, 4> allocatorMagic { // marker value stored in AllocatorMeta::magic
+ 0x6d4bbb9f3446e83f, 0x25e213a7a7f9f954,
+ 0x1a3c667586538bef, 0x994f34ff71c090bc
+ };
+} // namespace anonymous
+
+// Turn vm_unmap calls in free into vm_map(..., PROT_NONE, ...) calls to prevent
+// those addresses from being reused. This is useful for detecting situations like this:
+// 1. Allocate object X at address Y
+// 2. Do some computation using object X
+// 3. Free object X at address Y
+// 4. Allocate object Z at address W, and it so happens that W == Y
+// 5. Try to use object X, but the memory which was backing it now contains object Z
+constexpr bool neverReleaseVa = false;
+constexpr bool logAllocations = false; // when true, allocate/free/deallocate/reallocate log their arguments via infoLogger()
+
+// Area before the returned allocated block (which exists due to us offsetting
+// the block to be as close to the edge of a page as possible).
+constexpr uint8_t offsetAreaValue = 'A';
+// Area which we return a pointer to in allocate and reallocate.
+constexpr uint8_t allocatedAreaValue = 'B';
+// Area after the allocated block, which exists due to the alignment constraints.
+constexpr uint8_t alignmentAreaValue = 'C';
+// Remaining area within the metadata page after the metadata.
+constexpr uint8_t metaAreaValue = 'D';
+
+// Alignment of the returned memory.
+// TODO(qookie): Eventually accept alignment as an argument of allocate.
+constexpr size_t pointerAlignment = 16;
+
+// TODO(qookie): Support this. Perhaps by overallocating by 2x and then picking
+// an offset that guarantees the desired alignment.
+static_assert(pointerAlignment <= 4096, "Pointer aligment of more than 4096 bytes is unsupported");
+static_assert(!(pointerAlignment & (pointerAlignment - 1)),
+ "Pointer aligment must be a power of 2");
+
+constexpr size_t pageSize = 0x1000; // granularity used for all rounding and for the metadata/guard pages below
+
+void *MemoryAllocator::allocate(size_t size) { // debug allocator: every block gets its own mapping
+ size_t pg_size = (size + size_t{pageSize - 1}) & ~size_t{pageSize - 1}; // size rounded up to whole pages
+ size_t offset = (pg_size - size) & ~size_t{pointerAlignment - 1}; // leading slack, rounded down to pointerAlignment
+
+ void *ptr;
+
+ // Two extra pages: one for metadata in front and one guard page at the end.
+ // Reserve the whole region as PROT_NONE...
+ if (int e = mlibc::sys_vm_map(nullptr, pg_size + pageSize * 2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0, &ptr))
+ mlibc::panicLogger() << "sys_vm_map failed in MemoryAllocator::allocate (errno " << e << ")" << frg::endlog;
+
+ // ...then replace pages to make them accessible, excluding the trailing guard page.
+ if (int e = mlibc::sys_vm_map(ptr, pg_size + pageSize, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0, &ptr))
+ mlibc::panicLogger() << "sys_vm_map failed in MemoryAllocator::allocate (errno " << e << ")" << frg::endlog;
+
+ void *meta = ptr; // first page holds the AllocatorMeta
+ void *out_page = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(ptr) + pageSize); // first data page
+ void *out = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out_page) + offset); // pointer returned to the caller
+ void *out_align_area = reinterpret_cast<void *>(reinterpret_cast<uintptr_t>(out) + size); // bytes after the block, up to the guard page
+
+ AllocatorMeta metaData{size, pg_size, allocatorMagic};
+
+ memset(meta, metaAreaValue, pageSize); // fill the metadata page with 'D'...
+ memcpy(meta, &metaData, sizeof(AllocatorMeta)); // ...then overwrite its start with the header
+
+ memset(out_page, offsetAreaValue, offset); // 'A' before the block
+ memset(out, allocatedAreaValue, size); // 'B' inside the block
+ memset(out_align_area, alignmentAreaValue, pg_size - offset - size); // 'C' after the block
+
+ if constexpr (logAllocations)
+ mlibc::infoLogger() << "MemoryAllocator::allocate(" << size << ") = " << out << frg::endlog;
+
+ return out;
+}
+
+void MemoryAllocator::free(void *ptr) { // size-less release: looks the size up in the metadata page
+ if (!ptr)
+ return; // freeing a null pointer is a no-op
+
+ if constexpr (logAllocations)
+ mlibc::infoLogger() << "MemoryAllocator::free(" << ptr << ")" << frg::endlog;
+
+ uintptr_t page_addr = reinterpret_cast<uintptr_t>(ptr) & ~size_t{pageSize - 1}; // start of the page containing ptr
+ AllocatorMeta *meta = reinterpret_cast<AllocatorMeta *>(page_addr - pageSize); // allocate() put the header one page earlier
+
+ if (meta->magic != allocatorMagic)
+ mlibc::panicLogger() << "Invalid allocator metadata magic in MemoryAllocator::free" << frg::endlog;
+
+ deallocate(ptr, meta->allocatedSize); // deallocate() re-validates and releases the mapping
+}
+
+void MemoryAllocator::deallocate(void *ptr, size_t size) { // sized release; `size` must match the recorded size
+ if (!ptr)
+ return; // releasing a null pointer is a no-op
+
+ if constexpr (logAllocations)
+ mlibc::infoLogger() << "MemoryAllocator::deallocate(" << ptr << ", " << size << ")" << frg::endlog;
+
+ uintptr_t page_addr = reinterpret_cast<uintptr_t>(ptr) & ~size_t{pageSize - 1}; // start of the page containing ptr
+ AllocatorMeta *meta = reinterpret_cast<AllocatorMeta *>(page_addr - pageSize); // header sits one page earlier
+
+ if (meta->magic != allocatorMagic)
+ mlibc::panicLogger() << "Invalid allocator metadata magic in MemoryAllocator::deallocate" << frg::endlog;
+
+ if (size != meta->allocatedSize)
+ mlibc::panicLogger() << "Invalid allocated size in metadata in MemoryAllocator::deallocate (given " << size << ", stored " << meta->allocatedSize << ")" << frg::endlog;
+
+ if constexpr (neverReleaseVa) { // keep the address range reserved: remap everything as PROT_NONE
+ void *unused;
+ if (int e = mlibc::sys_vm_map(meta, meta->pagesSize + pageSize * 2, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0, &unused))
+ mlibc::panicLogger() << "sys_vm_map failed in MemoryAllocator::deallocate (errno " << e << ")" << frg::endlog;
+ } else { // otherwise unmap metadata page + data pages + guard page
+ if (int e = mlibc::sys_vm_unmap(meta, meta->pagesSize + pageSize * 2))
+ mlibc::panicLogger() << "sys_vm_unmap failed in MemoryAllocator::deallocate (errno " << e << ")" << frg::endlog;
+ }
+}
+
+void *MemoryAllocator::reallocate(void *ptr, size_t size) { // never resizes in place: allocate, copy, release
+ if (!size) { // a new size of zero frees ptr and yields nullptr
+ free(ptr);
+ return nullptr;
+ }
+
+ void *newArea = allocate(size);
+
+ if (ptr) { // a null ptr makes this behave like a plain allocate(size)
+ uintptr_t page_addr = reinterpret_cast<uintptr_t>(ptr) & ~size_t{pageSize - 1}; // start of the page containing ptr
+ AllocatorMeta *meta = reinterpret_cast<AllocatorMeta *>(page_addr - pageSize); // header sits one page earlier
+
+ if (meta->magic != allocatorMagic)
+ mlibc::panicLogger() << "Invalid allocator metadata magic in MemoryAllocator::reallocate" << frg::endlog;
+
+ memcpy(newArea, ptr, frg::min(meta->allocatedSize, size)); // copy the smaller of old and new size
+
+ deallocate(ptr, meta->allocatedSize);
+ }
+
+ if constexpr (logAllocations)
+ mlibc::infoLogger() << "MemoryAllocator::reallocate(" << ptr << ", " << size << ") = " << newArea << frg::endlog;
+
+ return newArea;
+}
+
+MemoryAllocator &getAllocator() {
+ // Use frg::eternal to prevent a call to __cxa_atexit().
+ // This is necessary because __cxa_atexit() itself calls this function.
+ static frg::eternal<MemoryAllocator> singleton{}; // debug build: no pool, the allocator maps pages itself
+ return singleton.get();
+}
+
+#endif /* !MLIBC_DEBUG_ALLOCATOR */
diff --git a/lib/mlibc/options/internal/generic/charcode.cpp b/lib/mlibc/options/internal/generic/charcode.cpp
new file mode 100644
index 0000000..e09d5cd
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/charcode.cpp
@@ -0,0 +1,244 @@
+
+#include <bits/ensure.h>
+#include <frg/string.hpp>
+#include <mlibc/charcode.hpp>
+#include <mlibc/debug.hpp>
+
+namespace mlibc {
+
+struct utf8_charcode {
+ static constexpr bool preserves_7bit_units = true;
+ static constexpr bool has_shift_states = false;
+
+ struct decode_state { // incremental decoder: fed one code unit per call
+ decode_state()
+ : _progress{0}, _cpoint{0} { }
+
+ auto progress() { return _progress; } // continuation units still expected (0 = code point complete)
+ auto cpoint() { return _cpoint; } // code point accumulated so far
+
+ charcode_error operator() (code_seq<const char> &seq) { // consumes *seq.it on success, leaves seq untouched on error
+ auto uc = static_cast<unsigned char>(*seq.it);
+ if(!_progress) {
+ if(!(uc & 0b1000'0000)) {
+ // ASCII-compatible.
+ _cpoint = uc;
+ }else if((uc & 0b1110'0000) == 0b1100'0000) {
+ _cpoint = uc & 0b1'1111;
+ _progress = 1;
+ }else if((uc & 0b1111'0000) == 0b1110'0000) {
+ _cpoint = uc & 0b1111;
+ _progress = 2;
+ }else if((uc & 0b1111'1000) == 0b1111'0000) {
+ _cpoint = uc & 0b111;
+ _progress = 3;
+ }else{
+ // If the highest two bits are 0b10, this is the second (or later) unit.
+ // Units with highest five bits = 0b11111 do not occur in valid UTF-8.
+ __ensure((uc & 0b1100'0000) == 0b1000'0000
+ || (uc & 0b1111'1000) == 0b1111'1000);
+ return charcode_error::illegal_input;
+ }
+ }else{
+ // Mid-sequence only 0b10xx'xxxx units are legal; bad input is an error, not a libc bug.
+ if((uc & 0b1100'0000) != 0b1000'0000) return charcode_error::illegal_input;
+ _cpoint = (_cpoint << 6) | (uc & 0x3F);
+ --_progress;
+ }
+ ++seq.it;
+ return charcode_error::null;
+ }
+
+ private:
+ int _progress;
+ codepoint _cpoint;
+ };
+
+ struct encode_state {
+ // Encodes a single character from wseq + the current state and stores it in nseq.
+ // TODO: Convert decode_state to the same strategy.
+ charcode_error operator() (code_seq<char> &nseq, code_seq<const codepoint> &wseq) {
+ auto wc = *wseq.it;
+ __ensure(wc <= 0x7F && "utf8_charcode cannot encode multibyte chars yet");
+ *nseq.it = wc; // ASCII only: one code point becomes exactly one code unit
+ ++wseq.it;
+ ++nseq.it;
+ return charcode_error::null;
+ }
+ };
+};
+
+polymorphic_charcode::~polymorphic_charcode() = default;
+
+// For *decoding, this class assumes that:
+// - G::decode_state has members progress() and cpoint().
+// - G::decode_state::progress() >= 0 at all times.
+// TODO: This will be needed on platforms like Windows, where wchar_t is UTF-16.
+// TODO: There, we can use negative __mlibc_mbstate::progress to represent encoding to UTF-16.
+// - If G::decode_state::progress() == 0, the code point (given by cpoint())
+// was decoded successfully.
+template<typename G>
+struct polymorphic_charcode_adapter : polymorphic_charcode {
+ polymorphic_charcode_adapter()
+ : polymorphic_charcode{G::preserves_7bit_units, G::has_shift_states} { }
+
+ charcode_error decode(code_seq<const char> &nseq, code_seq<codepoint> &wseq,
+ __mlibc_mbstate &st) override {
+ __ensure(!st.__progress); // TODO: Update st with ds.progress() and ds.cpoint().
+
+ code_seq<const char> decode_nseq = nseq; // working copy; nseq only advances on whole code points
+ typename G::decode_state ds;
+
+ while(decode_nseq && wseq) {
+ // Consume the next code unit.
+ if(auto e = ds(decode_nseq); e != charcode_error::null)
+ return e;
+
+ // Produce a new code point.
+ if(!ds.progress()) {
+ // "Commit" consumed code units (as there was no decode error).
+ nseq.it = decode_nseq.it;
+ if(!ds.cpoint()) // Stop on null characters.
+ return charcode_error::null;
+ *wseq.it = ds.cpoint();
+ ++wseq.it;
+ }
+ }
+
+ if(ds.progress()) // input ended in the middle of a sequence
+ return charcode_error::input_underflow;
+ return charcode_error::null;
+ }
+
+ charcode_error decode_wtranscode(code_seq<const char> &nseq, code_seq<wchar_t> &wseq,
+ __mlibc_mbstate &st) override {
+ __ensure(!st.__progress); // TODO: Update st with ds.progress() and ds.cpoint().
+
+ code_seq<const char> decode_nseq = nseq; // working copy; nseq only advances on whole code points
+ typename G::decode_state ds;
+
+ while(decode_nseq && wseq) {
+ // Consume the next code unit.
+ if(auto e = ds(decode_nseq); e != charcode_error::null)
+ return e;
+
+ // Produce a new code point.
+ if(!ds.progress()) {
+ // "Commit" consumed code units (as there was no decode error).
+ nseq.it = decode_nseq.it;
+ if(!ds.cpoint()) // Stop on null characters.
+ return charcode_error::null;
+ *wseq.it = ds.cpoint();
+ ++wseq.it;
+ }
+ }
+
+ if(ds.progress()) // input ended in the middle of a sequence
+ return charcode_error::input_underflow;
+ return charcode_error::null;
+ }
+
+ charcode_error decode_wtranscode_length(code_seq<const char> &nseq, size_t *n,
+ __mlibc_mbstate &st) override {
+ __ensure(!st.__progress); // TODO: Update st with ds.progress() and ds.cpoint().
+
+ code_seq<const char> decode_nseq = nseq; // working copy; nseq only advances on whole code points
+ typename G::decode_state ds;
+
+ *n = 0; // counts decoded code points, excluding a terminating null
+ while(decode_nseq) {
+ // Consume the next code unit.
+ if(auto e = ds(decode_nseq); e != charcode_error::null)
+ return e;
+
+ if(!ds.progress()) {
+ // "Commit" consumed code units (as there was no decode error).
+ nseq.it = decode_nseq.it;
+ if(!ds.cpoint()) // Stop on null code points.
+ return charcode_error::null;
+ ++(*n);
+ }
+ }
+
+ if(ds.progress()) // input ended in the middle of a sequence
+ return charcode_error::input_underflow;
+ return charcode_error::null;
+ }
+
+ charcode_error encode_wtranscode(code_seq<char> &nseq, code_seq<const wchar_t> &wseq,
+ __mlibc_mbstate &st) override {
+ __ensure(!st.__progress); // TODO: Update st with es.progress() and es.cpoint().
+
+ code_seq<char> encode_nseq = nseq; // working copy; nseq only advances after a successful encode
+ typename G::encode_state es;
+
+ while(encode_nseq && wseq) {
+ codepoint cp = *wseq.it;
+ if(!cp) // Stop on null code points (the null itself is not written).
+ return charcode_error::null;
+
+ code_seq<const codepoint> cps{&cp, &cp + 1}; // one-element sequence wrapping cp
+ if(auto e = es(encode_nseq, cps); e == charcode_error::dirty) {
+ continue;
+ }else if(e != charcode_error::null) {
+ return e;
+ }
+ __ensure(cps.it == cps.end);
+ ++wseq.it;
+
+ // "Commit" produced code units (as there was no encode error).
+ nseq.it = encode_nseq.it;
+ }
+
+ if(encode_nseq.it != nseq.it)
+ return charcode_error::output_overflow;
+ return charcode_error::null;
+ }
+
+ charcode_error encode_wtranscode_length(code_seq<const wchar_t> &wseq, size_t *n,
+ __mlibc_mbstate &st) override {
+ __ensure(!st.__progress); // TODO: Update st with es.progress() and es.cpoint().
+
+ typename G::encode_state es;
+
+ *n = 0; // incremented once per successfully encoded code point
+ while(wseq) {
+ char temp[4]; // scratch output; the encoded units are discarded
+ code_seq<char> encode_nseq{temp, temp + 4};
+ codepoint cp = *wseq.it;
+ if(!cp)
+ return charcode_error::null;
+ // Encode the next code point into the scratch buffer.
+ code_seq<const codepoint> cps{&cp, &cp + 1};
+ if(auto e = es(encode_nseq, cps); e == charcode_error::dirty) {
+ continue;
+ }else if(e != charcode_error::null) {
+ return e;
+ }
+
+ ++(*n);
+ ++wseq.it;
+ }
+
+ return charcode_error::null;
+ }
+};
+
+polymorphic_charcode *current_charcode() { // always the UTF-8 adapter; no other charcode is selectable here
+ static polymorphic_charcode_adapter<utf8_charcode> global_charcode; // single shared instance
+ return &global_charcode;
+}
+
+charcode_error wide_charcode::promote(wchar_t nc, codepoint &wc) { // wchar_t -> codepoint, written to wc
+ // TODO: Allow non-identity encodings of wchar_t.
+ wc = nc; // identity mapping: the wchar_t value is used as the code point
+ return charcode_error::null; // this implementation never reports an error
+}
+
+wide_charcode *platform_wide_charcode() { // accessor for the one wide_charcode object
+ static wide_charcode global_wide_charcode; // single shared instance
+ return &global_wide_charcode;
+}
+
+} // namespace mlibc
+
diff --git a/lib/mlibc/options/internal/generic/charset.cpp b/lib/mlibc/options/internal/generic/charset.cpp
new file mode 100644
index 0000000..c42b4f4
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/charset.cpp
@@ -0,0 +1,144 @@
+
+#include <bits/ensure.h>
+#include <mlibc/charset.hpp>
+#include <mlibc/debug.hpp>
+
+namespace mlibc {
+
+bool charset::is_ascii_superset() {
+ // TODO: For locales that change the meaning of ASCII chars, this needs to be changed.
+ return true;
+}
+
+bool charset::is_alpha(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_alpha() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_digit(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c >= '0' && c <= '9';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_digit() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_xdigit(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_xdigit() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_alnum(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_alnum() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_punct(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c == '!' || c == '"' || c == '#' || c == '$' || c == '%' || c == '&'
+ || c == '\'' || c == '(' || c == ')' || c == '*' || c == '+' || c == ','
+ || c == '-' || c == '.' || c == '/'
+ || c == ':' || c == ';' || c == '<' || c == '=' || c == '>' || c == '?'
+ || c == '@'
+ || c == '[' || c == '\\' || c == ']' || c == '^' || c == '_' || c == '`'
+ || c == '{' || c == '|' || c == '}' || c == '~';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_punct() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_graph(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c >= 0x21 && c <= 0x7E;
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_graph() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_blank(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c == ' ' || c == '\t';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_blank() is not implemented"
+ " for the full Unicode charset " << c << frg::endlog;
+ return false;
+}
+
+bool charset::is_space(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_space() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_print(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ return c >= 0x20 && c <= 0x7E;
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::is_print() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_lower(codepoint c) { // true only for ASCII 'a'..'z'
+ if(c <= 0x7F && is_ascii_superset())
+ return (c >= 'a' && c <= 'z');
+ if(c > 0x7F) // non-ASCII is not classified yet: log it and report false
+ mlibc::infoLogger() << "mlibc: charset::is_lower() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+bool charset::is_upper(codepoint c) { // true only for ASCII 'A'..'Z'
+ if(c <= 0x7F && is_ascii_superset())
+ return (c >= 'A' && c <= 'Z');
+ if(c > 0x7F) // non-ASCII is not classified yet: log it and report false
+ mlibc::infoLogger() << "mlibc: charset::is_upper() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return false;
+}
+
+codepoint charset::to_lower(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ if(c >= 'A' && c <= 'Z')
+ return c - 'A' + 'a';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::to_lower() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return c;
+}
+
+codepoint charset::to_upper(codepoint c) {
+ if(c <= 0x7F && is_ascii_superset())
+ if(c >= 'a' && c <= 'z')
+ return c - 'a' + 'A';
+ if(c > 0x7F)
+ mlibc::infoLogger() << "mlibc: charset::to_upper() is not implemented"
+ " for the full Unicode charset" << frg::endlog;
+ return c;
+}
+
+charset *current_charset() { // accessor for the one charset object
+ static charset global_charset; // single shared instance
+ return &global_charset;
+}
+
+} // namespace mlibc
+
diff --git a/lib/mlibc/options/internal/generic/debug.cpp b/lib/mlibc/options/internal/generic/debug.cpp
new file mode 100644
index 0000000..19427c8
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/debug.cpp
@@ -0,0 +1,22 @@
+
+#include <bits/ensure.h>
+#include <mlibc/debug.hpp>
+#include <mlibc/internal-sysdeps.hpp>
+
+namespace mlibc {
+
+frg::stack_buffer_logger<InfoSink, 512> infoLogger;
+frg::stack_buffer_logger<PanicSink, 512> panicLogger;
+
+void InfoSink::operator() (const char *message) { // sink behind infoLogger
+ sys_libc_log(message); // forward the finished message to the sysdep log
+}
+
+void PanicSink::operator() (const char *message) { // sink behind panicLogger
+// sys_libc_log("mlibc: Write to PanicSink");
+ sys_libc_log(message); // log the message first...
+ sys_libc_panic(); // ...then hand control to the sysdep panic routine
+}
+
+} // namespace mlibc
+
diff --git a/lib/mlibc/options/internal/generic/ensure.cpp b/lib/mlibc/options/internal/generic/ensure.cpp
new file mode 100644
index 0000000..57c953a
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/ensure.cpp
@@ -0,0 +1,18 @@
+
+#include <bits/ensure.h>
+#include <mlibc/debug.hpp>
+
+void __ensure_fail(const char *assertion, const char *file, unsigned int line,
+ const char *function) { // reports a failed __ensure() through the panic logger
+ mlibc::panicLogger() << "In function " << function
+ << ", file " << file << ":" << line << "\n"
+ << "__ensure(" << assertion << ") failed" << frg::endlog; // PanicSink logs, then calls sys_libc_panic()
+}
+
+void __ensure_warn(const char *assertion, const char *file, unsigned int line,
+ const char *function) { // same report as __ensure_fail, but through the info logger
+ mlibc::infoLogger() << "In function " << function
+ << ", file " << file << ":" << line << "\n"
+ << "__ensure(" << assertion << ") failed" << frg::endlog; // InfoSink only logs; execution continues
+}
+
diff --git a/lib/mlibc/options/internal/generic/essential.cpp b/lib/mlibc/options/internal/generic/essential.cpp
new file mode 100644
index 0000000..d00df1e
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/essential.cpp
@@ -0,0 +1,217 @@
+#include <string.h>
+#include <stdint.h>
+
+namespace {
+ // Needed since we cannot declare a templated enum.
+ template<typename T>
+ struct word_helper {
+ using underlying [[gnu::aligned(1)]] = T;
+ enum class [[gnu::may_alias]] word_enum : underlying { };
+ };
+
+ template<typename T>
+ using word = typename word_helper<T>::word_enum;
+
+ template<typename T>
+ [[gnu::always_inline]]
+ inline word<T> alias_load(const unsigned char *&p) {
+ word<T> value = *reinterpret_cast<const word<T> *>(p);
+ p += sizeof(T);
+ return value;
+ }
+
+ template<typename T>
+ [[gnu::always_inline]]
+ inline void alias_store(unsigned char *&p, word<T> value) {
+ *reinterpret_cast<word<T> *>(p) = value;
+ p += sizeof(T);
+ }
+
+#ifdef __LP64__
+ void *forward_copy(void *__restrict dest, const void *__restrict src, size_t n) {
+ auto curDest = reinterpret_cast<unsigned char *>(dest);
+ auto curSrc = reinterpret_cast<const unsigned char *>(src);
+
+ while(n >= 8 * 8) {
+ auto w1 = alias_load<uint64_t>(curSrc);
+ auto w2 = alias_load<uint64_t>(curSrc);
+ auto w3 = alias_load<uint64_t>(curSrc);
+ auto w4 = alias_load<uint64_t>(curSrc);
+ auto w5 = alias_load<uint64_t>(curSrc);
+ auto w6 = alias_load<uint64_t>(curSrc);
+ auto w7 = alias_load<uint64_t>(curSrc);
+ auto w8 = alias_load<uint64_t>(curSrc);
+ alias_store<uint64_t>(curDest, w1);
+ alias_store<uint64_t>(curDest, w2);
+ alias_store<uint64_t>(curDest, w3);
+ alias_store<uint64_t>(curDest, w4);
+ alias_store<uint64_t>(curDest, w5);
+ alias_store<uint64_t>(curDest, w6);
+ alias_store<uint64_t>(curDest, w7);
+ alias_store<uint64_t>(curDest, w8);
+ n -= 8 * 8;
+ }
+ if(n >= 4 * 8) {
+ auto w1 = alias_load<uint64_t>(curSrc);
+ auto w2 = alias_load<uint64_t>(curSrc);
+ auto w3 = alias_load<uint64_t>(curSrc);
+ auto w4 = alias_load<uint64_t>(curSrc);
+ alias_store<uint64_t>(curDest, w1);
+ alias_store<uint64_t>(curDest, w2);
+ alias_store<uint64_t>(curDest, w3);
+ alias_store<uint64_t>(curDest, w4);
+ n -= 4 * 8;
+ }
+ if(n >= 2 * 8) {
+ auto w1 = alias_load<uint64_t>(curSrc);
+ auto w2 = alias_load<uint64_t>(curSrc);
+ alias_store<uint64_t>(curDest, w1);
+ alias_store<uint64_t>(curDest, w2);
+ n -= 2 * 8;
+ }
+ if(n >= 8) {
+ auto w = alias_load<uint64_t>(curSrc);
+ alias_store<uint64_t>(curDest, w);
+ n -= 8;
+ }
+ if(n >= 4) {
+ auto w = alias_load<uint32_t>(curSrc);
+ alias_store<uint32_t>(curDest, w);
+ n -= 4;
+ }
+ if(n >= 2) {
+ auto w = alias_load<uint16_t>(curSrc);
+ alias_store<uint16_t>(curDest, w);
+ n -= 2;
+ }
+ if(n)
+ *curDest = *curSrc;
+ return dest;
+ }
+#else // !__LP64__
+ void *forward_copy(void *dest, const void *src, size_t n) {
+ for(size_t i = 0; i < n; i++)
+ ((char *)dest)[i] = ((const char *)src)[i];
+ return dest;
+ }
+#endif // __LP64__ / !__LP64__
+}
+
+// --------------------------------------------------------------------------------------
+// memcpy() implementation.
+// --------------------------------------------------------------------------------------
+
+
+void *memcpy(void *__restrict dest, const void *__restrict src, size_t n) {
+ return forward_copy(dest, src, n);
+}
+
+
+// --------------------------------------------------------------------------------------
+// memset() implementation.
+// --------------------------------------------------------------------------------------
+
+#ifdef __LP64__
+
+void *memset(void *dest, int val, size_t n) {
+ auto curDest = reinterpret_cast<unsigned char *>(dest);
+ unsigned char byte = val;
+
+ // Get rid of misalignment.
+ while(n && (reinterpret_cast<uintptr_t>(curDest) & 7)) {
+ *curDest++ = byte;
+ --n;
+ }
+
+ auto pattern64 = static_cast<word<uint64_t>>(
+ static_cast<uint64_t>(byte)
+ | (static_cast<uint64_t>(byte) << 8)
+ | (static_cast<uint64_t>(byte) << 16)
+ | (static_cast<uint64_t>(byte) << 24)
+ | (static_cast<uint64_t>(byte) << 32)
+ | (static_cast<uint64_t>(byte) << 40)
+ | (static_cast<uint64_t>(byte) << 48)
+ | (static_cast<uint64_t>(byte) << 56));
+
+ auto pattern32 = static_cast<word<uint32_t>>(
+ static_cast<uint32_t>(byte)
+ | (static_cast<uint32_t>(byte) << 8)
+ | (static_cast<uint32_t>(byte) << 16)
+ | (static_cast<uint32_t>(byte) << 24));
+
+ auto pattern16 = static_cast<word<uint16_t>>(
+ static_cast<uint16_t>(byte)
+ | (static_cast<uint16_t>(byte) << 8));
+
+ while(n >= 8 * 8) {
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ n -= 8 * 8;
+ }
+ if(n >= 4 * 8) {
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ n -= 4 * 8;
+ }
+ if(n >= 2 * 8) {
+ alias_store<uint64_t>(curDest, pattern64);
+ alias_store<uint64_t>(curDest, pattern64);
+ n -= 2 * 8;
+ }
+ if(n >= 8) {
+ alias_store<uint64_t>(curDest, pattern64);
+ n -= 8;
+ }
+ if(n >= 4) {
+ alias_store<uint32_t>(curDest, pattern32);
+ n -= 4;
+ }
+ if(n >= 2) {
+ alias_store<uint16_t>(curDest, pattern16);
+ n -= 2;
+ }
+ if(n)
+ *curDest = byte;
+ return dest;
+}
+
+#else // !__LP64__
+
+void *memset(void *dest, int byte, size_t count) {
+ for(size_t i = 0; i < count; i++)
+ ((char *)dest)[i] = (char)byte;
+ return dest;
+}
+
+#endif // __LP64__ / !__LP64__
+
+// --------------------------------------------------------------------------------------
+// "Non-optimized" functions.
+// --------------------------------------------------------------------------------------
+
+void *memmove(void *dest, const void *src, size_t size) {
+ auto *to = static_cast<unsigned char *>(dest);
+ auto *from = static_cast<const unsigned char *>(src);
+ // Destination below source: ascending copy. Above: descending. Equal: nothing to do.
+ if(to < from)
+ return forward_copy(dest, src, size);
+ if(to > from)
+ for(size_t left = size; left > 0; left--)
+ to[left - 1] = from[left - 1];
+ return dest;
+}
+
+size_t strlen(const char *s) {
+ // Walk to the terminating NUL; the distance covered is the length.
+ const char *end = s;
+ while(*end) ++end;
+ return static_cast<size_t>(end - s);
+}
diff --git a/lib/mlibc/options/internal/generic/frigg.cpp b/lib/mlibc/options/internal/generic/frigg.cpp
new file mode 100644
index 0000000..7575c9c
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/frigg.cpp
@@ -0,0 +1,14 @@
+
+#include <bits/ensure.h>
+#include <mlibc/debug.hpp>
+#include <mlibc/internal-sysdeps.hpp>
+
+extern "C" void frg_panic(const char *mstr) { // panic hook with C linkage, named for frigg
+// mlibc::sys_libc_log("mlibc: Call to frg_panic");
+ mlibc::sys_libc_log(mstr); // log the message first...
+ mlibc::sys_libc_panic(); // ...then hand control to the sysdep panic routine
+}
+
+extern "C" void frg_log(const char *mstr) { // log hook with C linkage, named for frigg
+ mlibc::sys_libc_log(mstr); // forward the message to the sysdep log
+}
diff --git a/lib/mlibc/options/internal/generic/global-config.cpp b/lib/mlibc/options/internal/generic/global-config.cpp
new file mode 100644
index 0000000..264a984
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/global-config.cpp
@@ -0,0 +1,27 @@
+#include <stdlib.h>
+#include <string.h>
+#include <mlibc/global-config.hpp>
+
+namespace mlibc {
+
+struct GlobalConfigGuard { // exists only for the side effect of its constructor
+ GlobalConfigGuard();
+};
+
+GlobalConfigGuard guard; // namespace-scope instance: its constructor runs during static initialization
+
+GlobalConfigGuard::GlobalConfigGuard() {
+ // Force the config to be created during initialization of libc.so.
+ mlibc::globalConfig();
+}
+
+static bool envEnabled(const char *env) {
+ const char *setting = getenv(env);
+ return setting != nullptr && setting[0] != '\0' && setting[0] != '0'; // unset, empty or leading '0' means disabled
+}
+
+GlobalConfig::GlobalConfig() { // reads the configuration from the environment once, at construction
+ debugMalloc = envEnabled("MLIBC_DEBUG_MALLOC"); // true unless the variable is unset, empty, or starts with '0'
+}
+
+}
diff --git a/lib/mlibc/options/internal/generic/inline-emitter.cpp b/lib/mlibc/options/internal/generic/inline-emitter.cpp
new file mode 100644
index 0000000..bf81c0b
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/inline-emitter.cpp
@@ -0,0 +1,16 @@
+// This translation unit provides symbols for functions marked with __MLIBC_INLINE_DEFINITION.
+// All headers with such functions must be included here.
+
+#define __MLIBC_EMIT_INLINE_DEFINITIONS
+
+#include <mlibc-config.h>
+
+#include <elf.h>
+
+#if __MLIBC_LINUX_OPTION
+#include <sys/sysmacros.h>
+#endif /* __MLIBC_LINUX_OPTION */
+
+#ifndef MLIBC_BUILDING_RTDL
+#include <math.h>
+#endif
diff --git a/lib/mlibc/options/internal/generic/locale.cpp b/lib/mlibc/options/internal/generic/locale.cpp
new file mode 100644
index 0000000..7ba040f
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/locale.cpp
@@ -0,0 +1,87 @@
+#include <bits/ensure.h>
+#include <mlibc/debug.hpp>
+#include <mlibc/locale.hpp>
+
+namespace mlibc {
+
+// Returns the string associated with the langinfo item `item`.
+//
+// Only a fixed set of answers is available: the codeset is always "UTF-8",
+// month/day names and AM/PM strings are English, and D_FMT / T_FMT use
+// "%m/%d/%y" and "%H:%M:%S". Any other item is logged and yields "".
+// The returned pointers refer to string literals and must not be modified.
+char *nl_langinfo(nl_item item) {
+    if(item == CODESET) {
+        return const_cast<char *>("UTF-8");
+    } else if(item >= ABMON_1 && item <= ABMON_12) {
+        switch(item) {
+            case ABMON_1: return const_cast<char *>("Jan");
+            case ABMON_2: return const_cast<char *>("Feb");
+            case ABMON_3: return const_cast<char *>("Mar");
+            case ABMON_4: return const_cast<char *>("Apr");
+            case ABMON_5: return const_cast<char *>("May");
+            case ABMON_6: return const_cast<char *>("Jun");
+            case ABMON_7: return const_cast<char *>("Jul");
+            case ABMON_8: return const_cast<char *>("Aug");
+            case ABMON_9: return const_cast<char *>("Sep");
+            case ABMON_10: return const_cast<char *>("Oct");
+            case ABMON_11: return const_cast<char *>("Nov");
+            case ABMON_12: return const_cast<char *>("Dec");
+            default:
+                // Only reachable if the range check above admits a value
+                // that is not one of the twelve constants.
+                __ensure(!"ABMON_* constants don't seem to be contiguous!");
+                __builtin_unreachable();
+        }
+    } else if(item >= MON_1 && item <= MON_12) {
+        switch(item) {
+            case MON_1: return const_cast<char *>("January");
+            // Fixed spelling: this used to return "Feburary".
+            case MON_2: return const_cast<char *>("February");
+            case MON_3: return const_cast<char *>("March");
+            case MON_4: return const_cast<char *>("April");
+            case MON_5: return const_cast<char *>("May");
+            case MON_6: return const_cast<char *>("June");
+            case MON_7: return const_cast<char *>("July");
+            case MON_8: return const_cast<char *>("August");
+            case MON_9: return const_cast<char *>("September");
+            case MON_10: return const_cast<char *>("October");
+            case MON_11: return const_cast<char *>("November");
+            case MON_12: return const_cast<char *>("December");
+            default:
+                __ensure(!"MON_* constants don't seem to be contiguous!");
+                __builtin_unreachable();
+        }
+    } else if(item == AM_STR) {
+        return const_cast<char *>("AM");
+    } else if(item == PM_STR) {
+        return const_cast<char *>("PM");
+    } else if(item >= DAY_1 && item <= DAY_7) {
+        switch(item) {
+            case DAY_1: return const_cast<char *>("Sunday");
+            case DAY_2: return const_cast<char *>("Monday");
+            case DAY_3: return const_cast<char *>("Tuesday");
+            case DAY_4: return const_cast<char *>("Wednesday");
+            case DAY_5: return const_cast<char *>("Thursday");
+            case DAY_6: return const_cast<char *>("Friday");
+            case DAY_7: return const_cast<char *>("Saturday");
+            default:
+                __ensure(!"DAY_* constants don't seem to be contiguous!");
+                __builtin_unreachable();
+        }
+    } else if(item >= ABDAY_1 && item <= ABDAY_7) {
+        switch(item) {
+            case ABDAY_1: return const_cast<char *>("Sun");
+            case ABDAY_2: return const_cast<char *>("Mon");
+            case ABDAY_3: return const_cast<char *>("Tue");
+            case ABDAY_4: return const_cast<char *>("Wed");
+            case ABDAY_5: return const_cast<char *>("Thu");
+            case ABDAY_6: return const_cast<char *>("Fri");
+            case ABDAY_7: return const_cast<char *>("Sat");
+            default:
+                __ensure(!"ABDAY_* constants don't seem to be contiguous!");
+                __builtin_unreachable();
+        }
+    }else if(item == D_FMT) {
+        return const_cast<char *>("%m/%d/%y");
+    }else if(item == T_FMT) {
+        return const_cast<char *>("%H:%M:%S");
+    }else{
+        // Unsupported item: report it and fall back to an empty string.
+        mlibc::infoLogger() << "mlibc: nl_langinfo item "
+                << item << " is not implemented properly" << frg::endlog;
+        return const_cast<char *>("");
+    }
+}
+
+}
diff --git a/lib/mlibc/options/internal/generic/sigset.cpp b/lib/mlibc/options/internal/generic/sigset.cpp
new file mode 100644
index 0000000..134277d
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/sigset.cpp
@@ -0,0 +1,37 @@
+#include <bits/sigset_t.h>
+#include <bits/ensure.h>
+
+// Clears every bit of `*sigset` (sigset_t is used as a plain bit mask here).
+// Always returns 0.
+int sigemptyset(sigset_t *sigset) {
+ *sigset = 0;
+ return 0;
+}
+
+// Sets every bit of `*sigset`. Always returns 0.
+int sigfillset(sigset_t *sigset) {
+ *sigset = ~sigset_t(0);
+ return 0;
+}
+
+// TODO: Return EINVAL instead of __ensure()ing.
+
+// Adds signal `sig` to `*sigset`. Signal numbers are 1-based, so signal N is
+// stored in bit N-1. An out-of-range signal trips __ensure() (the unsigned
+// cast makes sig <= 0 fail the check as well). Returns 0.
+int sigaddset(sigset_t *sigset, int sig) {
+ int signo = sig - 1;
+ // TODO: do not hard code CHAR_BITS
+ __ensure((unsigned int)signo < sizeof(sigset_t) * 8);
+ *sigset |= sigset_t(1) << signo;
+ return 0;
+}
+
+// Removes signal `sig` from `*sigset` by clearing bit sig-1. An out-of-range
+// signal trips __ensure() (the unsigned cast makes sig <= 0 fail the check as
+// well). Returns 0.
+int sigdelset(sigset_t *sigset, int sig) {
+ int signo = sig - 1;
+ // TODO: do not hard code CHAR_BITS
+ __ensure((unsigned int)signo < sizeof(sigset_t) * 8);
+ *sigset &= ~(sigset_t(1) << signo);
+ return 0;
+}
+
+// Tests whether signal `sig` is contained in `*set`.
+// Returns 1 if the signal is a member and 0 otherwise. An out-of-range signal
+// trips __ensure() (the unsigned cast makes sig <= 0 fail the check as well).
+int sigismember(const sigset_t *set, int sig) {
+    int signo = sig - 1;
+    // TODO: do not hard code CHAR_BIT
+    __ensure((unsigned int)signo < sizeof(sigset_t) * 8);
+    // Compare against zero instead of returning the masked value: converting
+    // the mask to int would drop bits above bit 31 (reporting "not a member")
+    // and would yield a negative value for bit 31.
+    return ((*set) & (sigset_t(1) << signo)) != 0;
+}
diff --git a/lib/mlibc/options/internal/generic/strings.cpp b/lib/mlibc/options/internal/generic/strings.cpp
new file mode 100644
index 0000000..ce4f84b
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/strings.cpp
@@ -0,0 +1,22 @@
+#include <ctype.h>
+
+#include <mlibc/strings.hpp>
+
+namespace mlibc {
+
+// Compares at most `size` bytes of `a` and `b`, ignoring case as defined by
+// tolower(). Comparison stops early when both strings end at the same index.
+// Returns 0 when the compared prefixes are equal, -1 when `a` sorts before
+// `b`, and 1 when `a` sorts after `b`.
+int strncasecmp(const char *a, const char *b, size_t size) {
+    for(size_t i = 0; i < size; i++) {
+        // tolower() requires a value representable as unsigned char (or EOF);
+        // plain char may be negative, so convert before the call.
+        unsigned char a_byte = tolower(static_cast<unsigned char>(a[i]));
+        unsigned char b_byte = tolower(static_cast<unsigned char>(b[i]));
+        if(!a_byte && !b_byte)
+            return 0;
+        // If only one char is null, one of the following cases applies.
+        if(a_byte < b_byte)
+            return -1;
+        if(a_byte > b_byte)
+            return 1;
+    }
+    return 0;
+}
+
+}
diff --git a/lib/mlibc/options/internal/generic/threads.cpp b/lib/mlibc/options/internal/generic/threads.cpp
new file mode 100644
index 0000000..5f1168c
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/threads.cpp
@@ -0,0 +1,342 @@
+#include <abi-bits/errno.h>
+#include <bits/threads.h>
+#include <bits/ensure.h>
+#include <mlibc/all-sysdeps.hpp>
+#include <mlibc/debug.hpp>
+#include <mlibc/lock.hpp>
+#include <mlibc/threads.hpp>
+#include <mlibc/tcb.hpp>
+
+extern "C" Tcb *__rtdl_allocateTcb();
+
+namespace mlibc {
+
+// Creates a new thread that starts at `entry` with argument `user_arg`.
+//
+//   thread      - receives the handle of the new thread (its TCB) on success.
+//   attrp       - optional attributes; defaults from thread_attr_init() are
+//                 used when null. cpuset and sigmask are currently ignored.
+//   returns_int - selects whether the thread's return value is recorded as an
+//                 integer or as a pointer.
+//
+// Returns 0 on success, ENOSYS if a required sysdep is missing, or the error
+// reported by sys_prepare_stack() / sys_clone().
+int thread_create(struct __mlibc_thread_data **__restrict thread, const struct __mlibc_threadattr *__restrict attrp, void *entry, void *__restrict user_arg, bool returns_int) {
+    // Check for the required sysdeps up front so that neither a TCB nor a
+    // stack is allocated when the operation cannot succeed anyway.
+    if (!mlibc::sys_prepare_stack) {
+        MLIBC_MISSING_SYSDEP();
+        return ENOSYS;
+    }
+    if (!mlibc::sys_clone) {
+        MLIBC_MISSING_SYSDEP();
+        return ENOSYS;
+    }
+
+    auto new_tcb = __rtdl_allocateTcb();
+    pid_t tid;
+    struct __mlibc_threadattr attr = {};
+    if (!attrp)
+        thread_attr_init(&attr);
+    else
+        attr = *attrp;
+
+    if (attr.__mlibc_cpuset)
+        mlibc::infoLogger() << "pthread_create(): cpuset is ignored!" << frg::endlog;
+    if (attr.__mlibc_sigmaskset)
+        mlibc::infoLogger() << "pthread_create(): sigmask is ignored!" << frg::endlog;
+
+    // TODO: due to alignment guarantees, the stackaddr and stacksize might change
+    // when the stack is allocated. Currently this isn't propagated to the TCB,
+    // but it should be.
+    void *stack = attr.__mlibc_stackaddr;
+    int ret = mlibc::sys_prepare_stack(&stack, entry,
+            user_arg, new_tcb, &attr.__mlibc_stacksize, &attr.__mlibc_guardsize, &new_tcb->stackAddr);
+    if (ret) {
+        // FIXME: the TCB is leaked on this path.
+        return ret;
+    }
+
+    new_tcb->stackSize = attr.__mlibc_stacksize;
+    new_tcb->guardSize = attr.__mlibc_guardsize;
+    new_tcb->returnValueType = (returns_int) ? TcbThreadReturnValue::Integer : TcbThreadReturnValue::Pointer;
+
+    // Propagate a failed clone instead of reporting success with an
+    // uninitialized tid and a handle to a thread that was never started.
+    ret = mlibc::sys_clone(new_tcb, &tid, stack);
+    if (ret) {
+        // FIXME: the TCB and the prepared stack are leaked on this path.
+        return ret;
+    }
+    *thread = reinterpret_cast<struct __mlibc_thread_data *>(new_tcb);
+
+    // Publish the tid in the TCB and wake anybody waiting on that field.
+    __atomic_store_n(&new_tcb->tid, tid, __ATOMIC_RELAXED);
+    mlibc::sys_futex_wake(&new_tcb->tid);
+
+    return 0;
+}
+
+// Blocks until the thread identified by `thread` has exited and, when `ret`
+// is non-null, stores the thread's return value through it (as void * or as
+// int, depending on how the thread was created).
+// Returns EINVAL if the thread's isJoinable flag is clear, 0 otherwise.
+int thread_join(struct __mlibc_thread_data *thread, void *ret) {
+    Tcb *const target = reinterpret_cast<Tcb *>(thread);
+
+    if (!__atomic_load_n(&target->isJoinable, __ATOMIC_ACQUIRE))
+        return EINVAL;
+
+    // Sleep on the didExit word until the exit flag has been observed.
+    for (;;) {
+        if (__atomic_load_n(&target->didExit, __ATOMIC_ACQUIRE))
+            break;
+        mlibc::sys_futex_wait(&target->didExit, 0, nullptr);
+    }
+
+    if (ret) {
+        switch (target->returnValueType) {
+        case TcbThreadReturnValue::Pointer:
+            *reinterpret_cast<void **>(ret) = target->returnValue.voidPtr;
+            break;
+        case TcbThreadReturnValue::Integer:
+            *reinterpret_cast<int *>(ret) = target->returnValue.intVal;
+            break;
+        default:
+            break;
+        }
+    }
+
+    // FIXME: destroy tcb here, currently we leak it
+
+    return 0;
+}
+
+// Defaults applied by thread_attr_init(): 2 MiB of stack, 4 KiB of guard.
+static constexpr size_t default_stacksize = 0x200000;
+static constexpr size_t default_guardsize = 4096;
+
+// Resets `*attr` to a value-initialized attribute object carrying the default
+// stack size, the default guard size and the joinable detach state.
+// Always returns 0.
+int thread_attr_init(struct __mlibc_threadattr *attr) {
+    __mlibc_threadattr defaults{};
+    defaults.__mlibc_stacksize = default_stacksize;
+    defaults.__mlibc_guardsize = default_guardsize;
+    defaults.__mlibc_detachstate = __MLIBC_THREAD_CREATE_JOINABLE;
+
+    *attr = defaults;
+    return 0;
+}
+
+// Bits stored in __mlibc_flags of a mutex (set by thread_mutex_init()).
+static constexpr unsigned int mutexRecursive = 1;
+static constexpr unsigned int mutexErrorCheck = 2;
+
+// Layout of the __mlibc_state word: bits 0-29 hold the owner's tid, bit 31
+// records that at least one thread is (or is about to be) waiting.
+// TODO: either use uint32_t or determine the bit based on sizeof(int).
+static constexpr unsigned int mutex_owner_mask = (static_cast<uint32_t>(1) << 30) - 1;
+static constexpr unsigned int mutex_waiters_bit = static_cast<uint32_t>(1) << 31;
+
+// Only valid for the internal __mlibc_m mutex of wrlocks.
+static constexpr unsigned int mutex_excl_bit = static_cast<uint32_t>(1) << 30;
+
+// Initializes `mutex` to the unlocked state using the settings in `attr`
+// (or the defaults when `attr` is null). Recursive and error-checking types
+// are recorded in the mutex flags; any other type must be the normal type.
+// Robust, priority-protocol and process-shared settings other than the
+// defaults trip __ensure(). Always returns 0.
+int thread_mutex_init(struct __mlibc_mutex *__restrict mutex,
+        const struct __mlibc_mutexattr *__restrict attr) {
+    // A null attribute pointer selects the default of every attribute.
+    auto type = attr ? attr->__mlibc_type : __MLIBC_THREAD_MUTEX_DEFAULT;
+    auto robust = attr ? attr->__mlibc_robust : __MLIBC_THREAD_MUTEX_STALLED;
+    auto protocol = attr ? attr->__mlibc_protocol : __MLIBC_THREAD_PRIO_NONE;
+    auto pshared = attr ? attr->__mlibc_pshared : __MLIBC_THREAD_PROCESS_PRIVATE;
+
+    // Start from an unowned mutex without any flags.
+    mutex->__mlibc_state = 0;
+    mutex->__mlibc_recursion = 0;
+    mutex->__mlibc_flags = 0;
+    mutex->__mlibc_prioceiling = 0; // TODO: We don't implement this.
+
+    switch(type) {
+    case __MLIBC_THREAD_MUTEX_RECURSIVE:
+        mutex->__mlibc_flags |= mutexRecursive;
+        break;
+    case __MLIBC_THREAD_MUTEX_ERRORCHECK:
+        mutex->__mlibc_flags |= mutexErrorCheck;
+        break;
+    default:
+        __ensure(type == __MLIBC_THREAD_MUTEX_NORMAL);
+        break;
+    }
+
+    // TODO: Other values aren't supported yet.
+    __ensure(robust == __MLIBC_THREAD_MUTEX_STALLED);
+    __ensure(protocol == __MLIBC_THREAD_PRIO_NONE);
+    __ensure(pshared == __MLIBC_THREAD_PROCESS_PRIVATE);
+
+    return 0;
+}
+
+// Destroys `mutex`. Nothing is released; the call only asserts (via
+// __ensure) that the state word is zero, i.e. the mutex is not locked.
+// Returns 0.
+int thread_mutex_destroy(struct __mlibc_mutex *mutex) {
+ __ensure(!mutex->__mlibc_state);
+ return 0;
+}
+
+// Locks `mutex`, blocking until it becomes available.
+//
+// The state word holds the owner's tid plus the waiters bit. `expected`
+// tracks the last observed state: while it is 0 the loop tries to install
+// this thread's tid with a compare-exchange; otherwise it either handles a
+// relock by the current owner, sets the waiters bit, or sleeps on the futex.
+//
+// Returns 0 once the lock is held (or the recursion level was incremented),
+// or EDEADLK when an error-checking mutex is relocked by its owner. Relocking
+// a mutex that is neither recursive nor error-checking goes to the panic
+// logger.
+int thread_mutex_lock(struct __mlibc_mutex *mutex) {
+ unsigned int this_tid = mlibc::this_tid();
+ unsigned int expected = 0;
+ while(true) {
+ if(!expected) {
+ // Try to take the mutex here.
+ if(__atomic_compare_exchange_n(&mutex->__mlibc_state,
+ &expected, this_tid, false, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) {
+ __ensure(!mutex->__mlibc_recursion);
+ mutex->__mlibc_recursion = 1;
+ return 0;
+ }
+ }else{
+ // If this (recursive) mutex is already owned by us, increment the recursion level.
+ if((expected & mutex_owner_mask) == this_tid) {
+ if(!(mutex->__mlibc_flags & mutexRecursive)) {
+ if (mutex->__mlibc_flags & mutexErrorCheck)
+ return EDEADLK;
+ else
+ mlibc::panicLogger() << "mlibc: pthread_mutex deadlock detected!"
+ << frg::endlog;
+ }
+ ++mutex->__mlibc_recursion;
+ return 0;
+ }
+
+ // Wait on the futex if the waiters flag is set.
+ if(expected & mutex_waiters_bit) {
+ int e = mlibc::sys_futex_wait((int *)&mutex->__mlibc_state, expected, nullptr);
+
+ // If the wait returns EAGAIN, that means that the mutex_waiters_bit was just unset by
+ // some other thread. In this case, we should loop back around.
+ if (e && e != EAGAIN)
+ mlibc::panicLogger() << "sys_futex_wait() failed with error code " << e << frg::endlog;
+
+ // Opportunistically try to take the lock after we wake up.
+ expected = 0;
+ }else{
+ // Otherwise we have to set the waiters flag first.
+ unsigned int desired = expected | mutex_waiters_bit;
+ if(__atomic_compare_exchange_n((int *)&mutex->__mlibc_state,
+ reinterpret_cast<int*>(&expected), desired, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
+ expected = desired;
+ }
+ }
+ }
+}
+
+// Unlocks `mutex`.
+//
+// For recursive locking, only the outermost unlock releases the mutex; inner
+// unlocks just decrement the recursion level. When the released state had the
+// waiters bit set, the futex is woken.
+//
+// Returns 0 on success. For error-checking mutexes, EPERM is returned when
+// the calling thread does not own the mutex (this includes the mutex not
+// being locked at all); in that case the mutex is left untouched. For other
+// mutex types, unlocking a mutex that is not owned by the caller trips
+// __ensure().
+int thread_mutex_unlock(struct __mlibc_mutex *mutex) {
+    unsigned int this_tid = mlibc::this_tid();
+    auto flags = mutex->__mlibc_flags;
+
+    // Error-checking mutexes must report misuse *before* any state is
+    // modified; otherwise a non-owner would release somebody else's lock and
+    // only then learn that it was not allowed to. If we are the owner, the
+    // owner bits cannot change underneath us, so a relaxed load suffices.
+    if (flags & mutexErrorCheck) {
+        auto owner = __atomic_load_n(&mutex->__mlibc_state, __ATOMIC_RELAXED)
+                & mutex_owner_mask;
+        if (owner != this_tid)
+            return EPERM;
+    }
+
+    // Decrement the recursion level and unlock if we hit zero.
+    __ensure(mutex->__mlibc_recursion);
+    if(--mutex->__mlibc_recursion)
+        return 0;
+
+    // Reset the mutex to the unlocked state.
+    auto state = __atomic_exchange_n(&mutex->__mlibc_state, 0, __ATOMIC_RELEASE);
+
+    // After this point the mutex is unlocked, and therefore we cannot access its contents as it
+    // may have been destroyed by another thread.
+
+    __ensure((state & mutex_owner_mask) == this_tid);
+
+    if(state & mutex_waiters_bit) {
+        // Wake the futex if there were waiters. Since the mutex might not exist at this location
+        // anymore, we must conservatively ignore EACCES and EINVAL which may occur as a result.
+        int e = mlibc::sys_futex_wake((int *)&mutex->__mlibc_state);
+        __ensure(e >= 0 || e == EACCES || e == EINVAL);
+    }
+
+    return 0;
+}
+
+// Fills `*attr` with the default mutex attributes: default type, stalled
+// (non-robust), process-private, no priority protocol. Always returns 0.
+int thread_mutexattr_init(struct __mlibc_mutexattr *attr) {
+ attr->__mlibc_type = __MLIBC_THREAD_MUTEX_DEFAULT;
+ attr->__mlibc_robust = __MLIBC_THREAD_MUTEX_STALLED;
+ attr->__mlibc_pshared = __MLIBC_THREAD_PROCESS_PRIVATE;
+ attr->__mlibc_protocol = __MLIBC_THREAD_PRIO_NONE;
+ return 0;
+}
+
+// Destroys `*attr` by zero-filling it. Always returns 0.
+// NOTE(review): memset() is used but <string.h> is not included directly by
+// this file; presumably it arrives through another header - confirm.
+int thread_mutexattr_destroy(struct __mlibc_mutexattr *attr) {
+ memset(attr, 0, sizeof(*attr));
+ return 0;
+}
+
+// Stores the mutex type recorded in `attr` into `*type`. Always returns 0.
+int thread_mutexattr_gettype(const struct __mlibc_mutexattr *__restrict attr, int *__restrict type) {
+ *type = attr->__mlibc_type;
+ return 0;
+}
+
+// Records the mutex type `type` in `attr`.
+// Returns 0 on success, or EINVAL (leaving `attr` untouched) when `type` is
+// not one of the normal, error-checking or recursive types.
+int thread_mutexattr_settype(struct __mlibc_mutexattr *attr, int type) {
+    switch (type) {
+    case __MLIBC_THREAD_MUTEX_NORMAL:
+    case __MLIBC_THREAD_MUTEX_ERRORCHECK:
+    case __MLIBC_THREAD_MUTEX_RECURSIVE:
+        attr->__mlibc_type = type;
+        return 0;
+    default:
+        return EINVAL;
+    }
+}
+
+// Initializes `cond` from `attr`. Without an attribute object the clock is
+// CLOCK_REALTIME and the condition variable is process-private. The pshared
+// setting is stored in the flags field and the sequence counter starts at 1.
+// Always returns 0.
+int thread_cond_init(struct __mlibc_cond *__restrict cond, const struct __mlibc_condattr *__restrict attr) {
+    cond->__mlibc_clock = attr ? attr->__mlibc_clock : CLOCK_REALTIME;
+    cond->__mlibc_flags = attr ? attr->__mlibc_pshared : __MLIBC_THREAD_PROCESS_PRIVATE;
+
+    __atomic_store_n(&cond->__mlibc_seq, 1, __ATOMIC_RELAXED);
+    return 0;
+}
+
+// Destroys a condition variable. Nothing needs to be done; always returns 0.
+int thread_cond_destroy(struct __mlibc_cond *) {
+ return 0;
+}
+
+// Signals `cond`: bumps its sequence counter and wakes the futex on that
+// counter. A failing wake trips __ensure(). Always returns 0.
+int thread_cond_broadcast(struct __mlibc_cond *cond) {
+    __atomic_fetch_add(&cond->__mlibc_seq, 1, __ATOMIC_RELEASE);
+
+    const int e = mlibc::sys_futex_wake((int *)&cond->__mlibc_seq);
+    if(e)
+        __ensure(!"sys_futex_wake() failed");
+
+    return 0;
+}
+
+// Waits on `cond` with `mutex` held by the caller, optionally until the
+// absolute time `abstime` (measured on the clock stored in the condition
+// variable); a null `abstime` waits without a timeout.
+//
+// The sequence counter is sampled while the mutex is still held. Each loop
+// iteration unlocks the mutex, sleeps on the counter via the futex, relocks
+// the mutex and then decides from the futex result whether to return.
+//
+// Returns 0 when the counter advanced, ETIMEDOUT when `abstime` passed, and
+// EINVAL when `abstime->tv_nsec` is outside [0, 1e9). Mutex or clock failures
+// trip __ensure(); unexpected futex errors go to the panic logger.
+int thread_cond_timedwait(struct __mlibc_cond *__restrict cond, __mlibc_mutex *__restrict mutex,
+ const struct timespec *__restrict abstime) {
+ // TODO: pshared isn't supported yet.
+ __ensure(cond->__mlibc_flags == 0);
+
+ constexpr long nanos_per_second = 1'000'000'000;
+ if (abstime && (abstime->tv_nsec < 0 || abstime->tv_nsec >= nanos_per_second))
+ return EINVAL;
+
+ auto seq = __atomic_load_n(&cond->__mlibc_seq, __ATOMIC_ACQUIRE);
+
+ // TODO: handle locking errors and cancellation properly.
+ while (true) {
+ if (thread_mutex_unlock(mutex))
+ __ensure(!"Failed to unlock the mutex");
+
+ int e;
+ if (abstime) {
+ // Adjust for the fact that sys_futex_wait accepts a *timeout*, but
+ // pthread_cond_timedwait accepts an *absolute time*.
+ // Note: mlibc::sys_clock_get is available unconditionally.
+ struct timespec now;
+ if (mlibc::sys_clock_get(cond->__mlibc_clock, &now.tv_sec, &now.tv_nsec))
+ __ensure(!"sys_clock_get() failed");
+
+ struct timespec timeout;
+ timeout.tv_sec = abstime->tv_sec - now.tv_sec;
+ timeout.tv_nsec = abstime->tv_nsec - now.tv_nsec;
+
+ // Check if abstime has already passed.
+ if (timeout.tv_sec < 0 || (timeout.tv_sec == 0 && timeout.tv_nsec < 0)) {
+ if (thread_mutex_lock(mutex))
+ __ensure(!"Failed to lock the mutex");
+ return ETIMEDOUT;
+ } else if (timeout.tv_nsec >= nanos_per_second) {
+ timeout.tv_nsec -= nanos_per_second;
+ timeout.tv_sec++;
+ __ensure(timeout.tv_nsec < nanos_per_second);
+ } else if (timeout.tv_nsec < 0) {
+ // Borrow one second so that tv_nsec is non-negative again.
+ timeout.tv_nsec += nanos_per_second;
+ timeout.tv_sec--;
+ __ensure(timeout.tv_nsec >= 0);
+ }
+
+ e = mlibc::sys_futex_wait((int *)&cond->__mlibc_seq, seq, &timeout);
+ } else {
+ e = mlibc::sys_futex_wait((int *)&cond->__mlibc_seq, seq, nullptr);
+ }
+
+ if (thread_mutex_lock(mutex))
+ __ensure(!"Failed to lock the mutex");
+
+ // There are four cases to handle:
+ // 1. e == 0: this indicates a (potentially spurious) wakeup. The value of
+ // seq *must* be checked to distinguish these two cases.
+ // 2. e == EAGAIN: this indicates that the value of seq changed before we
+ // went to sleep. We don't need to check seq in this case.
+ // 3. e == EINTR: a signal was delivered. The man page allows us to choose
+ // whether to go to sleep again or to return 0, but we do the former
+ // to match other libcs.
+ // 4. e == ETIMEDOUT: this should only happen if abstime is set.
+ if (e == 0) {
+ auto cur_seq = __atomic_load_n(&cond->__mlibc_seq, __ATOMIC_ACQUIRE);
+ if (cur_seq > seq)
+ return 0;
+ } else if (e == EAGAIN) {
+ __ensure(__atomic_load_n(&cond->__mlibc_seq, __ATOMIC_ACQUIRE) > seq);
+ return 0;
+ } else if (e == EINTR) {
+ continue;
+ } else if (e == ETIMEDOUT) {
+ __ensure(abstime);
+ return ETIMEDOUT;
+ } else {
+ mlibc::panicLogger() << "sys_futex_wait() failed with error " << e << frg::endlog;
+ }
+ }
+}
+
+} // namespace mlibc
diff --git a/lib/mlibc/options/internal/generic/ubsan.cpp b/lib/mlibc/options/internal/generic/ubsan.cpp
new file mode 100644
index 0000000..3491729
--- /dev/null
+++ b/lib/mlibc/options/internal/generic/ubsan.cpp
@@ -0,0 +1,254 @@
+#include <limits.h>
+#include <mlibc/debug.hpp>
+
+// FMT formats `obj` with the `opts` and `formatter` variables of the
+// enclosing format_object() overload.
+#define FMT(obj) format_object((obj), opts, formatter)
+
+// Stream fragments shared by the handlers below: a "ubsan: <name> at <loc>"
+// header followed by a newline, and an "LHS = ..., RHS = ..." operand pair.
+#define LOG_NAME_LOC(name, loc) "ubsan: " name " at " << loc << "\n "
+#define LOG_LHS_RHS(lhs, rhs) "LHS = " << (lhs) << ", RHS = " << (rhs)
+
+// Source position attached to a sanitizer report: file name, line and column.
+struct SourceLocation {
+ const char *filename;
+ uint32_t line;
+ uint32_t column;
+};
+
+// Formats a SourceLocation as "filename:line:column".
+template<class F>
+void format_object(const SourceLocation &loc, frg::format_options opts, F &formatter) {
+ FMT(loc.filename);
+ FMT(":");
+ FMT(loc.line);
+ FMT(":");
+ FMT(loc.column);
+}
+
+// Opaque value passed to the handlers: either the value itself (for integers
+// that fit) or something that has to be interpreted according to its type.
+using ValueHandle = uintptr_t;
+
+// Description of the type of a value involved in a sanitizer report.
+struct TypeDescriptor {
+    enum class Kind : uint16_t {
+        Integer = 0x0000,
+        Float = 0x0001,
+        Unknown = 0xffff
+    } kind;
+
+    // For integers: bit 0 is the signedness, the remaining bits hold
+    // log2 of the bit width.
+    uint16_t info;
+    char name[];
+
+    // Bit width of an integer type.
+    unsigned bitWidthInt() const {
+        return 1 << (info >> 1);
+    }
+
+    // True when this is an integer type whose values fit into a ValueHandle,
+    // i.e. the handle carries the value itself.
+    bool isInlineInt() const {
+        if (kind != Kind::Integer)
+            return false;
+
+        auto inlineBits = sizeof(ValueHandle) * CHAR_BIT;
+        auto valueBits = bitWidthInt();
+        // The value fits when it is no wider than the handle. The comparison
+        // used to be the other way around, which classified e.g. a 32-bit int
+        // as "not inline" on a 64-bit target.
+        return valueBits <= inlineBits;
+    }
+
+    // True when the integer type is signed.
+    bool isSigned() const {
+        return info & 1;
+    }
+};
+
+// Formats a TypeDescriptor by printing its name.
+template<class F>
+void format_object(const TypeDescriptor &type, frg::format_options opts, F &formatter) {
+ FMT(type.name);
+}
+
+// Pairs a raw ValueHandle with the descriptor of its type so that it can be
+// formatted. Holds a reference to the descriptor; it does not own it.
+struct Value {
+ const TypeDescriptor &type;
+ ValueHandle val;
+
+ Value(const TypeDescriptor &type, ValueHandle val) : type(type), val(val) {}
+};
+
+// Formats a Value as "<number> (<type name>)". The number is only printed for
+// integers that are stored inline in the handle; otherwise only the
+// parenthesized type name is emitted.
+template<class F>
+void format_object(const Value &val, frg::format_options opts, F &formatter) {
+    if (val.type.isInlineInt()) {
+        if (val.type.isSigned()) {
+            // The handle carries the value's bits without sign extension, so
+            // a negative value of a type narrower than 64 bits would print as
+            // a large positive number. Shift the sign bit up to bit 63 and
+            // arithmetically back down to sign-extend it.
+            const unsigned width = val.type.bitWidthInt();
+            const unsigned extraBits = width < 64 ? 64 - width : 0;
+            auto signedValue = static_cast<int64_t>(
+                    static_cast<uint64_t>(val.val) << extraBits) >> extraBits;
+            FMT(signedValue);
+        } else {
+            auto unsignedValue = static_cast<uint64_t>(val.val);
+            FMT(unsignedValue);
+        }
+    }
+
+    FMT(" (");
+    FMT(val.type);
+    FMT(")");
+}
+
+
+// --- Hook implementations ---
+
+// Static data passed to the type-mismatch handler: where it happened, the
+// accessed type, log2 of the required alignment and the kind of access.
+struct TypeMismatch {
+ SourceLocation loc;
+ const TypeDescriptor &type;
+ unsigned char logAlignment;
+ unsigned char kind;
+};
+
+// Reports a type mismatch through the panic logger: prints the accessed
+// address, the type name and the alignment required by that type.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_type_mismatch_v1(TypeMismatch *tm, ValueHandle pointer) {
+ // TODO: Make this print more information.
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("type mismatch", tm->loc)
+ << "accessed address " << (void *)pointer << " but type "
+ << tm->type << " requires alignment " << (1 << tm->logAlignment)
+ << frg::endlog;
+}
+
+// Static data passed to the pointer-overflow handler.
+struct PointerOverflowData {
+ SourceLocation loc;
+};
+
+// Reports a pointer overflow through the panic logger. Only the source
+// location is printed; `base` and `result` are unused.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_pointer_overflow(PointerOverflowData *pod, ValueHandle base, ValueHandle result) {
+ (void)base;
+ (void)result;
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("pointer overflow", pod->loc)
+ << frg::endlog;
+}
+
+// Static data passed to the load-invalid-value handler.
+struct InvalidValueData {
+ SourceLocation loc;
+ const TypeDescriptor &type;
+};
+
+// Reports a load of an invalid value through the panic logger. Only the
+// source location is printed; `value` is unused.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_load_invalid_value(InvalidValueData *ivd, ValueHandle value) {
+ (void)value;
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("load of invalid value", ivd->loc)
+ << frg::endlog;
+}
+
+// Static data shared by the arithmetic overflow handlers: the source location
+// and the type of the operands.
+struct OverflowData {
+ SourceLocation loc;
+ const TypeDescriptor &type;
+};
+
+// Reports an overflowing addition through the panic logger, printing the
+// source location and both operands.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_add_overflow(OverflowData *od, ValueHandle lhs, ValueHandle rhs) {
+    // The name carries no trailing space: LOG_NAME_LOC already inserts " at "
+    // after it, and the sibling handlers use the same form.
+    mlibc::panicLogger()
+        << LOG_NAME_LOC("add overflowed", od->loc)
+        << LOG_LHS_RHS(Value(od->type, lhs), Value(od->type, rhs))
+        << frg::endlog;
+}
+
+// Reports an overflowing subtraction through the panic logger, printing the
+// source location and both operands.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_sub_overflow(OverflowData *od, ValueHandle lhs, ValueHandle rhs) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("sub overflowed", od->loc)
+ << LOG_LHS_RHS(Value(od->type, lhs), Value(od->type, rhs))
+ << frg::endlog;
+}
+
+// Reports an overflowing multiplication through the panic logger, printing
+// the source location and both operands.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_mul_overflow(OverflowData *od, ValueHandle lhs, ValueHandle rhs) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("mul overflowed", od->loc)
+ << LOG_LHS_RHS(Value(od->type, lhs), Value(od->type, rhs))
+ << frg::endlog;
+}
+
+// Reports an overflowing division or remainder through the panic logger,
+// printing the source location and both operands.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_divrem_overflow(OverflowData *od, ValueHandle lhs, ValueHandle rhs) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("divrem overflowed", od->loc)
+ << LOG_LHS_RHS(Value(od->type, lhs), Value(od->type, rhs))
+ << frg::endlog;
+}
+
+// Reports an overflowing negation through the panic logger, printing the
+// source location and the negated value.
+//
+// Negation is a unary operation: the sanitizer runtime interface passes only
+// the old value (received here as `lhs`). The `rhs` parameter is kept so the
+// symbol's declaration stays as it was, but it carries no meaningful data and
+// must not be printed.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_negate_overflow(OverflowData *od, ValueHandle lhs, ValueHandle rhs) {
+    (void)rhs;
+    mlibc::panicLogger()
+        << LOG_NAME_LOC("negate overflowed", od->loc)
+        << "value = " << Value(od->type, lhs)
+        << frg::endlog;
+}
+
+// Static data passed to the shift handler: the source location and the types
+// of the left and right operand.
+struct ShiftOutOfBoundsData {
+ SourceLocation loc;
+ const TypeDescriptor &lhsType;
+ const TypeDescriptor &rhsType;
+};
+
+// Reports an out-of-bounds shift through the panic logger, printing the
+// source location and both operands (each with its own type).
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_shift_out_of_bounds(ShiftOutOfBoundsData *soob, ValueHandle lhs, ValueHandle rhs) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("shift out of bounds", soob->loc)
+ << LOG_LHS_RHS(Value(soob->lhsType, lhs), Value(soob->rhsType, rhs))
+ << frg::endlog;
+}
+
+// Static data passed to the out-of-bounds handler: the source location and
+// the types of the array and of the index.
+struct OutOfBoundsData {
+ SourceLocation loc;
+ const TypeDescriptor &arrayType;
+ const TypeDescriptor &indexType;
+};
+
+// Reports an out-of-bounds access through the panic logger. Only the source
+// location is printed; `data` is unused.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_out_of_bounds(OutOfBoundsData *oobd, ValueHandle data) {
+ (void)data;
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("out of bounds access", oobd->loc)
+ << frg::endlog;
+}
+
+// Static data passed to the unreachable and missing-return handlers.
+struct UnreachableData {
+ SourceLocation loc;
+};
+
+// Reports that __builtin_unreachable() was reached, via the panic logger.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_builtin_unreachable(UnreachableData *ubd) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("reached __builtin_unreachable()", ubd->loc)
+ << frg::endlog;
+}
+
+// Static data passed to the invalid-builtin handler: the source location and
+// a kind byte (not interpreted by the handler below).
+struct InvalidBuiltinData {
+ SourceLocation loc;
+ unsigned char kind;
+};
+
+// Reports an invalid builtin call through the panic logger. Only the source
+// location is printed.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_invalid_builtin(InvalidBuiltinData *ibd) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("reached invalid builtin", ibd->loc)
+ << frg::endlog;
+}
+
+// Static data passed to the VLA-bound handler.
+struct VLABoundData {
+ SourceLocation loc;
+ const TypeDescriptor &type;
+};
+
+// Reports a non-positive variable-length-array bound through the panic
+// logger. Only the source location is printed.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_vla_bound_not_positive(VLABoundData *vlabd) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("VLA bound not positive", vlabd->loc)
+ << frg::endlog;
+}
+
+// Reports that control reached the end of a value-returning function without
+// a return statement, via the panic logger.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_missing_return(UnreachableData *data) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("reached end of a value-returning function without returning a value", data->loc)
+ << frg::endlog;
+}
+
+// Static data passed to the nonnull-argument handler: the location of the
+// call, the location printed as the origin of the non-null requirement, and
+// the index of the offending argument.
+struct NonNullArgData {
+ SourceLocation loc;
+ SourceLocation attr_loc;
+ int arg_index;
+};
+
+// Reports a null pointer passed for a non-null argument through the panic
+// logger, printing the call location, the argument index and the location
+// that requires the argument to be non-null.
+extern "C" [[gnu::visibility("hidden")]]
+void __ubsan_handle_nonnull_arg(NonNullArgData *data) {
+ mlibc::panicLogger()
+ << LOG_NAME_LOC("null pointer passed to non-null argument", data->loc)
+ << "argument " << data->arg_index << " is required to be non-null in "
+ << data->attr_loc << frg::endlog;
+}