From bd5969fc876a10b18613302db7087ef3c40f18e1 Mon Sep 17 00:00:00 2001 From: Ian Moffett Date: Thu, 7 Mar 2024 17:28:00 -0500 Subject: lib: Add mlibc Signed-off-by: Ian Moffett --- .../options/posix/musl-generic-regex/fnmatch.c | 321 +++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 lib/mlibc/options/posix/musl-generic-regex/fnmatch.c (limited to 'lib/mlibc/options/posix/musl-generic-regex/fnmatch.c') diff --git a/lib/mlibc/options/posix/musl-generic-regex/fnmatch.c b/lib/mlibc/options/posix/musl-generic-regex/fnmatch.c new file mode 100644 index 0000000..0e6de47 --- /dev/null +++ b/lib/mlibc/options/posix/musl-generic-regex/fnmatch.c @@ -0,0 +1,321 @@ +/* + * An implementation of what I call the "Sea of Stars" algorithm for + * POSIX fnmatch(). The basic idea is that we factor the pattern into + * a head component (which we match first and can reject without ever + * measuring the length of the string), an optional tail component + * (which only exists if the pattern contains at least one star), and + * an optional "sea of stars", a set of star-separated components + * between the head and tail. After the head and tail matches have + * been removed from the input string, the components in the "sea of + * stars" are matched sequentially by searching for their first + * occurrence past the end of the previous match. + * + * - Rich Felker, April 2012 + */ + +#include +#include +#include +#include +#include +// #include "locale_impl.h" + +#define END 0 +#define UNMATCHABLE -2 +#define BRACKET -3 +#define QUESTION -4 +#define STAR -5 + +static int str_next(const char *str, size_t n, size_t *step) +{ + if (!n) { + *step = 0; + return 0; + } + if (str[0] >= 128U) { + wchar_t wc; + int k = mbtowc(&wc, str, n); + if (k<0) { + *step = 1; + return -1; + } + *step = k; + return wc; + } + *step = 1; + return str[0]; +} + +static int pat_next(const char *pat, size_t m, size_t *step, int flags) +{ + int esc = 0; + if (!m || !*pat) { + *step = 0; + return END; + } + *step = 1; + if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) { + *step = 2; + pat++; + esc = 1; + goto escaped; + } + if (pat[0]=='[') { + size_t k = 1; + if (k= 128U) { + wchar_t wc; + int k = mbtowc(&wc, pat, m); + if (k<0) { + *step = 0; + return UNMATCHABLE; + } + *step = k + esc; + return wc; + } + return pat[0]; +} + +static int casefold(int k) +{ + int c = towupper(k); + return c == k ? towlower(k) : c; +} + +static int match_bracket(const char *p, int k, int kfold) +{ + wchar_t wc; + int inv = 0; + p++; + if (*p=='^' || *p=='!') { + inv = 1; + p++; + } + if (*p==']') { + if (k==']') return !inv; + p++; + } else if (*p=='-') { + if (k=='-') return !inv; + p++; + } + wc = p[-1]; + for (; *p != ']'; p++) { + if (p[0]=='-' && p[1]!=']') { + wchar_t wc2; + int l = mbtowc(&wc2, p+1, 4); + if (l < 0) return 0; + if (wc <= wc2) + if ((unsigned)k-wc <= wc2-wc || + (unsigned)kfold-wc <= wc2-wc) + return !inv; + p += l-1; + continue; + } + if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) { + const char *p0 = p+2; + int z = p[1]; + p+=3; + while (p[-1]!=z || p[0]!=']') p++; + if (z == ':' && p-1-p0 < 16) { + char buf[16]; + memcpy(buf, p0, p-1-p0); + buf[p-1-p0] = 0; + if (iswctype(k, wctype(buf)) || + iswctype(kfold, wctype(buf))) + return !inv; + } + continue; + } + if (*p < 128U) { + wc = (unsigned char)*p; + } else { + int l = mbtowc(&wc, p, 4); + if (l < 0) return 0; + p += l-1; + } + if (wc==(wchar_t)k || wc==(wchar_t)kfold) return !inv; + } + return inv; +} + +static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags) +{ + const char *p, *ptail, *endpat; + const char *s, *stail, *endstr; + size_t pinc, sinc, tailcnt=0; + int c, k, kfold; + + if (flags & FNM_PERIOD) { + if (*str == '.' && *pat != '.') + return FNM_NOMATCH; + } + for (;;) { + switch ((c = pat_next(pat, m, &pinc, flags))) { + case UNMATCHABLE: + return FNM_NOMATCH; + case STAR: + pat++; + m--; + break; + default: + k = str_next(str, n, &sinc); + if (k <= 0) + return (c==END) ? 0 : FNM_NOMATCH; + str += sinc; + n -= sinc; + kfold = flags & FNM_CASEFOLD ? casefold(k) : k; + if (c == BRACKET) { + if (!match_bracket(pat, k, kfold)) + return FNM_NOMATCH; + } else if (c != QUESTION && k != c && kfold != c) { + return FNM_NOMATCH; + } + pat+=pinc; + m-=pinc; + continue; + } + break; + } + + /* Compute real pat length if it was initially unknown/-1 */ + m = strnlen(pat, m); + endpat = pat + m; + + /* Find the last * in pat and count chars needed after it */ + for (p=ptail=pat; pstr && tailcnt; tailcnt--) { + if (s[-1] < 128U || MB_CUR_MAX==1) s--; + else while ((unsigned char)*--s-0x80U<0x40 && s>str); + } + if (tailcnt) return FNM_NOMATCH; + stail = s; + + /* Check that the pat and str tails match */ + p = ptail; + for (;;) { + c = pat_next(p, endpat-p, &pinc, flags); + p += pinc; + if ((k = str_next(s, endstr-s, &sinc)) <= 0) { + if (c != END) return FNM_NOMATCH; + break; + } + s += sinc; + kfold = flags & FNM_CASEFOLD ? casefold(k) : k; + if (c == BRACKET) { + if (!match_bracket(p-pinc, k, kfold)) + return FNM_NOMATCH; + } else if (c != QUESTION && k != c && kfold != c) { + return FNM_NOMATCH; + } + } + + /* We're all done with the tails now, so throw them out */ + endstr = stail; + endpat = ptail; + + /* Match pattern components until there are none left */ + while (pat 0) str += sinc; + else for (str++; str_next(str, endstr-str, &sinc)<0; str++); + } + + return 0; +} + +int fnmatch(const char *pat, const char *str, int flags) +{ + const char *s, *p; + size_t inc; + int c; + if (flags & FNM_PATHNAME) for (;;) { + for (s=str; *s && *s!='/'; s++); + for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc); + if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR))) + return FNM_NOMATCH; + if (fnmatch_internal(pat, p-pat, str, s-str, flags)) + return FNM_NOMATCH; + if (!c) return 0; + str = s+1; + pat = p+inc; + } else if (flags & FNM_LEADING_DIR) { + for (s=str; *s; s++) { + if (*s != '/') continue; + if (!fnmatch_internal(pat, -1, str, s-str, flags)) + return 0; + } + } + return fnmatch_internal(pat, -1, str, -1, flags); +} -- cgit v1.2.3