/*
 * An implementation of what I call the "Sea of Stars" algorithm for
 * POSIX fnmatch(). The basic idea is that we factor the pattern into
 * a head component (which we match first and can reject without ever
 * measuring the length of the string), an optional tail component
 * (which only exists if the pattern contains at least one star), and
 * an optional "sea of stars", a set of star-separated components
 * between the head and tail. After the head and tail matches have
 * been removed from the input string, the components in the "sea of
 * stars" are matched sequentially by searching for their first
 * occurrence past the end of the previous match.
 *
 * - Rich Felker, April 2012
 */

#include <string.h>
#include <fnmatch.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
// #include "locale_impl.h"

/*
 * FNM_LEADING_DIR and FNM_CASEFOLD are extensions that some <fnmatch.h>
 * implementations only expose under a feature-test macro. Fall back to
 * the bit values used by glibc and musl when the header hides them.
 */
#ifndef FNM_LEADING_DIR
#define FNM_LEADING_DIR 0x8
#endif
#ifndef FNM_CASEFOLD
#define FNM_CASEFOLD 0x10
#endif

/*
 * Token codes returned by pat_next(). They are zero or negative so that
 * they cannot collide with a decoded (positive) pattern character.
 */
#define END 0
#define UNMATCHABLE -2
#define BRACKET -3
#define QUESTION -4
#define STAR -5

/*
 * Length of s, examining at most max bytes. Behaves like POSIX strnlen()
 * but is written with ISO C memchr(), so no feature-test macro is needed
 * to obtain a prototype. max may be (size_t)-1 for "unbounded"; s must
 * then be NUL-terminated.
 */
static size_t bounded_len(const char *s, size_t max)
{
	const char *nul = memchr(s, 0, max);
	return nul ? (size_t)(nul - s) : max;
}

/*
 * Decode the next character of the subject string.
 *
 * str   - current position in the string
 * n     - number of bytes that may be examined
 * step  - receives the number of bytes to advance
 *
 * Returns the character value (decoded with mbtowc() for bytes >= 128),
 * 0 when n is 0 or the byte is NUL, or -1 with *step = 1 when the bytes
 * do not form a valid multibyte character.
 */
static int str_next(const char *str, size_t n, size_t *step)
{
	if (!n) {
		*step = 0;
		return 0;
	}
	if (str[0] >= 128U) {
		wchar_t wc;
		int k = mbtowc(&wc, str, n);
		if (k<0) {
			*step = 1;
			return -1;
		}
		*step = k;
		return wc;
	}
	*step = 1;
	return str[0];
}

/*
 * Read the next token of the pattern.
 *
 * pat   - current position in the pattern
 * m     - number of pattern bytes that may be examined
 * step  - receives the number of pattern bytes the token occupies
 * flags - fnmatch() flags; only FNM_NOESCAPE is consulted here
 *
 * Returns END at the end of the pattern, STAR for '*', QUESTION for '?',
 * BRACKET for a complete bracket expression (*step covers it through
 * the closing ']'), UNMATCHABLE with *step = 0 for an invalid multibyte
 * sequence, and otherwise the literal character to match. A '[' with no
 * closing ']' is returned as a literal '['. A backslash followed by a
 * non-NUL byte yields that byte as a literal (with the backslash
 * included in *step) unless FNM_NOESCAPE is set.
 */
static int pat_next(const char *pat, size_t m, size_t *step, int flags)
{
	int esc = 0;
	if (!m || !*pat) {
		*step = 0;
		return END;
	}
	*step = 1;
	if (pat[0]=='\\' && pat[1] && !(flags & FNM_NOESCAPE)) {
		*step = 2;
		pat++;
		esc = 1;
		goto escaped;
	}
	if (pat[0]=='[') {
		size_t k = 1;
		/* Optional negation, then an optional literal leading ']' */
		if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
		if (k<m) if (pat[k] == ']') k++;
		for (; k<m && pat[k] && pat[k]!=']'; k++) {
			/* Skip over [:class:], [.sym.] and [=equiv=] as a unit
			 * so that the ']' inside them is not taken as the end. */
			if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
				int z = pat[k+1];
				k+=2;
				if (k<m && pat[k]) k++;
				while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
				if (k==m || !pat[k]) break;
			}
		}
		/* Ran off the end without a closing ']': literal '[' */
		if (k==m || !pat[k]) {
			*step = 1;
			return '[';
		}
		*step = k+1;
		return BRACKET;
	}
	if (pat[0] == '*')
		return STAR;
	if (pat[0] == '?')
		return QUESTION;
escaped:
	if (pat[0] >= 128U) {
		wchar_t wc;
		int k = mbtowc(&wc, pat, m);
		if (k<0) {
			*step = 0;
			return UNMATCHABLE;
		}
		/* esc accounts for the backslash already skipped above */
		*step = k + esc;
		return wc;
	}
	return pat[0];
}

/*
 * Return the opposite-case form of k: its upper-case mapping if that
 * differs from k, otherwise its lower-case mapping.
 */
static int casefold(int k)
{
	int c = towupper(k);
	return c == k ? towlower(k) : c;
}

/*
 * Test a character against a bracket expression.
 *
 * p     - points at the opening '[' of an expression that pat_next()
 *         has already reported as BRACKET (so a closing ']' exists)
 * k     - the subject character
 * kfold - the case-folded alternative of k (equal to k when case
 *         folding is not in effect)
 *
 * Returns nonzero if k or kfold is accepted by the expression, taking a
 * leading '^' or '!' negation into account. Ranges and [:class:] names
 * are honoured; [.sym.] and [=equiv=] elements are skipped and never
 * match. Returns 0 if the expression contains an invalid multibyte
 * sequence.
 */
static int match_bracket(const char *p, int k, int kfold)
{
	wchar_t wc;
	int inv = 0;
	p++;
	if (*p=='^' || *p=='!') {
		inv = 1;
		p++;
	}
	/* A leading ']' or '-' is an ordinary member */
	if (*p==']') {
		if (k==']') return !inv;
		p++;
	} else if (*p=='-') {
		if (k=='-') return !inv;
		p++;
	}
	/* wc always holds the previous member: the low end of a range */
	wc = p[-1];
	for (; *p != ']'; p++) {
		if (p[0]=='-' && p[1]!=']') {
			wchar_t wc2;
			int l = mbtowc(&wc2, p+1, 4);
			if (l < 0) return 0;
			if (wc <= wc2)
				if ((unsigned)k-wc <= wc2-wc ||
				    (unsigned)kfold-wc <= wc2-wc)
					return !inv;
			p += l-1;
			continue;
		}
		if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
			const char *p0 = p+2;
			int z = p[1];
			p+=3;
			while (p[-1]!=z || p[0]!=']') p++;
			/* Only character classes are evaluated, and only when
			 * the name fits the local buffer. */
			if (z == ':' && p-1-p0 < 16) {
				char buf[16];
				memcpy(buf, p0, p-1-p0);
				buf[p-1-p0] = 0;
				if (iswctype(k, wctype(buf)) ||
				    iswctype(kfold, wctype(buf)))
					return !inv;
			}
			continue;
		}
		if (*p < 128U) {
			wc = (unsigned char)*p;
		} else {
			int l = mbtowc(&wc, p, 4);
			if (l < 0) return 0;
			p += l-1;
		}
		if (wc==(wchar_t)k || wc==(wchar_t)kfold) return !inv;
	}
	return inv;
}

/*
 * Match one pattern against one string, with no special treatment of
 * '/' (the caller splits on slashes when FNM_PATHNAME is requested).
 *
 * pat, m - pattern and the number of bytes of it to use; m may be
 *          (size_t)-1 when the pattern is simply NUL-terminated
 * str, n - subject string and byte count, with the same convention
 * flags  - fnmatch() flags; FNM_PERIOD, FNM_CASEFOLD and (through
 *          pat_next) FNM_NOESCAPE are acted on here
 *
 * Returns 0 on a match and FNM_NOMATCH otherwise.
 */
static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
{
	const char *p, *ptail, *endpat;
	const char *s, *stail, *endstr;
	size_t pinc, sinc, tailcnt=0;
	int c, k, kfold;

	if (flags & FNM_PERIOD) {
		if (*str == '.' && *pat != '.')
			return FNM_NOMATCH;
	}

	/* Head: match everything up to the first star, one character at
	 * a time, without needing to know either length. */
	for (;;) {
		switch ((c = pat_next(pat, m, &pinc, flags))) {
		case UNMATCHABLE:
			return FNM_NOMATCH;
		case STAR:
			pat++;
			m--;
			break;
		default:
			k = str_next(str, n, &sinc);
			if (k <= 0)
				return (c==END) ? 0 : FNM_NOMATCH;
			str += sinc;
			n -= sinc;
			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
			if (c == BRACKET) {
				if (!match_bracket(pat, k, kfold))
					return FNM_NOMATCH;
			} else if (c != QUESTION && k != c && kfold != c) {
				return FNM_NOMATCH;
			}
			pat+=pinc;
			m-=pinc;
			continue;
		}
		break;
	}

	/* Compute real pat length if it was initially unknown/-1 */
	m = bounded_len(pat, m);
	endpat = pat + m;

	/* Find the last * in pat and count chars needed after it */
	for (p=ptail=pat; p<endpat; p+=pinc) {
		switch (pat_next(p, endpat-p, &pinc, flags)) {
		case UNMATCHABLE:
			return FNM_NOMATCH;
		case STAR:
			tailcnt=0;
			ptail = p+1;
			break;
		default:
			tailcnt++;
			break;
		}
	}

	/* Past this point we need not check for UNMATCHABLE in pat,
	 * because all of pat has already been parsed once. */

	/* Compute real str length if it was initially unknown/-1 */
	n = bounded_len(str, n);
	endstr = str + n;
	if (n < tailcnt) return FNM_NOMATCH;

	/* Find the final tailcnt chars of str, accounting for UTF-8.
	 * On illegal sequences we may get it wrong, but in that case
	 * we necessarily have a matching failure anyway. */
	for (s=endstr; s>str && tailcnt; tailcnt--) {
		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
	}
	if (tailcnt) return FNM_NOMATCH;
	stail = s;

	/* Check that the pat and str tails match */
	p = ptail;
	for (;;) {
		c = pat_next(p, endpat-p, &pinc, flags);
		p += pinc;
		if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
			if (c != END) return FNM_NOMATCH;
			break;
		}
		s += sinc;
		kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
		if (c == BRACKET) {
			if (!match_bracket(p-pinc, k, kfold))
				return FNM_NOMATCH;
		} else if (c != QUESTION && k != c && kfold != c) {
			return FNM_NOMATCH;
		}
	}

	/* We're all done with the tails now, so throw them out */
	endstr = stail;
	endpat = ptail;

	/* Match pattern components until there are none left */
	while (pat<endpat) {
		p = pat;
		s = str;
		for (;;) {
			c = pat_next(p, endpat-p, &pinc, flags);
			p += pinc;
			/* Encountering * completes/commits a component */
			if (c == STAR) {
				pat = p;
				str = s;
				break;
			}
			k = str_next(s, endstr-s, &sinc);
			if (!k)
				return FNM_NOMATCH;
			kfold = flags & FNM_CASEFOLD ? casefold(k) : k;
			if (c == BRACKET) {
				if (!match_bracket(p-pinc, k, kfold))
					break;
			} else if (c != QUESTION && k != c && kfold != c) {
				break;
			}
			s += sinc;
		}
		if (c == STAR) continue;
		/* If we failed, advance str, by 1 char if it's a valid
		 * char, or past all invalid bytes otherwise. */
		k = str_next(str, endstr-str, &sinc);
		if (k > 0) str += sinc;
		else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
	}

	return 0;
}

/*
 * Match the string str against the shell wildcard pattern pat.
 *
 * With FNM_PATHNAME the pattern and the string are split at each '/'
 * and matched segment by segment, so wildcards never match a slash.
 * With FNM_LEADING_DIR a match of the pattern against a leading portion
 * of str that is followed by '/' also counts as a match.
 *
 * Returns 0 on a match and FNM_NOMATCH otherwise.
 */
int fnmatch(const char *pat, const char *str, int flags)
{
	const char *s, *p;
	size_t inc;
	int c;
	if (flags & FNM_PATHNAME) for (;;) {
		/* Isolate the next '/'-delimited segment of each */
		for (s=str; *s && *s!='/'; s++);
		for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
		/* Both must end the same way, unless the pattern is
		 * exhausted and FNM_LEADING_DIR lets the string go on. */
		if (c!=*s && (!*s || !(flags & FNM_LEADING_DIR)))
			return FNM_NOMATCH;
		if (fnmatch_internal(pat, p-pat, str, s-str, flags))
			return FNM_NOMATCH;
		if (!c) return 0;
		str = s+1;
		pat = p+inc;
	} else if (flags & FNM_LEADING_DIR) {
		/* Try the pattern against every prefix that ends at a '/' */
		for (s=str; *s; s++) {
			if (*s != '/') continue;
			if (!fnmatch_internal(pat, -1, str, s-str, flags))
				return 0;
		}
	}
	return fnmatch_internal(pat, -1, str, -1, flags);
}