diff options
-rwxr-xr-x | emcc | 1 | ||||
-rw-r--r-- | system/lib/libc/musl/src/regex/fnmatch.c | 299 | ||||
-rw-r--r-- | system/lib/libcextra.symbols | 1 | ||||
-rw-r--r-- | tests/core/fnmatch.c | 79 | ||||
-rw-r--r-- | tests/core/fnmatch.out | 23 | ||||
-rw-r--r-- | tests/test_core.py | 5 | ||||
-rw-r--r-- | tools/shared.py | 2 |
7 files changed, 409 insertions, 1 deletions
@@ -1584,6 +1584,7 @@ try: 'wctomb.c', ]], ['regex', [ + 'fnmatch.c', 'regcomp.c', 'regerror.c', 'regexec.c', diff --git a/system/lib/libc/musl/src/regex/fnmatch.c b/system/lib/libc/musl/src/regex/fnmatch.c new file mode 100644 index 00000000..ffd3ea0d --- /dev/null +++ b/system/lib/libc/musl/src/regex/fnmatch.c @@ -0,0 +1,299 @@ +/* + * An implementation of what I call the "Sea of Stars" algorithm for + * POSIX fnmatch(). The basic idea is that we factor the pattern into + * a head component (which we match first and can reject without ever + * measuring the length of the string), an optional tail component + * (which only exists if the pattern contains at least one star), and + * an optional "sea of stars", a set of star-separated components + * between the head and tail. After the head and tail matches have + * been removed from the input string, the components in the "sea of + * stars" are matched sequentially by searching for their first + * occurrence past the end of the previous match. + * + * - Rich Felker, April 2012 + */ + +#include <string.h> +#include <fnmatch.h> +#include <stdlib.h> +#include <wchar.h> +#include <wctype.h> + +#define END -1 +#define UNMATCHABLE -2 +#define BRACKET -3 +#define QUESTION -4 +#define STAR -5 + +static int str_next(const char *str, size_t n, size_t *step) +{ + if (!n) { + *step = 0; + return 0; + } + if (str[0] >= 128U) { + wchar_t wc; + int k = mbtowc(&wc, str, n); + if (k<0) { + *step = 1; + return -1; + } + *step = k; + return wc; + } + *step = 1; + return str[0]; +} + +static int pat_next(const char *pat, size_t m, size_t *step, int flags) +{ + int esc = 0; + if (!m || !*pat) { + *step = 0; + return END; + } + *step = 1; + if (pat[0]=='\\' && !(flags & FNM_NOESCAPE)) { + *step = 2; + pat++; + esc = 1; + goto escaped; + } + if (pat[0]=='[') { + size_t k = 1; + if (k<m) if (pat[k] == '^' || pat[k] == '!') k++; + if (k<m) if (pat[k] == ']') k++; + for (; k<m && pat[k] && pat[k]!=']'; k++) { + if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) { + int z = pat[k+1]; + k+=2; + if (k<m && pat[k]) k++; + while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++; + if (k==m || !pat[k]) break; + } + } + if (k==m || !pat[k]) { + *step = 1; + return '['; + } + *step = k+1; + return BRACKET; + } + if (pat[0] == '*') + return STAR; + if (pat[0] == '?') + return QUESTION; +escaped: + if (pat[0] >= 128U) { + wchar_t wc; + int k = mbtowc(&wc, pat, m); + if (k<0) { + *step = 0; + return UNMATCHABLE; + } + *step = k + esc; + return wc; + } + return pat[0]; +} + +static int match_bracket(const char *p, int k) +{ + wchar_t wc; + int inv = 0; + p++; + if (*p=='^' || *p=='!') { + inv = 1; + p++; + } + if (*p==']') { + if (k==']') return !inv; + p++; + } else if (*p=='-') { + if (k=='-') return !inv; + p++; + } + wc = p[-1]; + for (; *p != ']'; p++) { + if (p[0]=='-' && p[1]!=']') { + wchar_t wc2; + int l = mbtowc(&wc2, p+1, 4); + if (l < 0) return 0; + if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv; + p += l-1; + continue; + } + if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) { + const char *p0 = p+2; + int z = p[1]; + p+=3; + while (p[-1]!=z || p[0]!=']') p++; + if (z == ':' && p-1-p0 < 16) { + char buf[16]; + memcpy(buf, p0, p-1-p0); + buf[p-1-p0] = 0; + if (iswctype(k, wctype(buf))) return !inv; + } + continue; + } + if (*p < 128U) { + wc = (unsigned char)*p; + } else { + int l = mbtowc(&wc, p, 4); + if (l < 0) return 0; + p += l-1; + } + if (wc==k) return !inv; + } + return inv; +} + +static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags) +{ + const char *p, *ptail, *endpat; + const char *s, *stail, *endstr; + size_t pinc, sinc, tailcnt=0; + int c, k; + + if (flags & FNM_PERIOD) { + if (*str == '.' && *pat != '.') + return FNM_NOMATCH; + } + for (;;) { + switch ((c = pat_next(pat, m, &pinc, flags))) { + case UNMATCHABLE: + return FNM_NOMATCH; + case STAR: + pat++; + m--; + break; + default: + k = str_next(str, n, &sinc); + if (k <= 0) + return (c==END) ? 0 : FNM_NOMATCH; + str += sinc; + n -= sinc; + if (c == BRACKET) { + if (!match_bracket(pat, k)) + return FNM_NOMATCH; + } else if (c != QUESTION && k != c) { + return FNM_NOMATCH; + } + pat+=pinc; + m-=pinc; + continue; + } + break; + } + + /* Compute real pat length if it was initially unknown/-1 */ + m = strnlen(pat, m); + endpat = pat + m; + + /* Find the last * in pat and count chars needed after it */ + for (p=ptail=pat; p<endpat; p+=pinc) { + switch (pat_next(p, endpat-p, &pinc, flags)) { + case UNMATCHABLE: + return FNM_NOMATCH; + case STAR: + tailcnt=0; + ptail = p+1; + break; + default: + tailcnt++; + break; + } + } + + /* Past this point we need not check for UNMATCHABLE in pat, + * because all of pat has already been parsed once. */ + + /* Compute real str length if it was initially unknown/-1 */ + n = strnlen(str, n); + endstr = str + n; + if (n < tailcnt) return FNM_NOMATCH; + + /* Find the final tailcnt chars of str, accounting for UTF-8. + * On illegal sequences we may get it wrong, but in that case + * we necessarily have a matching failure anyway. */ + for (s=endstr; s>str && tailcnt; tailcnt--) { + if (s[-1] < 128U) s--; + else while ((unsigned char)*--s-0x80U<0x40 && s>str); + } + if (tailcnt) return FNM_NOMATCH; + stail = s; + + /* Check that the pat and str tails match */ + p = ptail; + for (;;) { + c = pat_next(p, endpat-p, &pinc, flags); + p += pinc; + if ((k = str_next(s, endstr-s, &sinc)) <= 0) { + if (c != END) return FNM_NOMATCH; + break; + } + s += sinc; + if (c == BRACKET) { + if (!match_bracket(p-pinc, k)) + return FNM_NOMATCH; + } else if (c != QUESTION && k != c) { + return FNM_NOMATCH; + } + } + + /* We're all done with the tails now, so throw them out */ + endstr = stail; + endpat = ptail; + + /* Match pattern components until there are none left */ + while (pat<endpat) { + p = pat; + s = str; + for (;;) { + c = pat_next(p, endpat-p, &pinc, flags); + p += pinc; + /* Encountering * completes/commits a component */ + if (c == STAR) { + pat = p; + str = s; + break; + } + k = str_next(s, endstr-s, &sinc); + if (!k) + return FNM_NOMATCH; + if (c == BRACKET) { + if (!match_bracket(p-pinc, k)) + break; + } else if (c != QUESTION && k != c) { + break; + } + s += sinc; + } + if (c == STAR) continue; + /* If we failed, advance str, by 1 char if it's a valid + * char, or past all invalid bytes otherwise. */ + k = str_next(str, endstr-str, &sinc); + if (k > 0) str += sinc; + else for (str++; str_next(str, endstr-str, &sinc)<0; str++); + } + + return 0; +} + +int fnmatch(const char *pat, const char *str, int flags) +{ + const char *s, *p; + size_t inc; + int c; + if (flags & FNM_PATHNAME) for (;;) { + for (s=str; *s && *s!='/'; s++); + for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc); + if (*s && *p!=*s) return FNM_NOMATCH; + if (fnmatch_internal(pat, p-pat, str, s-str, flags)) + return FNM_NOMATCH; + if (!*s && c==END) return 0; + str = s+1; + pat = p+1; + } + return fnmatch_internal(pat, -1, str, -1, flags); +} diff --git a/system/lib/libcextra.symbols b/system/lib/libcextra.symbols index 6f1039f1..54176b1d 100644 --- a/system/lib/libcextra.symbols +++ b/system/lib/libcextra.symbols @@ -187,3 +187,4 @@ T wmemmove T wmemset T wprintf + T fnmatch diff --git a/tests/core/fnmatch.c b/tests/core/fnmatch.c new file mode 100644 index 00000000..ebdb2009 --- /dev/null +++ b/tests/core/fnmatch.c @@ -0,0 +1,79 @@ +// Begin test_fnmatch.cpp +#include <fnmatch.h> +#include <iostream> +#include <vector> +#include <string> + +using namespace std; + +class TestCase { +public: + TestCase(const string& pattern, const string& testString, int flags, int expected) : + pattern(pattern), + testString(testString), + flags(flags), + expected(expected) + {} + string pattern; + string testString; + int flags; + int expected; +}; + +int main() +{ + vector<TestCase> testCases; + + testCases.push_back(TestCase("*","anything",0,0)); + testCases.push_back(TestCase("*.txt","readme.txt",0,0)); + testCases.push_back(TestCase("*.txt","readme.info",0,FNM_NOMATCH)); + testCases.push_back(TestCase("*.t?t","readme.txt",0,0)); + testCases.push_back(TestCase("*.t?t","readme.tot",0,0)); + testCases.push_back(TestCase("*.t?t","readme.txxt",0,FNM_NOMATCH)); + testCases.push_back(TestCase("[a-g]1","c1",0,0)); + testCases.push_back(TestCase("[a-g]1","i1",0,FNM_NOMATCH)); + testCases.push_back(TestCase("[!a-g]1","i1",0,0)); + testCases.push_back(TestCase("a\\*","anything",0,FNM_NOMATCH)); + testCases.push_back(TestCase("a\\*","a*",0,0)); + testCases.push_back(TestCase("a\\*","a*",FNM_NOESCAPE,FNM_NOMATCH)); + testCases.push_back(TestCase("a\\*","a\\*",FNM_NOESCAPE,0)); + testCases.push_back(TestCase("*readme","/etc/readme",0,0)); + testCases.push_back(TestCase("*readme","/etc/readme",FNM_PATHNAME,FNM_NOMATCH)); + testCases.push_back(TestCase("/*/readme","/etc/readme",FNM_PATHNAME,0)); + testCases.push_back(TestCase("*readme","/etc/.readme",0,0)); + testCases.push_back(TestCase("*readme",".readme",FNM_PERIOD,FNM_NOMATCH)); + testCases.push_back(TestCase("*.readme","/etc/.readme",FNM_PERIOD,0)); + testCases.push_back(TestCase("*.readme","/etc/.readme",FNM_PERIOD|FNM_PATHNAME,FNM_NOMATCH)); + testCases.push_back(TestCase("/*/.readme","/etc/.readme",FNM_PERIOD|FNM_PATHNAME,0)); + testCases.push_back(TestCase("ReAdME","readme",0,FNM_NOMATCH)); + + bool pass = true; + + for (vector<TestCase>::const_iterator it = testCases.begin(); it != testCases.end(); ++it) + { + int result = fnmatch(it->pattern.c_str(), it->testString.c_str(), it->flags); + if (result == it->expected) + cout << "Pass: "; + else + { + cout << "Fail: "; + pass = false; + } + + cout << "fnmatch(" << it->pattern << ", " << it->testString << ", " + << it->flags << ") returned " << result << ", expected " + << it->expected << endl; + } + + if (pass) + { + cout << "All tests passed." << endl; + return 0; + } + else + { + cout << "Some tests failed." << endl; + return 1; + } +} + diff --git a/tests/core/fnmatch.out b/tests/core/fnmatch.out new file mode 100644 index 00000000..303f7449 --- /dev/null +++ b/tests/core/fnmatch.out @@ -0,0 +1,23 @@ +Pass: fnmatch(*, anything, 0) returned 0, expected 0 +Pass: fnmatch(*.txt, readme.txt, 0) returned 0, expected 0 +Pass: fnmatch(*.txt, readme.info, 0) returned 1, expected 1 +Pass: fnmatch(*.t?t, readme.txt, 0) returned 0, expected 0 +Pass: fnmatch(*.t?t, readme.tot, 0) returned 0, expected 0 +Pass: fnmatch(*.t?t, readme.txxt, 0) returned 1, expected 1 +Pass: fnmatch([a-g]1, c1, 0) returned 0, expected 0 +Pass: fnmatch([a-g]1, i1, 0) returned 1, expected 1 +Pass: fnmatch([!a-g]1, i1, 0) returned 0, expected 0 +Pass: fnmatch(a\*, anything, 0) returned 1, expected 1 +Pass: fnmatch(a\*, a*, 0) returned 0, expected 0 +Pass: fnmatch(a\*, a*, 2) returned 1, expected 1 +Pass: fnmatch(a\*, a\*, 2) returned 0, expected 0 +Pass: fnmatch(*readme, /etc/readme, 0) returned 0, expected 0 +Pass: fnmatch(*readme, /etc/readme, 1) returned 1, expected 1 +Pass: fnmatch(/*/readme, /etc/readme, 1) returned 0, expected 0 +Pass: fnmatch(*readme, /etc/.readme, 0) returned 0, expected 0 +Pass: fnmatch(*readme, .readme, 4) returned 1, expected 1 +Pass: fnmatch(*.readme, /etc/.readme, 4) returned 0, expected 0 +Pass: fnmatch(*.readme, /etc/.readme, 5) returned 1, expected 1 +Pass: fnmatch(/*/.readme, /etc/.readme, 5) returned 0, expected 0 +Pass: fnmatch(ReAdME, readme, 0) returned 1, expected 1 +All tests passed. diff --git a/tests/test_core.py b/tests/test_core.py index 59f2f9e9..a884ea75 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -3705,6 +3705,11 @@ ok self.do_run_from_file(src, output) + def test_fnmatch(self): + test_path = path_from_root('tests', 'core', 'fnmatch') + src, output = (test_path + s for s in ('.c', '.out')) + self.do_run_from_file(src, output) + def test_sscanf(self): if self.emcc_args is None: return self.skip('needs emcc for libc') if not self.is_le32(): return self.skip('le32 needed for accurate math') diff --git a/tools/shared.py b/tools/shared.py index 1ee5cce8..bb50350d 100644 --- a/tools/shared.py +++ b/tools/shared.py @@ -345,7 +345,7 @@ def find_temp_directory(): # we re-check sanity when the settings are changed) # We also re-check sanity and clear the cache when the version changes -EMSCRIPTEN_VERSION = '1.9.2' +EMSCRIPTEN_VERSION = '1.9.3' def generate_sanity(): return EMSCRIPTEN_VERSION + '|' + get_llvm_target() + '|' + LLVM_ROOT + '|' + get_clang_version() |