aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlon Zakai <alonzakai@gmail.com>2014-01-16 14:42:22 -0800
committerAlon Zakai <alonzakai@gmail.com>2014-01-16 15:42:14 -0800
commit5bb976ed71f1402f8f7853cc9a0c8ea4dd551615 (patch)
tree4cc78b8fb5f7e4bfc94a8baf2775f7d84c39e390
parent3d771ae92881d253aae30e80a400ba133de08d26 (diff)
add fnmatch; fixes #20021.9.3
-rwxr-xr-xemcc1
-rw-r--r--system/lib/libc/musl/src/regex/fnmatch.c299
-rw-r--r--system/lib/libcextra.symbols1
-rw-r--r--tests/core/fnmatch.c79
-rw-r--r--tests/core/fnmatch.out23
-rw-r--r--tests/test_core.py5
-rw-r--r--tools/shared.py2
7 files changed, 409 insertions, 1 deletions
diff --git a/emcc b/emcc
index 4be24e20..c295a76e 100755
--- a/emcc
+++ b/emcc
@@ -1584,6 +1584,7 @@ try:
'wctomb.c',
]],
['regex', [
+ 'fnmatch.c',
'regcomp.c',
'regerror.c',
'regexec.c',
diff --git a/system/lib/libc/musl/src/regex/fnmatch.c b/system/lib/libc/musl/src/regex/fnmatch.c
new file mode 100644
index 00000000..ffd3ea0d
--- /dev/null
+++ b/system/lib/libc/musl/src/regex/fnmatch.c
@@ -0,0 +1,299 @@
+/*
+ * An implementation of what I call the "Sea of Stars" algorithm for
+ * POSIX fnmatch(). The basic idea is that we factor the pattern into
+ * a head component (which we match first and can reject without ever
+ * measuring the length of the string), an optional tail component
+ * (which only exists if the pattern contains at least one star), and
+ * an optional "sea of stars", a set of star-separated components
+ * between the head and tail. After the head and tail matches have
+ * been removed from the input string, the components in the "sea of
+ * stars" are matched sequentially by searching for their first
+ * occurrence past the end of the previous match.
+ *
+ * - Rich Felker, April 2012
+ */
+
+#include <string.h>
+#include <fnmatch.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#define END -1
+#define UNMATCHABLE -2
+#define BRACKET -3
+#define QUESTION -4
+#define STAR -5
+
+static int str_next(const char *str, size_t n, size_t *step)
+{
+ if (!n) {
+ *step = 0;
+ return 0;
+ }
+ if (str[0] >= 128U) {
+ wchar_t wc;
+ int k = mbtowc(&wc, str, n);
+ if (k<0) {
+ *step = 1;
+ return -1;
+ }
+ *step = k;
+ return wc;
+ }
+ *step = 1;
+ return str[0];
+}
+
+static int pat_next(const char *pat, size_t m, size_t *step, int flags)
+{
+ int esc = 0;
+ if (!m || !*pat) {
+ *step = 0;
+ return END;
+ }
+ *step = 1;
+ if (pat[0]=='\\' && !(flags & FNM_NOESCAPE)) {
+ *step = 2;
+ pat++;
+ esc = 1;
+ goto escaped;
+ }
+ if (pat[0]=='[') {
+ size_t k = 1;
+ if (k<m) if (pat[k] == '^' || pat[k] == '!') k++;
+ if (k<m) if (pat[k] == ']') k++;
+ for (; k<m && pat[k] && pat[k]!=']'; k++) {
+ if (k+1<m && pat[k+1] && pat[k]=='[' && (pat[k+1]==':' || pat[k+1]=='.' || pat[k+1]=='=')) {
+ int z = pat[k+1];
+ k+=2;
+ if (k<m && pat[k]) k++;
+ while (k<m && pat[k] && (pat[k-1]!=z || pat[k]!=']')) k++;
+ if (k==m || !pat[k]) break;
+ }
+ }
+ if (k==m || !pat[k]) {
+ *step = 1;
+ return '[';
+ }
+ *step = k+1;
+ return BRACKET;
+ }
+ if (pat[0] == '*')
+ return STAR;
+ if (pat[0] == '?')
+ return QUESTION;
+escaped:
+ if (pat[0] >= 128U) {
+ wchar_t wc;
+ int k = mbtowc(&wc, pat, m);
+ if (k<0) {
+ *step = 0;
+ return UNMATCHABLE;
+ }
+ *step = k + esc;
+ return wc;
+ }
+ return pat[0];
+}
+
+static int match_bracket(const char *p, int k)
+{
+ wchar_t wc;
+ int inv = 0;
+ p++;
+ if (*p=='^' || *p=='!') {
+ inv = 1;
+ p++;
+ }
+ if (*p==']') {
+ if (k==']') return !inv;
+ p++;
+ } else if (*p=='-') {
+ if (k=='-') return !inv;
+ p++;
+ }
+ wc = p[-1];
+ for (; *p != ']'; p++) {
+ if (p[0]=='-' && p[1]!=']') {
+ wchar_t wc2;
+ int l = mbtowc(&wc2, p+1, 4);
+ if (l < 0) return 0;
+ if (wc<=wc2 && (unsigned)k-wc <= wc2-wc) return !inv;
+ p += l-1;
+ continue;
+ }
+ if (p[0]=='[' && (p[1]==':' || p[1]=='.' || p[1]=='=')) {
+ const char *p0 = p+2;
+ int z = p[1];
+ p+=3;
+ while (p[-1]!=z || p[0]!=']') p++;
+ if (z == ':' && p-1-p0 < 16) {
+ char buf[16];
+ memcpy(buf, p0, p-1-p0);
+ buf[p-1-p0] = 0;
+ if (iswctype(k, wctype(buf))) return !inv;
+ }
+ continue;
+ }
+ if (*p < 128U) {
+ wc = (unsigned char)*p;
+ } else {
+ int l = mbtowc(&wc, p, 4);
+ if (l < 0) return 0;
+ p += l-1;
+ }
+ if (wc==k) return !inv;
+ }
+ return inv;
+}
+
+static int fnmatch_internal(const char *pat, size_t m, const char *str, size_t n, int flags)
+{
+ const char *p, *ptail, *endpat;
+ const char *s, *stail, *endstr;
+ size_t pinc, sinc, tailcnt=0;
+ int c, k;
+
+ if (flags & FNM_PERIOD) {
+ if (*str == '.' && *pat != '.')
+ return FNM_NOMATCH;
+ }
+ for (;;) {
+ switch ((c = pat_next(pat, m, &pinc, flags))) {
+ case UNMATCHABLE:
+ return FNM_NOMATCH;
+ case STAR:
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
+ if (k <= 0)
+ return (c==END) ? 0 : FNM_NOMATCH;
+ str += sinc;
+ n -= sinc;
+ if (c == BRACKET) {
+ if (!match_bracket(pat, k))
+ return FNM_NOMATCH;
+ } else if (c != QUESTION && k != c) {
+ return FNM_NOMATCH;
+ }
+ pat+=pinc;
+ m-=pinc;
+ continue;
+ }
+ break;
+ }
+
+ /* Compute real pat length if it was initially unknown/-1 */
+ m = strnlen(pat, m);
+ endpat = pat + m;
+
+ /* Find the last * in pat and count chars needed after it */
+ for (p=ptail=pat; p<endpat; p+=pinc) {
+ switch (pat_next(p, endpat-p, &pinc, flags)) {
+ case UNMATCHABLE:
+ return FNM_NOMATCH;
+ case STAR:
+ tailcnt=0;
+ ptail = p+1;
+ break;
+ default:
+ tailcnt++;
+ break;
+ }
+ }
+
+ /* Past this point we need not check for UNMATCHABLE in pat,
+ * because all of pat has already been parsed once. */
+
+ /* Compute real str length if it was initially unknown/-1 */
+ n = strnlen(str, n);
+ endstr = str + n;
+ if (n < tailcnt) return FNM_NOMATCH;
+
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+ if (s[-1] < 128U) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return FNM_NOMATCH;
+ stail = s;
+
+ /* Check that the pat and str tails match */
+ p = ptail;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc, flags);
+ p += pinc;
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+ if (c != END) return FNM_NOMATCH;
+ break;
+ }
+ s += sinc;
+ if (c == BRACKET) {
+ if (!match_bracket(p-pinc, k))
+ return FNM_NOMATCH;
+ } else if (c != QUESTION && k != c) {
+ return FNM_NOMATCH;
+ }
+ }
+
+ /* We're all done with the tails now, so throw them out */
+ endstr = stail;
+ endpat = ptail;
+
+ /* Match pattern components until there are none left */
+ while (pat<endpat) {
+ p = pat;
+ s = str;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc, flags);
+ p += pinc;
+ /* Encountering * completes/commits a component */
+ if (c == STAR) {
+ pat = p;
+ str = s;
+ break;
+ }
+ k = str_next(s, endstr-s, &sinc);
+ if (!k)
+ return FNM_NOMATCH;
+ if (c == BRACKET) {
+ if (!match_bracket(p-pinc, k))
+ break;
+ } else if (c != QUESTION && k != c) {
+ break;
+ }
+ s += sinc;
+ }
+ if (c == STAR) continue;
+ /* If we failed, advance str, by 1 char if it's a valid
+ * char, or past all invalid bytes otherwise. */
+ k = str_next(str, endstr-str, &sinc);
+ if (k > 0) str += sinc;
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+ }
+
+ return 0;
+}
+
+int fnmatch(const char *pat, const char *str, int flags)
+{
+ const char *s, *p;
+ size_t inc;
+ int c;
+ if (flags & FNM_PATHNAME) for (;;) {
+ for (s=str; *s && *s!='/'; s++);
+ for (p=pat; (c=pat_next(p, -1, &inc, flags))!=END && c!='/'; p+=inc);
+ if (*s && *p!=*s) return FNM_NOMATCH;
+ if (fnmatch_internal(pat, p-pat, str, s-str, flags))
+ return FNM_NOMATCH;
+ if (!*s && c==END) return 0;
+ str = s+1;
+ pat = p+1;
+ }
+ return fnmatch_internal(pat, -1, str, -1, flags);
+}
diff --git a/system/lib/libcextra.symbols b/system/lib/libcextra.symbols
index 6f1039f1..54176b1d 100644
--- a/system/lib/libcextra.symbols
+++ b/system/lib/libcextra.symbols
@@ -187,3 +187,4 @@
T wmemmove
T wmemset
T wprintf
+ T fnmatch
diff --git a/tests/core/fnmatch.c b/tests/core/fnmatch.c
new file mode 100644
index 00000000..ebdb2009
--- /dev/null
+++ b/tests/core/fnmatch.c
@@ -0,0 +1,79 @@
+// Begin test_fnmatch.cpp
+#include <fnmatch.h>
+#include <iostream>
+#include <vector>
+#include <string>
+
+using namespace std;
+
+class TestCase {
+public:
+ TestCase(const string& pattern, const string& testString, int flags, int expected) :
+ pattern(pattern),
+ testString(testString),
+ flags(flags),
+ expected(expected)
+ {}
+ string pattern;
+ string testString;
+ int flags;
+ int expected;
+};
+
+int main()
+{
+ vector<TestCase> testCases;
+
+ testCases.push_back(TestCase("*","anything",0,0));
+ testCases.push_back(TestCase("*.txt","readme.txt",0,0));
+ testCases.push_back(TestCase("*.txt","readme.info",0,FNM_NOMATCH));
+ testCases.push_back(TestCase("*.t?t","readme.txt",0,0));
+ testCases.push_back(TestCase("*.t?t","readme.tot",0,0));
+ testCases.push_back(TestCase("*.t?t","readme.txxt",0,FNM_NOMATCH));
+ testCases.push_back(TestCase("[a-g]1","c1",0,0));
+ testCases.push_back(TestCase("[a-g]1","i1",0,FNM_NOMATCH));
+ testCases.push_back(TestCase("[!a-g]1","i1",0,0));
+ testCases.push_back(TestCase("a\\*","anything",0,FNM_NOMATCH));
+ testCases.push_back(TestCase("a\\*","a*",0,0));
+ testCases.push_back(TestCase("a\\*","a*",FNM_NOESCAPE,FNM_NOMATCH));
+ testCases.push_back(TestCase("a\\*","a\\*",FNM_NOESCAPE,0));
+ testCases.push_back(TestCase("*readme","/etc/readme",0,0));
+ testCases.push_back(TestCase("*readme","/etc/readme",FNM_PATHNAME,FNM_NOMATCH));
+ testCases.push_back(TestCase("/*/readme","/etc/readme",FNM_PATHNAME,0));
+ testCases.push_back(TestCase("*readme","/etc/.readme",0,0));
+ testCases.push_back(TestCase("*readme",".readme",FNM_PERIOD,FNM_NOMATCH));
+ testCases.push_back(TestCase("*.readme","/etc/.readme",FNM_PERIOD,0));
+ testCases.push_back(TestCase("*.readme","/etc/.readme",FNM_PERIOD|FNM_PATHNAME,FNM_NOMATCH));
+ testCases.push_back(TestCase("/*/.readme","/etc/.readme",FNM_PERIOD|FNM_PATHNAME,0));
+ testCases.push_back(TestCase("ReAdME","readme",0,FNM_NOMATCH));
+
+ bool pass = true;
+
+ for (vector<TestCase>::const_iterator it = testCases.begin(); it != testCases.end(); ++it)
+ {
+ int result = fnmatch(it->pattern.c_str(), it->testString.c_str(), it->flags);
+ if (result == it->expected)
+ cout << "Pass: ";
+ else
+ {
+ cout << "Fail: ";
+ pass = false;
+ }
+
+ cout << "fnmatch(" << it->pattern << ", " << it->testString << ", "
+ << it->flags << ") returned " << result << ", expected "
+ << it->expected << endl;
+ }
+
+ if (pass)
+ {
+ cout << "All tests passed." << endl;
+ return 0;
+ }
+ else
+ {
+ cout << "Some tests failed." << endl;
+ return 1;
+ }
+}
+
diff --git a/tests/core/fnmatch.out b/tests/core/fnmatch.out
new file mode 100644
index 00000000..303f7449
--- /dev/null
+++ b/tests/core/fnmatch.out
@@ -0,0 +1,23 @@
+Pass: fnmatch(*, anything, 0) returned 0, expected 0
+Pass: fnmatch(*.txt, readme.txt, 0) returned 0, expected 0
+Pass: fnmatch(*.txt, readme.info, 0) returned 1, expected 1
+Pass: fnmatch(*.t?t, readme.txt, 0) returned 0, expected 0
+Pass: fnmatch(*.t?t, readme.tot, 0) returned 0, expected 0
+Pass: fnmatch(*.t?t, readme.txxt, 0) returned 1, expected 1
+Pass: fnmatch([a-g]1, c1, 0) returned 0, expected 0
+Pass: fnmatch([a-g]1, i1, 0) returned 1, expected 1
+Pass: fnmatch([!a-g]1, i1, 0) returned 0, expected 0
+Pass: fnmatch(a\*, anything, 0) returned 1, expected 1
+Pass: fnmatch(a\*, a*, 0) returned 0, expected 0
+Pass: fnmatch(a\*, a*, 2) returned 1, expected 1
+Pass: fnmatch(a\*, a\*, 2) returned 0, expected 0
+Pass: fnmatch(*readme, /etc/readme, 0) returned 0, expected 0
+Pass: fnmatch(*readme, /etc/readme, 1) returned 1, expected 1
+Pass: fnmatch(/*/readme, /etc/readme, 1) returned 0, expected 0
+Pass: fnmatch(*readme, /etc/.readme, 0) returned 0, expected 0
+Pass: fnmatch(*readme, .readme, 4) returned 1, expected 1
+Pass: fnmatch(*.readme, /etc/.readme, 4) returned 0, expected 0
+Pass: fnmatch(*.readme, /etc/.readme, 5) returned 1, expected 1
+Pass: fnmatch(/*/.readme, /etc/.readme, 5) returned 0, expected 0
+Pass: fnmatch(ReAdME, readme, 0) returned 1, expected 1
+All tests passed.
diff --git a/tests/test_core.py b/tests/test_core.py
index 59f2f9e9..a884ea75 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -3705,6 +3705,11 @@ ok
self.do_run_from_file(src, output)
+ def test_fnmatch(self):
+ test_path = path_from_root('tests', 'core', 'fnmatch')
+ src, output = (test_path + s for s in ('.c', '.out'))
+ self.do_run_from_file(src, output)
+
def test_sscanf(self):
if self.emcc_args is None: return self.skip('needs emcc for libc')
if not self.is_le32(): return self.skip('le32 needed for accurate math')
diff --git a/tools/shared.py b/tools/shared.py
index 1ee5cce8..bb50350d 100644
--- a/tools/shared.py
+++ b/tools/shared.py
@@ -345,7 +345,7 @@ def find_temp_directory():
# we re-check sanity when the settings are changed)
# We also re-check sanity and clear the cache when the version changes
-EMSCRIPTEN_VERSION = '1.9.2'
+EMSCRIPTEN_VERSION = '1.9.3'
def generate_sanity():
return EMSCRIPTEN_VERSION + '|' + get_llvm_target() + '|' + LLVM_ROOT + '|' + get_clang_version()