diff options
author | Bruce Mitchener <bruce.mitchener@gmail.com> | 2013-04-15 23:03:13 +0700 |
---|---|---|
committer | Bruce Mitchener <bruce.mitchener@gmail.com> | 2013-04-22 16:45:07 +0700 |
commit | 7cacf252e7f99fc2c98b969f8076011cfb6854d1 (patch) | |
tree | 36baa3a1af6723d24dae5b4e1dc1351abf06fa71 /system | |
parent | b9090fa1f2ba90a5745981d1a4d4108915ddd6e9 (diff) |
Add musl multibyte function implementations.
Diffstat (limited to 'system')
-rw-r--r-- | system/lib/libc/musl/src/multibyte/btowc.c | 7 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/internal.c | 38 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/internal.h | 22 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mblen.c | 17 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbrlen.c | 18 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbrtowc.c | 57 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbsinit.c | 17 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbsnrtowcs.c | 65 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbsrtowcs.c | 100 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbstowcs.c | 7 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/mbtowc.c | 53 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wcrtomb.c | 38 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wcsnrtombs.c | 52 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wcsrtombs.c | 58 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wcstombs.c | 7 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wctob.c | 8 | ||||
-rw-r--r-- | system/lib/libc/musl/src/multibyte/wctomb.c | 18 |
17 files changed, 582 insertions, 0 deletions
diff --git a/system/lib/libc/musl/src/multibyte/btowc.c b/system/lib/libc/musl/src/multibyte/btowc.c new file mode 100644 index 00000000..9d2c3b16 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/btowc.c @@ -0,0 +1,7 @@ +#include <stdio.h> +#include <wchar.h> + +wint_t btowc(int c) +{ + return c<128U ? c : EOF; +} diff --git a/system/lib/libc/musl/src/multibyte/internal.c b/system/lib/libc/musl/src/multibyte/internal.c new file mode 100644 index 00000000..ab22806e --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/internal.c @@ -0,0 +1,38 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <inttypes.h> + +#include "internal.h" + +#define C(x) ( x<2 ? -1 : ( R(0x80,0xc0) | x ) ) +#define D(x) C((x+16)) +#define E(x) ( ( x==0 ? R(0xa0,0xc0) : \ + x==0xd ? R(0x80,0xa0) : \ + R(0x80,0xc0) ) \ + | ( R(0x80,0xc0) >> 6 ) \ + | x ) +#define F(x) ( ( x>=5 ? 0 : \ + x==0 ? R(0x90,0xc0) : \ + x==4 ? R(0x80,0xa0) : \ + R(0x80,0xc0) ) \ + | ( R(0x80,0xc0) >> 6 ) \ + | ( R(0x80,0xc0) >> 12 ) \ + | x ) + +const uint32_t bittab[] = { + C(0x2),C(0x3),C(0x4),C(0x5),C(0x6),C(0x7), + C(0x8),C(0x9),C(0xa),C(0xb),C(0xc),C(0xd),C(0xe),C(0xf), + D(0x0),D(0x1),D(0x2),D(0x3),D(0x4),D(0x5),D(0x6),D(0x7), + D(0x8),D(0x9),D(0xa),D(0xb),D(0xc),D(0xd),D(0xe),D(0xf), + E(0x0),E(0x1),E(0x2),E(0x3),E(0x4),E(0x5),E(0x6),E(0x7), + E(0x8),E(0x9),E(0xa),E(0xb),E(0xc),E(0xd),E(0xe),E(0xf), + F(0x0),F(0x1),F(0x2),F(0x3),F(0x4) +}; + +#ifdef BROKEN_VISIBILITY +__asm__(".hidden __fsmu8"); +#endif diff --git a/system/lib/libc/musl/src/multibyte/internal.h b/system/lib/libc/musl/src/multibyte/internal.h new file mode 100644 index 00000000..25ba240e --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/internal.h @@ -0,0 +1,22 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#define bittab __fsmu8 + +#include "libc.h" + +extern const uint32_t bittab[] ATTR_LIBC_VISIBILITY; + +/* Upper 6 state bits are a negative integer offset to bound-check next byte */ +/* equivalent to: ( (b-0x80) | (b+offset) ) & ~0x3f */ +#define OOB(c,b) (((((b)>>3)-0x10)|(((b)>>3)+((int32_t)(c)>>26))) & ~7) + +/* Interval [a,b). Either a must be 80 or b must be c0, lower 3 bits clear. */ +#define R(a,b) ((uint32_t)((a==0x80 ? 0x40-b : -a) << 23)) +#define FAILSTATE R(0x80,0x80) + +#define SA 0xc2u +#define SB 0xf4u diff --git a/system/lib/libc/musl/src/multibyte/mblen.c b/system/lib/libc/musl/src/multibyte/mblen.c new file mode 100644 index 00000000..26d35649 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mblen.c @@ -0,0 +1,17 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +int mblen(const char *s, size_t n) +{ + return mbtowc(0, s, n); +} diff --git a/system/lib/libc/musl/src/multibyte/mbrlen.c b/system/lib/libc/musl/src/multibyte/mbrlen.c new file mode 100644 index 00000000..c9714ef8 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbrlen.c @@ -0,0 +1,18 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t mbrlen(const char *restrict s, size_t n, mbstate_t *restrict st) +{ + static unsigned internal; + return mbrtowc(0, s, n, st ? st : (mbstate_t *)&internal); +} diff --git a/system/lib/libc/musl/src/multibyte/mbrtowc.c b/system/lib/libc/musl/src/multibyte/mbrtowc.c new file mode 100644 index 00000000..db803661 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbrtowc.c @@ -0,0 +1,57 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate_t *restrict st) +{ + static unsigned internal_state; + unsigned c; + const unsigned char *s = (const void *)src; + const unsigned N = n; + + if (!st) st = (void *)&internal_state; + c = *(unsigned *)st; + + if (!s) { + if (c) goto ilseq; + return 0; + } else if (!wc) wc = (void *)&wc; + + if (!n) return -2; + if (!c) { + if (*s < 0x80) return !!(*wc = *s); + if (*s-SA > SB-SA) goto ilseq; + c = bittab[*s++-SA]; n--; + } + + if (n) { + if (OOB(c,*s)) goto ilseq; +loop: + c = c<<6 | *s++-0x80; n--; + if (!(c&(1U<<31))) { + *(unsigned *)st = 0; + *wc = c; + return N-n; + } + if (n) { + if (*s-0x80u >= 0x40) goto ilseq; + goto loop; + } + } + + *(unsigned *)st = c; + return -2; +ilseq: + *(unsigned *)st = 0; + errno = EILSEQ; + return -1; +} diff --git a/system/lib/libc/musl/src/multibyte/mbsinit.c b/system/lib/libc/musl/src/multibyte/mbsinit.c new file mode 100644 index 00000000..c0e7e494 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbsinit.c @@ -0,0 +1,17 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +int mbsinit(const mbstate_t *st) +{ + return !st || !*(unsigned *)st; +} diff --git a/system/lib/libc/musl/src/multibyte/mbsnrtowcs.c b/system/lib/libc/musl/src/multibyte/mbsnrtowcs.c new file mode 100644 index 00000000..33457f95 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbsnrtowcs.c @@ -0,0 +1,65 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> +#include <stdio.h> + +#include "internal.h" + +size_t mbsnrtowcs(wchar_t *restrict wcs, const char **restrict src, size_t n, size_t wn, mbstate_t *restrict st) +{ + size_t l, cnt=0, n2; + wchar_t *ws, wbuf[256]; + const char *s = *src; + + if (!wcs) ws = wbuf, wn = sizeof wbuf / sizeof *wbuf; + else ws = wcs; + + /* making sure output buffer size is at most n/4 will ensure + * that mbsrtowcs never reads more than n input bytes. thus + * we can use mbsrtowcs as long as it's practical.. */ + + while ( s && wn && ( (n2=n/4)>=wn || n2>32 ) ) { + if (n2>=wn) n2=wn; + n -= n2; + l = mbsrtowcs(ws, &s, n2, st); + if (!(l+1)) { + cnt = l; + wn = 0; + break; + } + if (ws != wbuf) { + ws += l; + wn -= l; + } + cnt += l; + } + if (s) while (wn && n) { + l = mbrtowc(ws, s, n, st); + if (l+2<=2) { + if (!(l+1)) { + cnt = l; + break; + } + if (!l) { + s = 0; + break; + } + /* have to roll back partial character */ + *(unsigned *)st = 0; + break; + } + s += l; n -= l; + /* safe - this loop runs fewer than sizeof(wbuf)/8 times */ + ws++; wn--; + cnt++; + } + if (wcs) *src = s; + return cnt; +} diff --git a/system/lib/libc/musl/src/multibyte/mbsrtowcs.c b/system/lib/libc/musl/src/multibyte/mbsrtowcs.c new file mode 100644 index 00000000..75a493c4 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbsrtowcs.c @@ -0,0 +1,100 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) +{ + const unsigned char *s = (const void *)*src; + size_t wn0 = wn; + unsigned c = 0; + + if (st && (c = *(unsigned *)st)) { + if (ws) { + *(unsigned *)st = 0; + goto resume; + } else { + goto resume0; + } + } + + if (!ws) for (;;) { + if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { + while (!(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { + s += 4; + wn -= 4; + } + } + if (*s-1u < 0x7f) { + s++; + wn--; + continue; + } + if (*s-SA > SB-SA) break; + c = bittab[*s++-SA]; +resume0: + if (OOB(c,*s)) { s--; break; } + s++; + if (c&(1U<<25)) { + if (*s-0x80u >= 0x40) { s-=2; break; } + s++; + if (c&(1U<<19)) { + if (*s-0x80u >= 0x40) { s-=3; break; } + s++; + } + } + wn--; + c = 0; + } else for (;;) { + if (!wn) return wn0; + if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { + while (wn>=4 && !(( *(uint32_t*)s | *(uint32_t*)s-0x01010101) & 0x80808080)) { + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + wn -= 4; + } + } + if (*s-1u < 0x7f) { + *ws++ = *s++; + wn--; + continue; + } + if (*s-SA > SB-SA) break; + c = bittab[*s++-SA]; +resume: + if (OOB(c,*s)) { s--; break; } + c = (c<<6) | *s++-0x80; + if (c&(1U<<31)) { + if (*s-0x80u >= 0x40) { s-=2; break; } + c = (c<<6) | *s++-0x80; + if (c&(1U<<31)) { + if (*s-0x80u >= 0x40) { s-=3; break; } + c = (c<<6) | *s++-0x80; + } + } + *ws++ = c; + wn--; + c = 0; + } + + if (!c && !*s) { + if (ws) { + *ws = 0; + *src = 0; + } + return wn0-wn; + } + errno = EILSEQ; + if (ws) *src = (const void *)s; + return -1; +} diff --git a/system/lib/libc/musl/src/multibyte/mbstowcs.c b/system/lib/libc/musl/src/multibyte/mbstowcs.c new file mode 100644 index 00000000..dc0d4594 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbstowcs.c @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <wchar.h> + +size_t mbstowcs(wchar_t *restrict ws, const char *restrict s, size_t wn) +{ + return mbsrtowcs(ws, (void*)&s, wn, 0); +} diff --git a/system/lib/libc/musl/src/multibyte/mbtowc.c b/system/lib/libc/musl/src/multibyte/mbtowc.c new file mode 100644 index 00000000..ec9e54ad --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/mbtowc.c @@ -0,0 +1,53 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" +#include <stdio.h> +int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) +{ + unsigned c; + const unsigned char *s = (const void *)src; + + if (!s) return 0; + if (!n) goto ilseq; + if (!wc) wc = (void *)&wc; + + if (*s < 0x80) return !!(*wc = *s); + if (*s-SA > SB-SA) goto ilseq; + c = bittab[*s++-SA]; + + /* Avoid excessive checks against n: If shifting the state n-1 + * times does not clear the high bit, then the value of n is + * insufficient to read a character */ + if (n<4 && ((c<<(6*n-6)) & (1U<<31))) goto ilseq; + + if (OOB(c,*s)) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 2; + } + + if (*s-0x80u >= 0x40) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 3; + } + + if (*s-0x80u >= 0x40) goto ilseq; + *wc = c<<6 | *s++-0x80; + return 4; + +ilseq: + errno = EILSEQ; + return -1; +} diff --git a/system/lib/libc/musl/src/multibyte/wcrtomb.c b/system/lib/libc/musl/src/multibyte/wcrtomb.c new file mode 100644 index 00000000..250649f5 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wcrtomb.c @@ -0,0 +1,38 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) +{ + if (!s) return 1; + if ((unsigned)wc < 0x80) { + *s = wc; + return 1; + } else if ((unsigned)wc < 0x800) { + *s++ = 0xc0 | (wc>>6); + *s = 0x80 | (wc&0x3f); + return 2; + } else if ((unsigned)wc < 0xd800 || (unsigned)wc-0xe000 < 0x2000) { + *s++ = 0xe0 | (wc>>12); + *s++ = 0x80 | ((wc>>6)&0x3f); + *s = 0x80 | (wc&0x3f); + return 3; + } else if ((unsigned)wc-0x10000 < 0x100000) { + *s++ = 0xf0 | (wc>>18); + *s++ = 0x80 | ((wc>>12)&0x3f); + *s++ = 0x80 | ((wc>>6)&0x3f); + *s = 0x80 | (wc&0x3f); + return 4; + } + errno = EILSEQ; + return -1; +} diff --git a/system/lib/libc/musl/src/multibyte/wcsnrtombs.c b/system/lib/libc/musl/src/multibyte/wcsnrtombs.c new file mode 100644 index 00000000..a2e308b3 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wcsnrtombs.c @@ -0,0 +1,52 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t wcsnrtombs(char *restrict dst, const wchar_t **restrict wcs, size_t wn, size_t n, mbstate_t *restrict st) +{ + size_t l, cnt=0, n2; + char *s, buf[256]; + const wchar_t *ws = *wcs; + + if (!dst) s = buf, n = sizeof buf; + else s = dst; + + while ( ws && n && ( (n2=wn)>=n || n2>32 ) ) { + if (n2>=n) n2=n; + wn -= n2; + l = wcsrtombs(s, &ws, n2, 0); + if (!(l+1)) { + cnt = l; + n = 0; + break; + } + if (s != buf) { + s += l; + n -= l; + } + cnt += l; + } + if (ws) while (n && wn) { + l = wcrtomb(s, *ws, 0); + if ((l+1)<=1) { + if (!l) ws = 0; + else cnt = l; + break; + } + ws++; wn--; + /* safe - this loop runs fewer than sizeof(buf) times */ + s+=l; n-=l; + cnt++; + } + if (dst) *wcs = ws; + return cnt; +} diff --git a/system/lib/libc/musl/src/multibyte/wcsrtombs.c b/system/lib/libc/musl/src/multibyte/wcsrtombs.c new file mode 100644 index 00000000..d48a65e7 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wcsrtombs.c @@ -0,0 +1,58 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +size_t wcsrtombs(char *restrict s, const wchar_t **restrict ws, size_t n, mbstate_t *restrict st) +{ + const wchar_t *ws2; + char buf[4]; + size_t N = n, l; + if (!s) { + for (n=0, ws2=*ws; *ws2; ws2++) { + if (*ws2 >= 0x80u) { + l = wcrtomb(buf, *ws2, 0); + if (!(l+1)) return -1; + n += l; + } else n++; + } + return n; + } + while (n>=4 && **ws) { + if (**ws >= 0x80u) { + l = wcrtomb(s, **ws, 0); + if (!(l+1)) return -1; + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + while (n && **ws) { + if (**ws >= 0x80u) { + l = wcrtomb(buf, **ws, 0); + if (!(l+1)) return -1; + if (l>n) return N-n; + wcrtomb(s, **ws, 0); + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + if (n) *s = 0; + *ws = 0; + return N-n; +} diff --git a/system/lib/libc/musl/src/multibyte/wcstombs.c b/system/lib/libc/musl/src/multibyte/wcstombs.c new file mode 100644 index 00000000..ab152874 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wcstombs.c @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <wchar.h> + +size_t wcstombs(char *restrict s, const wchar_t *restrict ws, size_t n) +{ + return wcsrtombs(s, &(const wchar_t *){ws}, n, 0); +} diff --git a/system/lib/libc/musl/src/multibyte/wctob.c b/system/lib/libc/musl/src/multibyte/wctob.c new file mode 100644 index 00000000..d6353ee1 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wctob.c @@ -0,0 +1,8 @@ +#include <stdio.h> +#include <wchar.h> + +int wctob(wint_t c) +{ + if (c < 128U) return c; + return EOF; +} diff --git a/system/lib/libc/musl/src/multibyte/wctomb.c b/system/lib/libc/musl/src/multibyte/wctomb.c new file mode 100644 index 00000000..6910ef37 --- /dev/null +++ b/system/lib/libc/musl/src/multibyte/wctomb.c @@ -0,0 +1,18 @@ +/* + * This code was written by Rich Felker in 2010; no copyright is claimed. + * This code is in the public domain. Attribution is appreciated but + * unnecessary. + */ + +#include <stdlib.h> +#include <inttypes.h> +#include <wchar.h> +#include <errno.h> + +#include "internal.h" + +int wctomb(char *s, wchar_t wc) +{ + if (!s) return 0; + return wcrtomb(s, wc, 0); +} |