diff options
Diffstat (limited to 'fs/nls/nls_base.c')
| -rw-r--r-- | fs/nls/nls_base.c | 247 |
1 files changed, 149 insertions, 98 deletions
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index a912debcd20..52ccd34b1e7 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -1,5 +1,5 @@ /* - * linux/fs/nls_base.c + * linux/fs/nls/nls_base.c * * Native language support--charsets and unicode translations. * By Gordon Chaffee 1996, 1997 @@ -10,14 +10,12 @@ #include <linux/module.h> #include <linux/string.h> -#include <linux/config.h> #include <linux/nls.h> #include <linux/kernel.h> #include <linux/errno.h> -#ifdef CONFIG_KMOD #include <linux/kmod.h> -#endif #include <linux/spinlock.h> +#include <asm/byteorder.h> static struct nls_table default_table; static struct nls_table *tables = &default_table; @@ -35,7 +33,7 @@ struct utf8_table { long lval; }; -static struct utf8_table utf8_table[] = +static const struct utf8_table utf8_table[] = { {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */}, {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */}, @@ -46,12 +44,19 @@ static struct utf8_table utf8_table[] = {0, /* end of table */} }; -int -utf8_mbtowc(wchar_t *p, const __u8 *s, int n) +#define UNICODE_MAX 0x0010ffff +#define PLANE_SIZE 0x00010000 + +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff + +int utf8_to_utf32(const u8 *s, int inlen, unicode_t *pu) { - long l; + unsigned long l; int c0, c, nc; - struct utf8_table *t; + const struct utf8_table *t; nc = 0; c0 = *s; @@ -60,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) nc++; if ((c0 & t->cmask) == t->cval) { l &= t->lmask; - if (l < t->lval) + if (l < t->lval || l > UNICODE_MAX || + (l & SURROGATE_MASK) == SURROGATE_PAIR) return -1; - *p = l; + *pu = (unicode_t) l; return nc; } - if (n <= nc) + if (inlen <= nc) return -1; s++; c = (*s ^ 0x80) & 0xFF; @@ -75,100 +81,165 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n) } return -1; } +EXPORT_SYMBOL(utf8_to_utf32); -int -utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n) +int utf32_to_utf8(unicode_t u, u8 *s, int maxout) { - __u16 *op; - const __u8 *ip; - int size; - - op = pwcs; - ip = s; - while (*ip && n > 0) { - if (*ip & 0x80) { - size = utf8_mbtowc(op, ip, n); - if (size == -1) { - /* Ignore character and move on */ - ip++; - n--; - } else { - op++; - ip += size; - n -= size; - } - } else { - *op++ = *ip++; - n--; - } - } - return (op - pwcs); -} - -int -utf8_wctomb(__u8 *s, wchar_t wc, int maxlen) -{ - long l; + unsigned long l; int c, nc; - struct utf8_table *t; - - if (s == 0) + const struct utf8_table *t; + + if (!s) return 0; - - l = wc; + + l = u; + if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR) + return -1; + nc = 0; - for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) { + for (t = utf8_table; t->cmask && maxout; t++, maxout--) { nc++; if (l <= t->lmask) { c = t->shift; - *s = t->cval | (l >> c); + *s = (u8) (t->cval | (l >> c)); while (c > 0) { c -= 6; s++; - *s = 0x80 | ((l >> c) & 0x3F); + *s = (u8) (0x80 | ((l >> c) & 0x3F)); } return nc; } } return -1; } +EXPORT_SYMBOL(utf32_to_utf8); + +static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + *s = (wchar_t) c; + break; + case UTF16_LITTLE_ENDIAN: + *s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *s = __cpu_to_be16(c); + break; + } +} -int -utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen) +int utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, + wchar_t *pwcs, int maxout) { - const __u16 *ip; - __u8 *op; + u16 *op; int size; + unicode_t u; + + op = pwcs; + while (inlen > 0 && maxout > 0 && *s) { + if (*s & 0x80) { + size = utf8_to_utf32(s, inlen, &u); + if (size < 0) + return -EINVAL; + s += size; + inlen -= size; + + if (u >= PLANE_SIZE) { + if (maxout < 2) + break; + u -= PLANE_SIZE; + put_utf16(op++, SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, SURROGATE_PAIR | + SURROGATE_LOW | + (u & SURROGATE_BITS), + endian); + maxout -= 2; + } else { + put_utf16(op++, u, endian); + maxout--; + } + } else { + put_utf16(op++, *s++, endian); + inlen--; + maxout--; + } + } + return op - pwcs; +} +EXPORT_SYMBOL(utf8s_to_utf16s); + +static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian) +{ + switch (endian) { + default: + return c; + case UTF16_LITTLE_ENDIAN: + return __le16_to_cpu(c); + case UTF16_BIG_ENDIAN: + return __be16_to_cpu(c); + } +} + +int utf16s_to_utf8s(const wchar_t *pwcs, int inlen, enum utf16_endian endian, + u8 *s, int maxout) +{ + u8 *op; + int size; + unsigned long u, v; op = s; - ip = pwcs; - while (*ip && maxlen > 0) { - if (*ip > 0x7f) { - size = utf8_wctomb(op, *ip, maxlen); + while (inlen > 0 && maxout > 0) { + u = get_utf16(*pwcs, endian); + if (!u) + break; + pwcs++; + inlen--; + if (u > 0x7f) { + if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { + if (u & SURROGATE_LOW) { + /* Ignore character and move on */ + continue; + } + if (inlen <= 0) + break; + v = get_utf16(*pwcs, endian); + if ((v & SURROGATE_MASK) != SURROGATE_PAIR || + !(v & SURROGATE_LOW)) { + /* Ignore character and move on */ + continue; + } + u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10) + + (v & SURROGATE_BITS); + pwcs++; + inlen--; + } + size = utf32_to_utf8(u, op, maxout); if (size == -1) { /* Ignore character and move on */ - maxlen--; } else { op += size; - maxlen -= size; + maxout -= size; } } else { - *op++ = (__u8) *ip; + *op++ = (u8) u; + maxout--; } - ip++; } - return (op - s); + return op - s; } +EXPORT_SYMBOL(utf16s_to_utf8s); -int register_nls(struct nls_table * nls) +int __register_nls(struct nls_table *nls, struct module *owner) { struct nls_table ** tmp = &tables; - if (!nls) - return -EINVAL; if (nls->next) return -EBUSY; + nls->owner = owner; spin_lock(&nls_lock); while (*tmp) { if (nls == *tmp) { @@ -182,6 +253,7 @@ int register_nls(struct nls_table * nls) spin_unlock(&nls_lock); return 0; } +EXPORT_SYMBOL(__register_nls); int unregister_nls(struct nls_table * nls) { @@ -218,32 +290,16 @@ static struct nls_table *find_nls(char *charset) struct nls_table *load_nls(char *charset) { - struct nls_table *nls; -#ifdef CONFIG_KMOD - int ret; -#endif - - nls = find_nls(charset); - if (nls) - return nls; - -#ifdef CONFIG_KMOD - ret = request_module("nls_%s", charset); - if (ret != 0) { - printk("Unable to load NLS charset %s\n", charset); - return NULL; - } - nls = find_nls(charset); -#endif - return nls; + return try_then_request_module(find_nls(charset), "nls_%s", charset); } void unload_nls(struct nls_table *nls) { - module_put(nls->owner); + if (nls) + module_put(nls->owner); } -static wchar_t charset2uni[256] = { +static const wchar_t charset2uni[256] = { /* 0x00*/ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, @@ -326,7 +382,7 @@ static wchar_t charset2uni[256] = { 0x00fc, 0x00fd, 0x00fe, 0x00ff, }; -static unsigned char page00[256] = { +static const unsigned char page00[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ @@ -362,11 +418,11 @@ static unsigned char page00[256] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; -static unsigned char *page_uni2charset[256] = { +static const unsigned char *const page_uni2charset[256] = { page00 }; -static unsigned char charset2lower[256] = { +static const unsigned char charset2lower[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ @@ -402,7 +458,7 @@ static unsigned char charset2lower[256] = { 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */ }; -static unsigned char charset2upper[256] = { +static const unsigned char charset2upper[256] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */ @@ -441,7 +497,7 @@ static unsigned char charset2upper[256] = { static int uni2char(wchar_t uni, unsigned char *out, int boundlen) { - unsigned char *uni2charset; + const unsigned char *uni2charset; unsigned char cl = uni & 0x00ff; unsigned char ch = (uni & 0xff00) >> 8; @@ -484,14 +540,9 @@ struct nls_table *load_nls_default(void) return &default_table; } -EXPORT_SYMBOL(register_nls); EXPORT_SYMBOL(unregister_nls); EXPORT_SYMBOL(unload_nls); EXPORT_SYMBOL(load_nls); EXPORT_SYMBOL(load_nls_default); -EXPORT_SYMBOL(utf8_mbtowc); -EXPORT_SYMBOL(utf8_mbstowcs); -EXPORT_SYMBOL(utf8_wctomb); -EXPORT_SYMBOL(utf8_wcstombs); MODULE_LICENSE("Dual BSD/GPL"); |
