Diffstat (limited to 'arch/x86/include/asm/string_32.h')
-rw-r--r--	arch/x86/include/asm/string_32.h	326
1 file changed, 326 insertions, 0 deletions
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
new file mode 100644
index 00000000000..0e0e3ba827f
--- /dev/null
+++ b/arch/x86/include/asm/string_32.h
@@ -0,0 +1,326 @@
+#ifndef _ASM_X86_STRING_32_H
+#define _ASM_X86_STRING_32_H
+
+#ifdef __KERNEL__
+
+/* Let gcc decide whether to inline or use the out of line functions */
+
+#define __HAVE_ARCH_STRCPY
+extern char *strcpy(char *dest, const char *src);
+
+#define __HAVE_ARCH_STRNCPY
+extern char *strncpy(char *dest, const char *src, size_t count);
+
+#define __HAVE_ARCH_STRCAT
+extern char *strcat(char *dest, const char *src);
+
+#define __HAVE_ARCH_STRNCAT
+extern char *strncat(char *dest, const char *src, size_t count);
+
+#define __HAVE_ARCH_STRCMP
+extern int strcmp(const char *cs, const char *ct);
+
+#define __HAVE_ARCH_STRNCMP
+extern int strncmp(const char *cs, const char *ct, size_t count);
+
+#define __HAVE_ARCH_STRCHR
+extern char *strchr(const char *s, int c);
+
+#define __HAVE_ARCH_STRLEN
+extern size_t strlen(const char *s);
+
+static __always_inline void *__memcpy(void *to, const void *from, size_t n)
+{
+	int d0, d1, d2;
+	asm volatile("rep ; movsl\n\t"
+		     "movl %4,%%ecx\n\t"
+		     "andl $3,%%ecx\n\t"
+		     "jz 1f\n\t"
+		     "rep ; movsb\n\t"
+		     "1:"
+		     : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+		     : "0" (n / 4), "g" (n), "1" ((long)to), "2" ((long)from)
+		     : "memory");
+	return to;
+}
+
+/*
+ * This looks ugly, but the compiler can optimize it totally,
+ * as the count is constant.
+ */
+static __always_inline void *__constant_memcpy(void *to, const void *from,
+					       size_t n)
+{
+	long esi, edi;
+	if (!n)
+		return to;
+
+	switch (n) {
+	case 1:
+		*(char *)to = *(char *)from;
+		return to;
+	case 2:
+		*(short *)to = *(short *)from;
+		return to;
+	case 4:
+		*(int *)to = *(int *)from;
+		return to;
+
+	case 3:
+		*(short *)to = *(short *)from;
+		*((char *)to + 2) = *((char *)from + 2);
+		return to;
+	case 5:
+		*(int *)to = *(int *)from;
+		*((char *)to + 4) = *((char *)from + 4);
+		return to;
+	case 6:
+		*(int *)to = *(int *)from;
+		*((short *)to + 2) = *((short *)from + 2);
+		return to;
+	case 8:
+		*(int *)to = *(int *)from;
+		*((int *)to + 1) = *((int *)from + 1);
+		return to;
+	}
+
+	esi = (long)from;
+	edi = (long)to;
+	if (n >= 5 * 4) {
+		/* large block: use rep prefix */
+		int ecx;
+		asm volatile("rep ; movsl"
+			     : "=&c" (ecx), "=&D" (edi), "=&S" (esi)
+			     : "0" (n / 4), "1" (edi), "2" (esi)
+			     : "memory"
+		);
+	} else {
+		/* small block: don't clobber ecx + smaller code */
+		if (n >= 4 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 3 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 2 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+		if (n >= 1 * 4)
+			asm volatile("movsl"
+				     : "=&D"(edi), "=&S"(esi)
+				     : "0"(edi), "1"(esi)
+				     : "memory");
+	}
+	switch (n % 4) {
+		/* tail */
+	case 0:
+		return to;
+	case 1:
+		asm volatile("movsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	case 2:
+		asm volatile("movsw"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	default:
+		asm volatile("movsw\n\tmovsb"
+			     : "=&D"(edi), "=&S"(esi)
+			     : "0"(edi), "1"(esi)
+			     : "memory");
+		return to;
+	}
+}
+
+#define __HAVE_ARCH_MEMCPY
+
+#ifdef CONFIG_X86_USE_3DNOW
+
+#include <asm/mmx.h>
+
+/*
+ *	This CPU favours 3DNow strongly (eg AMD Athlon)
+ */
+
+static inline void *__constant_memcpy3d(void *to, const void *from, size_t len)
+{
+	if (len < 512)
+		return __constant_memcpy(to, from, len);
+	return _mmx_memcpy(to, from, len);
+}
+
+static inline void *__memcpy3d(void *to, const void *from, size_t len)
+{
+	if (len < 512)
+		return __memcpy(to, from, len);
+	return _mmx_memcpy(to, from, len);
+}
+
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy3d((t), (f), (n))	\
+	 : __memcpy3d((t), (f), (n)))
+
+#else
+
+/*
+ *	No 3D Now!
+ */
+
+#define memcpy(t, f, n)				\
+	(__builtin_constant_p((n))		\
+	 ? __constant_memcpy((t), (f), (n))	\
+	 : __memcpy((t), (f), (n)))
+
+#endif
+
+#define __HAVE_ARCH_MEMMOVE
+void *memmove(void *dest, const void *src, size_t n);
+
+#define memcmp __builtin_memcmp
+
+#define __HAVE_ARCH_MEMCHR
+extern void *memchr(const void *cs, int c, size_t count);
+
+static inline void *__memset_generic(void *s, char c, size_t count)
+{
+	int d0, d1;
+	asm volatile("rep\n\t"
+		     "stosb"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "1" (s), "0" (count)
+		     : "memory");
+	return s;
+}
+
+/* we might want to write optimized versions of these later */
+#define __constant_count_memset(s, c, count) __memset_generic((s), (c), (count))
+
+/*
+ * memset(x, 0, y) is a reasonably common thing to do, so we want to fill
+ * things 32 bits at a time even when we don't know the size of the
+ * area at compile-time..
+ */
+static __always_inline
+void *__constant_c_memset(void *s, unsigned long c, size_t count)
+{
+	int d0, d1;
+	asm volatile("rep ; stosl\n\t"
+		     "testb $2,%b3\n\t"
+		     "je 1f\n\t"
+		     "stosw\n"
+		     "1:\ttestb $1,%b3\n\t"
+		     "je 2f\n\t"
+		     "stosb\n"
+		     "2:"
+		     : "=&c" (d0), "=&D" (d1)
+		     : "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
+		     : "memory");
+	return s;
+}
+
+/* Added by Gertjan van Wingerde to make minix and sysv module work */
+#define __HAVE_ARCH_STRNLEN
+extern size_t strnlen(const char *s, size_t count);
+/* end of additional stuff */
+
+#define __HAVE_ARCH_STRSTR
+extern char *strstr(const char *cs, const char *ct);
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as we by now know that both pattern and count is constant..
+ */
+static __always_inline
+void *__constant_c_and_count_memset(void *s, unsigned long pattern,
+				    size_t count)
+{
+	switch (count) {
+	case 0:
+		return s;
+	case 1:
+		*(unsigned char *)s = pattern & 0xff;
+		return s;
+	case 2:
+		*(unsigned short *)s = pattern & 0xffff;
+		return s;
+	case 3:
+		*(unsigned short *)s = pattern & 0xffff;
+		*((unsigned char *)s + 2) = pattern & 0xff;
+		return s;
+	case 4:
+		*(unsigned long *)s = pattern;
+		return s;
+	}
+
+#define COMMON(x)							\
+	asm volatile("rep ; stosl"					\
+		     x							\
+		     : "=&c" (d0), "=&D" (d1)				\
+		     : "a" (eax), "0" (count/4), "1" ((long)s)	\
+		     : "memory")
+
+	{
+		int d0, d1;
+#if __GNUC__ == 4 && __GNUC_MINOR__ == 0
+		/* Workaround for broken gcc 4.0 */
+		register unsigned long eax asm("%eax") = pattern;
+#else
+		unsigned long eax = pattern;
+#endif
+
+		switch (count % 4) {
+		case 0:
+			COMMON("");
+			return s;
+		case 1:
+			COMMON("\n\tstosb");
+			return s;
+		case 2:
+			COMMON("\n\tstosw");
+			return s;
+		default:
+			COMMON("\n\tstosw\n\tstosb");
+			return s;
+		}
+	}
+
+#undef COMMON
+}
+
+#define __constant_c_x_memset(s, c, count)			\
+	(__builtin_constant_p(count)				\
+	 ? __constant_c_and_count_memset((s), (c), (count))	\
+	 : __constant_c_memset((s), (c), (count)))
+
+#define __memset(s, c, count)				\
+	(__builtin_constant_p(count)			\
+	 ? __constant_count_memset((s), (c), (count))	\
+	 : __memset_generic((s), (c), (count)))
+
+#define __HAVE_ARCH_MEMSET
+#define memset(s, c, count)						\
+	(__builtin_constant_p(c)					\
+	 ? __constant_c_x_memset((s), (0x01010101UL * (unsigned char)(c)), \
+				 (count))				\
+	 : __memset((s), (c), (count)))
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ */
+#define __HAVE_ARCH_MEMSCAN
+extern void *memscan(void *addr, int c, size_t size);
+
+#endif /* __KERNEL__ */
+
+#endif /* _ASM_X86_STRING_32_H */
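
A note on the memcpy macro in this header: __builtin_constant_p(n) is evaluated at compile time, so a memcpy with a constant length resolves to the fully unrolled __constant_memcpy, while a runtime length falls back to the rep ; movs based __memcpy. The sketch below is a minimal userspace illustration of that same dispatch pattern; the names copy_small, copy_rep and my_memcpy are made up for this example and are not part of the kernel header.

#include <stddef.h>
#include <string.h>
#include <stdio.h>

/* Hypothetical out-of-line path: stands in for __memcpy (rep ; movs). */
static void *copy_rep(void *to, const void *from, size_t n)
{
	return memcpy(to, from, n);
}

/* Hypothetical constant-size path: stands in for __constant_memcpy.
 * With a constant n the compiler can collapse the loop to a few moves. */
static inline void *copy_small(void *to, const void *from, size_t n)
{
	unsigned char *d = to;
	const unsigned char *s = from;
	while (n--)
		*d++ = *s++;
	return to;
}

/* Same shape as the kernel's memcpy macro: constant sizes take the
 * inline path, runtime sizes take the out-of-line one. */
#define my_memcpy(t, f, n)			\
	(__builtin_constant_p((n))		\
	 ? copy_small((t), (f), (n))		\
	 : copy_rep((t), (f), (n)))

int main(void)
{
	char src[] = "hello", dst[8];
	size_t n = sizeof(src);

	my_memcpy(dst, src, sizeof(src));	/* constant length: inline path */
	my_memcpy(dst, src, n);			/* runtime length: rep path */
	printf("%s\n", dst);
	return 0;
}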
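The asm in __constant_c_memset stores count/4 dwords with rep ; stosl, then tests bit 1 and bit 0 of count to emit an optional stosw and stosb for the tail. A portable C rendering of that store sequence, under the same assumption that 'pattern' is a byte value replicated across a 32-bit word, might look like the following (fill_pattern is an illustrative name, not kernel code).

#include <stdint.h>
#include <string.h>
#include <assert.h>

/* Portable sketch of the rep stosl / stosw / stosb sequence. */
static void *fill_pattern(void *s, uint32_t pattern, size_t count)
{
	unsigned char *p = s;
	size_t i;

	for (i = 0; i < count / 4; i++) {	/* rep ; stosl */
		memcpy(p, &pattern, 4);
		p += 4;
	}
	if (count & 2) {			/* testb $2 ... stosw */
		memcpy(p, &pattern, 2);
		p += 2;
	}
	if (count & 1)				/* testb $1 ... stosb */
		*p = (unsigned char)pattern;
	return s;
}

int main(void)
{
	unsigned char buf[11];
	size_t i;

	fill_pattern(buf, 0x01010101UL * 0x5a, sizeof(buf));
	for (i = 0; i < sizeof(buf); i++)
		assert(buf[i] == 0x5a);
	return 0;
}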
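The memset macro's 0x01010101UL * (unsigned char)(c) expression replicates the fill byte into all four byte lanes of a 32-bit word, which is what lets the constant-size paths store four bytes per instruction. A tiny check of that arithmetic:

#include <assert.h>

int main(void)
{
	/* Replicate a byte across a 32-bit word, as the memset macro does. */
	unsigned char c = 0xab;
	unsigned long pattern = 0x01010101UL * c;

	assert((pattern & 0xffffffffUL) == 0xababababUL);
	return 0;
}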
