arch/tile: core support for Tilera 32-bit chips.

This change is the core kernel support for TILEPro and TILE64 chips. No driver support (except the console driver) is included yet. This includes the relevant Linux headers in asm/; the low-level low-level "Tile architecture" headers in arch/, which are shared with the hypervisor, etc., and are build-system agnostic; and the relevant hypervisor headers in hv/. Signed-off-by: Chris Metcalf <cmetcalf@tilera.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Reviewed-by: Paul Mundt <lethal@linux-sh.org>
author: Chris Metcalf <cmetcalf@tilera.com> 2010-05-28 23:09:12 -0400
committer: Chris Metcalf <cmetcalf@tilera.com> 2010-06-04 17:11:18 -0400
commit: 867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
tree: c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/lib
parent: 5360bd776f73d0a7da571d72a09a03f237e99900 (diff)
20 files changed, 2910 insertions, 0 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile
new file mode 100644
index 00000000000..ea9c209d33f
--- /dev/null
+++ b/arch/tile/lib/Makefile
@@ -0,0 +1,16 @@
+#
+# Makefile for TILE-specific library files..
+#
+
+lib-y = checksum.o cpumask.o delay.o __invalidate_icache.o \
+	mb_incoherent.o uaccess.o \
+	memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \
+	strchr_$(BITS).o strlen_$(BITS).o
+
+ifneq ($(CONFIG_TILEGX),y)
+lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o
+endif
+
+lib-$(CONFIG_SMP) += spinlock_$(BITS).o usercopy_$(BITS).o
+
+obj-$(CONFIG_MODULES) += exports.o
diff --git a/arch/tile/lib/__invalidate_icache.S b/arch/tile/lib/__invalidate_icache.S
new file mode 100644
index 00000000000..92e70505912
--- /dev/null
+++ b/arch/tile/lib/__invalidate_icache.S
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ * A routine for synchronizing the instruction and data caches.
+ * Useful for self-modifying code.
+ *
+ * r0 holds the buffer address
+ * r1 holds the size in bytes
+ */
+
+#include <arch/chip.h>
+#include <feedback.h>
+
+#if defined(__NEWLIB__) || defined(__BME__)
+#include <sys/page.h>
+#else
+#include <asm/page.h>
+#endif
+
+#ifdef __tilegx__
+/* Share code among Tile family chips but adjust opcodes appropriately. */
+#define slt cmpltu
+#define bbst blbst
+#define bnezt bnzt
+#endif
+
+#if defined(__tilegx__) && __SIZEOF_POINTER__ == 4
+/* Force 32-bit ops so pointers wrap around appropriately. */
+#define ADD_PTR addx
+#define ADDI_PTR addxi
+#else
+#define ADD_PTR add
+#define ADDI_PTR addi
+#endif
+
+        .section .text.__invalidate_icache, "ax"
+        .global __invalidate_icache
+        .type __invalidate_icache,@function
+        .hidden __invalidate_icache
+        .align 8
+__invalidate_icache:
+        FEEDBACK_ENTER(__invalidate_icache)
+        {
+         ADD_PTR r1, r0, r1       /* end of buffer */
+         blez r1, .Lexit      /* skip out if size <= 0 */
+        }
+        {
+         ADDI_PTR r1, r1, -1      /* point to last byte to flush */
+         andi r0, r0, -CHIP_L1I_LINE_SIZE()  /* align to cache-line size */
+        }
+        {
+         andi r1, r1, -CHIP_L1I_LINE_SIZE()  /* last cache line to flush */
+         mf
+        }
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+        {
+         moveli r4, CHIP_L1I_CACHE_SIZE() / PAGE_SIZE  /* loop counter */
+         move r2, r0          /* remember starting address */
+        }
+#endif
+        drain
+	{
+         slt r3, r0, r1       /* set up loop invariant */
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+	 moveli r6, PAGE_SIZE
+#endif
+	}
+.Lentry:
+        {
+         icoh r0
+         ADDI_PTR r0, r0, CHIP_L1I_LINE_SIZE()   /* advance buffer */
+        }
+        {
+         slt r3, r0, r1       /* check if buffer < buffer + size */
+         bbst r3, .Lentry     /* loop if buffer < buffer + size */
+        }
+#if CHIP_L1I_CACHE_SIZE() > PAGE_SIZE
+        {
+         ADD_PTR r2, r2, r6
+         ADD_PTR r1, r1, r6
+        }
+	{
+         move r0, r2
+         addi r4, r4, -1
+	}
+	{
+         slt r3, r0, r1        /* set up loop invariant */
+         bnezt r4, .Lentry
+	}
+#endif
+        drain
+.Lexit:
+        jrp lr
+
+.Lend___invalidate_icache:
+        .size __invalidate_icache, \
+		.Lend___invalidate_icache - __invalidate_icache
diff --git a/arch/tile/lib/atomic_32.c b/arch/tile/lib/atomic_32.c
new file mode 100644
index 00000000000..be1e8acd105
--- /dev/null
+++ b/arch/tile/lib/atomic_32.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/cache.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <asm/atomic.h>
+#include <arch/chip.h>
+
+/* The routines in atomic_asm.S are private, so we only declare them here. */
+extern struct __get_user __atomic_cmpxchg(volatile int *p,
+					  int *lock, int o, int n);
+extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
+						  int *lock, int o, int n);
+extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
+extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
+
+extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
+extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
+extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
+				      int *lock, u64 o, u64 n);
+
+
+/* See <asm/atomic.h> */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+/*
+ * A block of memory containing locks for atomic ops. Each instance of this
+ * struct will be homed on a different CPU.
+ */
+struct atomic_locks_on_cpu {
+	int lock[ATOMIC_HASH_L2_SIZE];
+} __attribute__((aligned(ATOMIC_HASH_L2_SIZE * 4)));
+
+static DEFINE_PER_CPU(struct atomic_locks_on_cpu, atomic_lock_pool);
+
+/* The locks we'll use until __init_atomic_per_cpu is called. */
+static struct atomic_locks_on_cpu __initdata initial_atomic_locks;
+
+/* Hash into this vector to get a pointer to lock for the given atomic. */
+struct atomic_locks_on_cpu *atomic_lock_ptr[ATOMIC_HASH_L1_SIZE]
+	__write_once = {
+	[0 ... ATOMIC_HASH_L1_SIZE-1] (&initial_atomic_locks)
+};
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+/* This page is remapped on startup to be hash-for-home. */
+int atomic_locks[PAGE_SIZE / sizeof(int) /* Only ATOMIC_HASH_SIZE is used */]
+  __attribute__((aligned(PAGE_SIZE), section(".bss.page_aligned")));
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+static inline int *__atomic_hashed_lock(volatile void *v)
+{
+	/* NOTE: this code must match "sys_cmpxchg" in kernel/intvec.S */
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	unsigned long i =
+		(unsigned long) v & ((PAGE_SIZE-1) & -sizeof(long long));
+	unsigned long n = __insn_crc32_32(0, i);
+
+	/* Grab high bits for L1 index. */
+	unsigned long l1_index = n >> ((sizeof(n) * 8) - ATOMIC_HASH_L1_SHIFT);
+	/* Grab low bits for L2 index. */
+	unsigned long l2_index = n & (ATOMIC_HASH_L2_SIZE - 1);
+
+	return &atomic_lock_ptr[l1_index]->lock[l2_index];
+#else
+	/*
+	 * Use bits [3, 3 + ATOMIC_HASH_SHIFT) as the lock index.
+	 * Using mm works here because atomic_locks is page aligned.
+	 */
+	unsigned long ptr = __insn_mm((unsigned long)v >> 1,
+				      (unsigned long)atomic_locks,
+				      2, (ATOMIC_HASH_SHIFT + 2) - 1);
+	return (int *)ptr;
+#endif
+}
+
+#ifdef CONFIG_SMP
+/* Return whether the passed pointer is a valid atomic lock pointer. */
+static int is_atomic_lock(int *p)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+	int i;
+	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
+
+		if (p >= &atomic_lock_ptr[i]->lock[0] &&
+		    p < &atomic_lock_ptr[i]->lock[ATOMIC_HASH_L2_SIZE]) {
+			return 1;
+		}
+	}
+	return 0;
+#else
+	return p >= &atomic_locks[0] && p < &atomic_locks[ATOMIC_HASH_SIZE];
+#endif
+}
+
+void __atomic_fault_unlock(int *irqlock_word)
+{
+	BUG_ON(!is_atomic_lock(irqlock_word));
+	BUG_ON(*irqlock_word != 1);
+	*irqlock_word = 0;
+}
+
+#endif /* CONFIG_SMP */
+
+static inline int *__atomic_setup(volatile void *v)
+{
+	/* Issue a load to the target to bring it into cache. */
+	*(volatile int *)v;
+	return __atomic_hashed_lock(v);
+}
+
+int _atomic_xchg(atomic_t *v, int n)
+{
+	return __atomic_xchg(&v->counter, __atomic_setup(v), n).val;
+}
+EXPORT_SYMBOL(_atomic_xchg);
+
+int _atomic_xchg_add(atomic_t *v, int i)
+{
+	return __atomic_xchg_add(&v->counter, __atomic_setup(v), i).val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add);
+
+int _atomic_xchg_add_unless(atomic_t *v, int a, int u)
+{
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic_xchg_add_unless(&v->counter, __atomic_setup(v), u, a)
+		.val;
+}
+EXPORT_SYMBOL(_atomic_xchg_add_unless);
+
+int _atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+	return __atomic_cmpxchg(&v->counter, __atomic_setup(v), o, n).val;
+}
+EXPORT_SYMBOL(_atomic_cmpxchg);
+
+unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_or((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_or);
+
+unsigned long _atomic_andn(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_andn((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_andn);
+
+unsigned long _atomic_xor(volatile unsigned long *p, unsigned long mask)
+{
+	return __atomic_xor((int *)p, __atomic_setup(p), mask).val;
+}
+EXPORT_SYMBOL(_atomic_xor);
+
+
+u64 _atomic64_xchg(atomic64_t *v, u64 n)
+{
+	return __atomic64_xchg(&v->counter, __atomic_setup(v), n);
+}
+EXPORT_SYMBOL(_atomic64_xchg);
+
+u64 _atomic64_xchg_add(atomic64_t *v, u64 i)
+{
+	return __atomic64_xchg_add(&v->counter, __atomic_setup(v), i);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add);
+
+u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u)
+{
+	/*
+	 * Note: argument order is switched here since it is easier
+	 * to use the first argument consistently as the "old value"
+	 * in the assembly, as is done for _atomic_cmpxchg().
+	 */
+	return __atomic64_xchg_add_unless(&v->counter, __atomic_setup(v),
+					  u, a);
+}
+EXPORT_SYMBOL(_atomic64_xchg_add_unless);
+
+u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
+{
+	return __atomic64_cmpxchg(&v->counter, __atomic_setup(v), o, n);
+}
+EXPORT_SYMBOL(_atomic64_cmpxchg);
+
+
+static inline int *__futex_setup(__user int *v)
+{
+	/*
+	 * Issue a prefetch to the counter to bring it into cache.
+	 * As for __atomic_setup, but we can't do a read into the L1
+	 * since it might fault; instead we do a prefetch into the L2.
+	 */
+	__insn_prefetch(v);
+	return __atomic_hashed_lock(v);
+}
+
+struct __get_user futex_set(int *v, int i)
+{
+	return __atomic_xchg(v, __futex_setup(v), i);
+}
+
+struct __get_user futex_add(int *v, int n)
+{
+	return __atomic_xchg_add(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_or(int *v, int n)
+{
+	return __atomic_or(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_andn(int *v, int n)
+{
+	return __atomic_andn(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_xor(int *v, int n)
+{
+	return __atomic_xor(v, __futex_setup(v), n);
+}
+
+struct __get_user futex_cmpxchg(int *v, int o, int n)
+{
+	return __atomic_cmpxchg(v, __futex_setup(v), o, n);
+}
+
+/*
+ * If any of the atomic or futex routines hit a bad address (not in
+ * the page tables at kernel PL) this routine is called.  The futex
+ * routines are never used on kernel space, and the normal atomics and
+ * bitops are never used on user space.  So a fault on kernel space
+ * must be fatal, but a fault on userspace is a futex fault and we
+ * need to return -EFAULT.  Note that the context this routine is
+ * invoked in is the context of the "_atomic_xxx()" routines called
+ * by the functions in this file.
+ */
+struct __get_user __atomic_bad_address(int *addr)
+{
+	if (unlikely(!access_ok(VERIFY_WRITE, addr, sizeof(int))))
+		panic("Bad address used for kernel atomic op: %p\n", addr);
+	return (struct __get_user) { .err = -EFAULT };
+}
+
+
+#if CHIP_HAS_CBOX_HOME_MAP()
+static int __init noatomichash(char *str)
+{
+	printk("noatomichash is deprecated.\n");
+	return 1;
+}
+__setup("noatomichash", noatomichash);
+#endif
+
+void __init __init_atomic_per_cpu(void)
+{
+#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
+
+	unsigned int i;
+	int actual_cpu;
+
+	/*
+	 * Before this is called from setup, we just have one lock for
+	 * all atomic objects/operations.  Here we replace the
+	 * elements of atomic_lock_ptr so that they point at per_cpu
+	 * integers.  This seemingly over-complex approach stems from
+	 * the fact that DEFINE_PER_CPU defines an entry for each cpu
+	 * in the grid, not each cpu from 0..ATOMIC_HASH_SIZE-1.  But
+	 * for efficient hashing of atomics to their locks we want a
+	 * compile time constant power of 2 for the size of this
+	 * table, so we use ATOMIC_HASH_SIZE.
+	 *
+	 * Here we populate atomic_lock_ptr from the per cpu
+	 * atomic_lock_pool, interspersing by actual cpu so that
+	 * subsequent elements are homed on consecutive cpus.
+	 */
+
+	actual_cpu = cpumask_first(cpu_possible_mask);
+
+	for (i = 0; i < ATOMIC_HASH_L1_SIZE; ++i) {
+		/*
+		 * Preincrement to slightly bias against using cpu 0,
+		 * which has plenty of stuff homed on it already.
+		 */
+		actual_cpu = cpumask_next(actual_cpu, cpu_possible_mask);
+		if (actual_cpu >= nr_cpu_ids)
+			actual_cpu = cpumask_first(cpu_possible_mask);
+
+		atomic_lock_ptr[i] = &per_cpu(atomic_lock_pool, actual_cpu);
+	}
+
+#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	/* Validate power-of-two and "bigger than cpus" assumption */
+	BUG_ON(ATOMIC_HASH_SIZE & (ATOMIC_HASH_SIZE-1));
+	BUG_ON(ATOMIC_HASH_SIZE < nr_cpu_ids);
+
+	/*
+	 * On TILEPro we prefer to use a single hash-for-home
+	 * page, since this means atomic operations are less
+	 * likely to encounter a TLB fault and thus should
+	 * in general perform faster.  You may wish to disable
+	 * this in situations where few hash-for-home tiles
+	 * are configured.
+	 */
+	BUG_ON((unsigned long)atomic_locks % PAGE_SIZE != 0);
+
+	/* The locks must all fit on one page. */
+	BUG_ON(ATOMIC_HASH_SIZE * sizeof(int) > PAGE_SIZE);
+
+	/*
+	 * We use the page offset of the atomic value's address as
+	 * an index into atomic_locks, excluding the low 3 bits.
+	 * That should not produce more indices than ATOMIC_HASH_SIZE.
+	 */
+	BUG_ON((PAGE_SIZE >> 3) > ATOMIC_HASH_SIZE);
+
+#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
+
+	/* The futex code makes this assumption, so we validate it here. */
+	BUG_ON(sizeof(atomic_t) != sizeof(int));
+}
diff --git a/arch/tile/lib/atomic_asm_32.S b/arch/tile/lib/atomic_asm_32.S
new file mode 100644
index 00000000000..c0d05857819
--- /dev/null
+++ b/arch/tile/lib/atomic_asm_32.S
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Support routines for atomic operations.  Each function takes:
+ *
+ * r0: address to manipulate
+ * r1: pointer to atomic lock guarding this operation (for FUTEX_LOCK_REG)
+ * r2: new value to write, or for cmpxchg/add_unless, value to compare against
+ * r3: (cmpxchg/xchg_add_unless) new value to write or add;
+ *     (atomic64 ops) high word of value to write
+ * r4/r5: (cmpxchg64/add_unless64) new value to write or add
+ *
+ * The 32-bit routines return a "struct __get_user" so that the futex code
+ * has an opportunity to return -EFAULT to the user if needed.
+ * The 64-bit routines just return a "long long" with the value,
+ * since they are only used from kernel space and don't expect to fault.
+ * Support for 16-bit ops is included in the framework but we don't provide
+ * any (x86_64 has an atomic_inc_short(), so we might want to some day).
+ *
+ * Note that the caller is advised to issue a suitable L1 or L2
+ * prefetch on the address being manipulated to avoid extra stalls.
+ * In addition, the hot path is on two icache lines, and we start with
+ * a jump to the second line to make sure they are both in cache so
+ * that we never stall waiting on icache fill while holding the lock.
+ * (This doesn't work out with most 64-bit ops, since they consume
+ * too many bundles, so may take an extra i-cache stall.)
+ *
+ * These routines set the INTERRUPT_CRITICAL_SECTION bit, just
+ * like sys_cmpxchg(), so that NMIs like PERF_COUNT will not interrupt
+ * the code, just page faults.
+ *
+ * If the load or store faults in a way that can be directly fixed in
+ * the do_page_fault_ics() handler (e.g. a vmalloc reference) we fix it
+ * directly, return to the instruction that faulted, and retry it.
+ *
+ * If the load or store faults in a way that potentially requires us
+ * to release the atomic lock, then retry (e.g. a migrating PTE), we
+ * reset the PC in do_page_fault_ics() to the "tns" instruction so
+ * that on return we will reacquire the lock and restart the op.  We
+ * are somewhat overloading the exception_table_entry notion by doing
+ * this, since those entries are not normally used for migrating PTEs.
+ *
+ * If the main page fault handler discovers a bad address, it will see
+ * the PC pointing to the "tns" instruction (due to the earlier
+ * exception_table_entry processing in do_page_fault_ics), and
+ * re-reset the PC to the fault handler, atomic_bad_address(), which
+ * effectively takes over from the atomic op and can either return a
+ * bad "struct __get_user" (for user addresses) or can just panic (for
+ * bad kernel addresses).
+ *
+ * Note that if the value we would store is the same as what we
+ * loaded, we bypass the load.  Other platforms with true atomics can
+ * make the guarantee that a non-atomic __clear_bit(), for example,
+ * can safely race with an atomic test_and_set_bit(); this example is
+ * from bit_spinlock.h in slub_lock() / slub_unlock().  We can't do
+ * that on Tile since the "atomic" op is really just a
+ * read/modify/write, and can race with the non-atomic
+ * read/modify/write.  However, if we can short-circuit the write when
+ * it is not needed, in the atomic case, we avoid the race.
+ */
+
+#include <linux/linkage.h>
+#include <asm/atomic.h>
+#include <asm/page.h>
+#include <asm/processor.h>
+
+	.section .text.atomic,"ax"
+ENTRY(__start_atomic_asm_code)
+
+	.macro  atomic_op, name, bitwidth, body
+	.align  64
+STD_ENTRY_SECTION(__atomic\name, .text.atomic)
+	{
+	 movei  r24, 1
+	 j      4f		/* branch to second cache line */
+	}
+1:	{
+	 .ifc \bitwidth,16
+	 lh     r22, r0
+	 .else
+	 lw     r22, r0
+	 addi   r23, r0, 4
+	 .endif
+	}
+	.ifc \bitwidth,64
+	lw      r23, r23
+	.endif
+	\body /* set r24, and r25 if 64-bit */
+	{
+	 seq    r26, r22, r24
+	 seq    r27, r23, r25
+	}
+	.ifc \bitwidth,64
+	bbnst   r27, 2f
+	.endif
+	bbs     r26, 3f		/* skip write-back if it's the same value */
+2:	{
+	 .ifc \bitwidth,16
+	 sh     r0, r24
+	 .else
+	 sw     r0, r24
+	 addi   r23, r0, 4
+	 .endif
+	}
+	.ifc \bitwidth,64
+	sw      r23, r25
+	.endif
+	mf
+3:	{
+	 move   r0, r22
+	 .ifc \bitwidth,64
+	 move   r1, r23
+	 .else
+	 move   r1, zero
+	 .endif
+	 sw     ATOMIC_LOCK_REG_NAME, zero
+	}
+	mtspr   INTERRUPT_CRITICAL_SECTION, zero
+	jrp     lr
+4:	{
+	 move   ATOMIC_LOCK_REG_NAME, r1
+	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
+	}
+#ifndef CONFIG_SMP
+	j       1b		/* no atomic locks */
+#else
+	{
+	 tns    r21, ATOMIC_LOCK_REG_NAME
+	 moveli r23, 2048       /* maximum backoff time in cycles */
+	}
+	{
+	 bzt    r21, 1b		/* branch if lock acquired */
+	 moveli r25, 32         /* starting backoff time in cycles */
+	}
+5:	mtspr   INTERRUPT_CRITICAL_SECTION, zero
+	mfspr   r26, CYCLE_LOW  /* get start point for this backoff */
+6:	mfspr   r22, CYCLE_LOW  /* test to see if we've backed off enough */
+	sub     r22, r22, r26
+	slt     r22, r22, r25
+	bbst    r22, 6b
+	{
+	 mtspr  INTERRUPT_CRITICAL_SECTION, r24
+	 shli   r25, r25, 1     /* double the backoff; retry the tns */
+	}
+	{
+	 tns    r21, ATOMIC_LOCK_REG_NAME
+	 slt    r26, r23, r25   /* is the proposed backoff too big? */
+	}
+	{
+	 bzt    r21, 1b		/* branch if lock acquired */
+	 mvnz   r25, r26, r23
+	}
+	j       5b
+#endif
+	STD_ENDPROC(__atomic\name)
+	.ifc \bitwidth,32
+	.pushsection __ex_table,"a"
+	.word   1b, __atomic\name
+	.word   2b, __atomic\name
+	.word   __atomic\name, __atomic_bad_address
+	.popsection
+	.endif
+	.endm
+
+atomic_op _cmpxchg, 32, "seq r26, r22, r2; { bbns r26, 3f; move r24, r3 }"
+atomic_op _xchg, 32, "move r24, r2"
+atomic_op _xchg_add, 32, "add r24, r22, r2"
+atomic_op _xchg_add_unless, 32, \
+	"sne r26, r22, r2; { bbns r26, 3f; add r24, r22, r3 }"
+atomic_op _or, 32, "or r24, r22, r2"
+atomic_op _andn, 32, "nor r2, r2, zero; and r24, r22, r2"
+atomic_op _xor, 32, "xor r24, r22, r2"
+
+atomic_op 64_cmpxchg, 64, "{ seq r26, r22, r2; seq r27, r23, r3 }; \
+	{ bbns r26, 3f; move r24, r4 }; { bbns r27, 3f; move r25, r5 }"
+atomic_op 64_xchg, 64, "{ move r24, r2; move r25, r3 }"
+atomic_op 64_xchg_add, 64, "{ add r24, r22, r2; add r25, r23, r3 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+atomic_op 64_xchg_add_unless, 64, \
+	"{ sne r26, r22, r2; sne r27, r23, r3 }; \
+	{ bbns r26, 3f; add r24, r22, r4 }; \
+	{ bbns r27, 3f; add r25, r23, r5 }; \
+	slt_u r26, r24, r22; add r25, r25, r26"
+
+	jrp     lr              /* happy backtracer */
+
+ENTRY(__end_atomic_asm_code)
diff --git a/arch/tile/lib/checksum.c b/arch/tile/lib/checksum.c
new file mode 100644
index 00000000000..e4bab5bd3f3
--- /dev/null
+++ b/arch/tile/lib/checksum.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ * Support code for the main lib/checksum.c.
+ */
+
+#include <net/checksum.h>
+#include <linux/module.h>
+
+static inline unsigned int longto16(unsigned long x)
+{
+	unsigned long ret;
+#ifdef __tilegx__
+	ret = __insn_v2sadu(x, 0);
+	ret = __insn_v2sadu(ret, 0);
+#else
+	ret = __insn_sadh_u(x, 0);
+	ret = __insn_sadh_u(ret, 0);
+#endif
+	return ret;
+}
+
+__wsum do_csum(const unsigned char *buff, int len)
+{
+	int odd, count;
+	unsigned long result = 0;
+
+	if (len <= 0)
+		goto out;
+	odd = 1 & (unsigned long) buff;
+	if (odd) {
+		result = (*buff << 8);
+		len--;
+		buff++;
+	}
+	count = len >> 1;		/* nr of 16-bit words.. */
+	if (count) {
+		if (2 & (unsigned long) buff) {
+			result += *(const unsigned short *)buff;
+			count--;
+			len -= 2;
+			buff += 2;
+		}
+		count >>= 1;		/* nr of 32-bit words.. */
+		if (count) {
+#ifdef __tilegx__
+			if (4 & (unsigned long) buff) {
+				unsigned int w = *(const unsigned int *)buff;
+				result = __insn_v2sadau(result, w, 0);
+				count--;
+				len -= 4;
+				buff += 4;
+			}
+			count >>= 1;		/* nr of 64-bit words.. */
+#endif
+
+			/*
+			 * This algorithm could wrap around for very
+			 * large buffers, but those should be impossible.
+			 */
+			BUG_ON(count >= 65530);
+
+			while (count) {
+				unsigned long w = *(const unsigned long *)buff;
+				count--;
+				buff += sizeof(w);
+#ifdef __tilegx__
+				result = __insn_v2sadau(result, w, 0);
+#else
+				result = __insn_sadah_u(result, w, 0);
+#endif
+			}
+#ifdef __tilegx__
+			if (len & 4) {
+				unsigned int w = *(const unsigned int *)buff;
+				result = __insn_v2sadau(result, w, 0);
+				buff += 4;
+			}
+#endif
+		}
+		if (len & 2) {
+			result += *(const unsigned short *) buff;
+			buff += 2;
+		}
+	}
+	if (len & 1)
+		result += *buff;
+	result = longto16(result);
+	if (odd)
+		result = swab16(result);
+out:
+	return result;
+}
diff --git a/arch/tile/lib/cpumask.c b/arch/tile/lib/cpumask.c
new file mode 100644
index 00000000000..af745b3b255
--- /dev/null
+++ b/arch/tile/lib/cpumask.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+
+/*
+ * Allow cropping out bits beyond the end of the array.
+ * Move to "lib" directory if more clients want to use this routine.
+ */
+int bitmap_parselist_crop(const char *bp, unsigned long *maskp, int nmaskbits)
+{
+	unsigned a, b;
+
+	bitmap_zero(maskp, nmaskbits);
+	do {
+		if (!isdigit(*bp))
+			return -EINVAL;
+		a = simple_strtoul(bp, (char **)&bp, 10);
+		b = a;
+		if (*bp == '-') {
+			bp++;
+			if (!isdigit(*bp))
+				return -EINVAL;
+			b = simple_strtoul(bp, (char **)&bp, 10);
+		}
+		if (!(a <= b))
+			return -EINVAL;
+		if (b >= nmaskbits)
+			b = nmaskbits-1;
+		while (a <= b) {
+			set_bit(a, maskp);
+			a++;
+		}
+		if (*bp == ',')
+			bp++;
+	} while (*bp != '\0' && *bp != '\n');
+	return 0;
+}
diff --git a/arch/tile/lib/delay.c b/arch/tile/lib/delay.c
new file mode 100644
index 00000000000..5801b03c13e
--- /dev/null
+++ b/arch/tile/lib/delay.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/thread_info.h>
+#include <asm/fixmap.h>
+#include <hv/hypervisor.h>
+
+void __udelay(unsigned long usecs)
+{
+	hv_nanosleep(usecs * 1000);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long nsecs)
+{
+	hv_nanosleep(nsecs);
+}
+EXPORT_SYMBOL(__ndelay);
+
+/* FIXME: should be declared in a header somewhere. */
+EXPORT_SYMBOL(__delay);
diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c
new file mode 100644
index 00000000000..af8e70e2a0c
--- /dev/null
+++ b/arch/tile/lib/exports.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ *   This program is free software; you can redistribute it and/or
+ *   modify it under the terms of the GNU General Public License
+ *   as published by the Free Software Foundation, version 2.
+ *
+ *   This program is distributed in the hope that it will be useful, but
+ *   WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ *   NON INFRINGEMENT.  See the GNU General Public License for
+ *   more details.
+ *
+ * Exports from assembler code and from libtile-cc.
+ */
+
+#include <linux/module.h>
+
+/* arch/tile/lib/usercopy.S */
+#include <linux/uaccess.h>
+EXPORT_SYMBOL(__get_user_1);
+EXPORT_SYMBOL(__get_user_2);
+EXPORT_SYMBOL(__get_user_4);
+EXPORT_SYMBOL(__put_user_1);
+EXPORT_SYMBOL(__put_user_2);
+EXPORT_SYMBOL(__put_user_4);
+EXPORT_SYMBOL(__put_user_8);
+EXPORT_SYMBOL(strnlen_user_asm);
+EXPORT_SYMBOL(strncpy_from_user_asm);
+EXPORT_SYMBOL(clear_user_asm);
+
+/* arch/tile/kernel/entry.S */
+#include <linux/kernel.h>
+#include <asm/processor.h>
+EXPORT_SYMBOL(current_text_addr);
+EXPORT_SYMBOL(dump_stack);
+
+/* arch/tile/lib/__memcpy.S */
+/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */
+EXPORT_SYMBOL(memcpy);
author	Chris Metcalf <cmetcalf@tilera.com>	2010-05-28 23:09:12 -0400
committer	Chris Metcalf <cmetcalf@tilera.com>	2010-06-04 17:11:18 -0400
commit	867e359b97c970a60626d5d76bbe2a8fadbf38fb (patch)
tree	c5ccbb7f5172e8555977119608ecb1eee3cc37e3 /arch/tile/lib
parent	5360bd776f73d0a7da571d72a09a03f237e99900 (diff)