aboutsummaryrefslogtreecommitdiff
path: root/arch/tile/include/asm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/tile/include/asm')
-rw-r--r--arch/tile/include/asm/Kbuild17
-rw-r--r--arch/tile/include/asm/atomic.h126
-rw-r--r--arch/tile/include/asm/atomic_32.h150
-rw-r--r--arch/tile/include/asm/atomic_64.h49
-rw-r--r--arch/tile/include/asm/barrier.h92
-rw-r--r--arch/tile/include/asm/bitops.h48
-rw-r--r--arch/tile/include/asm/bitops_32.h11
-rw-r--r--arch/tile/include/asm/bitops_64.h21
-rw-r--r--arch/tile/include/asm/bitsperlong.h26
-rw-r--r--arch/tile/include/asm/byteorder.h1
-rw-r--r--arch/tile/include/asm/cache.h25
-rw-r--r--arch/tile/include/asm/cacheflush.h55
-rw-r--r--arch/tile/include/asm/checksum.h18
-rw-r--r--arch/tile/include/asm/cmpxchg.h134
-rw-r--r--arch/tile/include/asm/compat.h106
-rw-r--r--arch/tile/include/asm/device.h (renamed from arch/tile/include/asm/siginfo.h)32
-rw-r--r--arch/tile/include/asm/dma-mapping.h160
-rw-r--r--arch/tile/include/asm/elf.h22
-rw-r--r--arch/tile/include/asm/fixmap.h55
-rw-r--r--arch/tile/include/asm/ftrace.h22
-rw-r--r--arch/tile/include/asm/futex.h144
-rw-r--r--arch/tile/include/asm/hardirq.h2
-rw-r--r--arch/tile/include/asm/hardwall.h41
-rw-r--r--arch/tile/include/asm/highmem.h2
-rw-r--r--arch/tile/include/asm/homecache.h30
-rw-r--r--arch/tile/include/asm/hugetlb.h26
-rw-r--r--arch/tile/include/asm/io.h282
-rw-r--r--arch/tile/include/asm/irq.h6
-rw-r--r--arch/tile/include/asm/irqflags.h89
-rw-r--r--arch/tile/include/asm/kdebug.h (renamed from arch/tile/include/asm/hw_irq.h)18
-rw-r--r--arch/tile/include/asm/kexec.h12
-rw-r--r--arch/tile/include/asm/kgdb.h71
-rw-r--r--arch/tile/include/asm/kmap_types.h31
-rw-r--r--arch/tile/include/asm/kprobes.h79
-rw-r--r--arch/tile/include/asm/memprof.h33
-rw-r--r--arch/tile/include/asm/mman.h41
-rw-r--r--arch/tile/include/asm/mmu.h3
-rw-r--r--arch/tile/include/asm/mmu_context.h10
-rw-r--r--arch/tile/include/asm/mmzone.h2
-rw-r--r--arch/tile/include/asm/module.h40
-rw-r--r--arch/tile/include/asm/page.h83
-rw-r--r--arch/tile/include/asm/pci.h167
-rw-r--r--arch/tile/include/asm/percpu.h34
-rw-r--r--arch/tile/include/asm/perf_event.h (renamed from arch/tile/include/asm/auxvec.h)12
-rw-r--r--arch/tile/include/asm/pgalloc.h92
-rw-r--r--arch/tile/include/asm/pgtable.h115
-rw-r--r--arch/tile/include/asm/pgtable_32.h56
-rw-r--r--arch/tile/include/asm/pgtable_64.h74
-rw-r--r--arch/tile/include/asm/pmc.h64
-rw-r--r--arch/tile/include/asm/processor.h108
-rw-r--r--arch/tile/include/asm/ptrace.h85
-rw-r--r--arch/tile/include/asm/sections.h10
-rw-r--r--arch/tile/include/asm/setup.h30
-rw-r--r--arch/tile/include/asm/sigcontext.h37
-rw-r--r--arch/tile/include/asm/signal.h12
-rw-r--r--arch/tile/include/asm/smp.h9
-rw-r--r--arch/tile/include/asm/spinlock_32.h1
-rw-r--r--arch/tile/include/asm/spinlock_64.h6
-rw-r--r--arch/tile/include/asm/stack.h1
-rw-r--r--arch/tile/include/asm/stat.h4
-rw-r--r--arch/tile/include/asm/string.h2
-rw-r--r--arch/tile/include/asm/swab.h23
-rw-r--r--arch/tile/include/asm/switch_to.h79
-rw-r--r--arch/tile/include/asm/syscall.h6
-rw-r--r--arch/tile/include/asm/syscalls.h20
-rw-r--r--arch/tile/include/asm/system.h261
-rw-r--r--arch/tile/include/asm/thread_info.h61
-rw-r--r--arch/tile/include/asm/timex.h2
-rw-r--r--arch/tile/include/asm/tlbflush.h17
-rw-r--r--arch/tile/include/asm/topology.h63
-rw-r--r--arch/tile/include/asm/traps.h19
-rw-r--r--arch/tile/include/asm/uaccess.h268
-rw-r--r--arch/tile/include/asm/unaligned.h29
-rw-r--r--arch/tile/include/asm/unistd.h31
-rw-r--r--arch/tile/include/asm/vdso.h49
75 files changed, 2360 insertions, 1702 deletions
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 0bb42642343..0aa5675e702 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -1,33 +1,32 @@
-include include/asm-generic/Kbuild.asm
header-y += ../arch/
-header-y += ucontext.h
-header-y += hardwall.h
-
generic-y += bug.h
generic-y += bugs.h
+generic-y += clkdev.h
generic-y += cputime.h
-generic-y += device.h
generic-y += div64.h
generic-y += emergency-restart.h
generic-y += errno.h
+generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
+generic-y += hash.h
+generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
-generic-y += ipc.h
generic-y += ipcbuf.h
generic-y += irq_regs.h
-generic-y += kdebug.h
generic-y += local.h
-generic-y += module.h
+generic-y += local64.h
+generic-y += mcs_spinlock.h
generic-y += msgbuf.h
generic-y += mutex.h
generic-y += param.h
generic-y += parport.h
generic-y += poll.h
generic-y += posix_types.h
+generic-y += preempt.h
generic-y += resource.h
generic-y += scatterlist.h
generic-y += sembuf.h
@@ -39,6 +38,6 @@ generic-y += sockios.h
generic-y += statfs.h
generic-y += termbits.h
generic-y += termios.h
+generic-y += trace_clock.h
generic-y += types.h
-generic-y += ucontext.h
generic-y += xor.h
diff --git a/arch/tile/include/asm/atomic.h b/arch/tile/include/asm/atomic.h
index 921dbeb8a70..70979846076 100644
--- a/arch/tile/include/asm/atomic.h
+++ b/arch/tile/include/asm/atomic.h
@@ -17,10 +17,12 @@
#ifndef _ASM_TILE_ATOMIC_H
#define _ASM_TILE_ATOMIC_H
+#include <asm/cmpxchg.h>
+
#ifndef __ASSEMBLY__
#include <linux/compiler.h>
-#include <asm/system.h>
+#include <linux/types.h>
#define ATOMIC_INIT(i) { (i) }
@@ -112,6 +114,32 @@ static inline int atomic_read(const atomic_t *v)
#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
/**
+ * atomic_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline int atomic_xchg(atomic_t *v, int n)
+{
+ return xchg(&v->counter, n);
+}
+
+/**
+ * atomic_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
+{
+ return cmpxchg(&v->counter, o, n);
+}
+
+/**
* atomic_add_negative - add and test if negative
* @v: pointer of type atomic_t
* @i: integer value to add
@@ -121,54 +149,6 @@ static inline int atomic_read(const atomic_t *v)
*/
#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0)
-/* Nonexistent functions intended to cause link errors. */
-extern unsigned long __xchg_called_with_bad_pointer(void);
-extern unsigned long __cmpxchg_called_with_bad_pointer(void);
-
-#define xchg(ptr, x) \
- ({ \
- typeof(*(ptr)) __x; \
- switch (sizeof(*(ptr))) { \
- case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_xchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((x)-(x)))(x)); \
- break; \
- case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_xchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((x)-(x)))(x)); \
- break; \
- default: \
- __xchg_called_with_bad_pointer(); \
- } \
- __x; \
- })
-
-#define cmpxchg(ptr, o, n) \
- ({ \
- typeof(*(ptr)) __x; \
- switch (sizeof(*(ptr))) { \
- case 4: \
- __x = (typeof(__x))(typeof(__x-__x))atomic_cmpxchg( \
- (atomic_t *)(ptr), \
- (u32)(typeof((o)-(o)))(o), \
- (u32)(typeof((n)-(n)))(n)); \
- break; \
- case 8: \
- __x = (typeof(__x))(typeof(__x-__x))atomic64_cmpxchg( \
- (atomic64_t *)(ptr), \
- (u64)(typeof((o)-(o)))(o), \
- (u64)(typeof((n)-(n)))(n)); \
- break; \
- default: \
- __cmpxchg_called_with_bad_pointer(); \
- } \
- __x; \
- })
-
-#define tas(ptr) (xchg((ptr), 1))
-
#endif /* __ASSEMBLY__ */
#ifndef __tilegx__
@@ -177,4 +157,52 @@ extern unsigned long __cmpxchg_called_with_bad_pointer(void);
#include <asm/atomic_64.h>
#endif
+#ifndef __ASSEMBLY__
+
+/**
+ * atomic64_xchg - atomically exchange contents of memory with a new value
+ * @v: pointer of type atomic64_t
+ * @i: integer value to store in memory
+ *
+ * Atomically sets @v to @i and returns old @v
+ */
+static inline long long atomic64_xchg(atomic64_t *v, long long n)
+{
+ return xchg64(&v->counter, n);
+}
+
+/**
+ * atomic64_cmpxchg - atomically exchange contents of memory if it matches
+ * @v: pointer of type atomic64_t
+ * @o: old value that memory should have
+ * @n: new value to write to memory if it matches
+ *
+ * Atomically checks if @v holds @o and replaces it with @n if so.
+ * Returns the old value at @v.
+ */
+static inline long long atomic64_cmpxchg(atomic64_t *v, long long o,
+ long long n)
+{
+ return cmpxchg64(&v->counter, o, n);
+}
+
+static inline long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ long long c, old, dec;
+
+ c = atomic64_read(v);
+ for (;;) {
+ dec = c - 1;
+ if (unlikely(dec < 0))
+ break;
+ old = atomic64_cmpxchg((v), c, dec);
+ if (likely(old == c))
+ break;
+ c = old;
+ }
+ return dec;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_TILE_ATOMIC_H */
diff --git a/arch/tile/include/asm/atomic_32.h b/arch/tile/include/asm/atomic_32.h
index c03349e0ca9..1b109fad9ff 100644
--- a/arch/tile/include/asm/atomic_32.h
+++ b/arch/tile/include/asm/atomic_32.h
@@ -17,44 +17,11 @@
#ifndef _ASM_TILE_ATOMIC_32_H
#define _ASM_TILE_ATOMIC_32_H
+#include <asm/barrier.h>
#include <arch/chip.h>
#ifndef __ASSEMBLY__
-/* Tile-specific routines to support <linux/atomic.h>. */
-int _atomic_xchg(atomic_t *v, int n);
-int _atomic_xchg_add(atomic_t *v, int i);
-int _atomic_xchg_add_unless(atomic_t *v, int a, int u);
-int _atomic_cmpxchg(atomic_t *v, int o, int n);
-
-/**
- * atomic_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg(v, n);
-}
-
-/**
- * atomic_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic_cmpxchg(v, o, n);
-}
-
/**
* atomic_add - add integer to atomic variable
* @i: integer value to add
@@ -64,7 +31,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
*/
static inline void atomic_add(int i, atomic_t *v)
{
- _atomic_xchg_add(v, i);
+ _atomic_xchg_add(&v->counter, i);
}
/**
@@ -77,7 +44,7 @@ static inline void atomic_add(int i, atomic_t *v)
static inline int atomic_add_return(int i, atomic_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add(v, i) + i;
+ return _atomic_xchg_add(&v->counter, i) + i;
}
/**
@@ -92,7 +59,7 @@ static inline int atomic_add_return(int i, atomic_t *v)
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic_xchg_add_unless(v, a, u);
+ return _atomic_xchg_add_unless(&v->counter, a, u);
}
/**
@@ -107,64 +74,31 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
*/
static inline void atomic_set(atomic_t *v, int n)
{
- _atomic_xchg(v, n);
+ _atomic_xchg(&v->counter, n);
}
/* A 64bit atomic type */
typedef struct {
- u64 __aligned(8) counter;
+ long long counter;
} atomic64_t;
#define ATOMIC64_INIT(val) { (val) }
-u64 _atomic64_xchg(atomic64_t *v, u64 n);
-u64 _atomic64_xchg_add(atomic64_t *v, u64 i);
-u64 _atomic64_xchg_add_unless(atomic64_t *v, u64 a, u64 u);
-u64 _atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n);
-
/**
* atomic64_read - read atomic variable
* @v: pointer of type atomic64_t
*
* Atomically reads the value of @v.
*/
-static inline u64 atomic64_read(const atomic64_t *v)
+static inline long long atomic64_read(const atomic64_t *v)
{
/*
* Requires an atomic op to read both 32-bit parts consistently.
* Casting away const is safe since the atomic support routines
* do not write to memory if the value has not been modified.
*/
- return _atomic64_xchg_add((atomic64_t *)v, 0);
-}
-
-/**
- * atomic64_xchg - atomically exchange contents of memory with a new value
- * @v: pointer of type atomic64_t
- * @i: integer value to store in memory
- *
- * Atomically sets @v to @i and returns old @v
- */
-static inline u64 atomic64_xchg(atomic64_t *v, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg(v, n);
-}
-
-/**
- * atomic64_cmpxchg - atomically exchange contents of memory if it matches
- * @v: pointer of type atomic64_t
- * @o: old value that memory should have
- * @n: new value to write to memory if it matches
- *
- * Atomically checks if @v holds @o and replaces it with @n if so.
- * Returns the old value at @v.
- */
-static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
-{
- smp_mb(); /* barrier for proper semantics */
- return _atomic64_cmpxchg(v, o, n);
+ return _atomic64_xchg_add((long long *)&v->counter, 0);
}
/**
@@ -174,9 +108,9 @@ static inline u64 atomic64_cmpxchg(atomic64_t *v, u64 o, u64 n)
*
* Atomically adds @i to @v.
*/
-static inline void atomic64_add(u64 i, atomic64_t *v)
+static inline void atomic64_add(long long i, atomic64_t *v)
{
- _atomic64_xchg_add(v, i);
+ _atomic64_xchg_add(&v->counter, i);
}
/**
@@ -186,10 +120,10 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
*
* Atomically adds @i to @v and returns @i + @v
*/
-static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
+static inline long long atomic64_add_return(long long i, atomic64_t *v)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add(v, i) + i;
+ return _atomic64_xchg_add(&v->counter, i) + i;
}
/**
@@ -199,12 +133,13 @@ static inline u64 atomic64_add_return(u64 i, atomic64_t *v)
* @u: ...unless v is equal to u.
*
* Atomically adds @a to @v, so long as @v was not already @u.
- * Returns the old value of @v.
+ * Returns non-zero if @v was not @u, and zero otherwise.
*/
-static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
+static inline long long atomic64_add_unless(atomic64_t *v, long long a,
+ long long u)
{
smp_mb(); /* barrier for proper semantics */
- return _atomic64_xchg_add_unless(v, a, u) != u;
+ return _atomic64_xchg_add_unless(&v->counter, a, u) != u;
}
/**
@@ -217,9 +152,9 @@ static inline u64 atomic64_add_unless(atomic64_t *v, u64 a, u64 u)
* atomic64_set() can't be just a raw store, since it would be lost if it
* fell between the load and store of one of the other atomic ops.
*/
-static inline void atomic64_set(atomic64_t *v, u64 n)
+static inline void atomic64_set(atomic64_t *v, long long n)
{
- _atomic64_xchg(v, n);
+ _atomic64_xchg(&v->counter, n);
}
#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
@@ -234,16 +169,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL)
-/*
- * We need to barrier before modifying the word, since the _atomic_xxx()
- * routines just tns the lock and then read/modify/write of the word.
- * But after the word is updated, the routine issues an "mf" before returning,
- * and since it's a function call, we don't even need a compiler barrier.
- */
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_dec() do { } while (0)
-#define smp_mb__after_atomic_inc() do { } while (0)
#endif /* !__ASSEMBLY__ */
@@ -251,21 +176,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
* Internal definitions only beyond this point.
*/
-#define ATOMIC_LOCKS_FOUND_VIA_TABLE() \
- (!CHIP_HAS_CBOX_HOME_MAP() && defined(CONFIG_SMP))
-
-#if ATOMIC_LOCKS_FOUND_VIA_TABLE()
-
-/* Number of entries in atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L1_SHIFT 6
-#define ATOMIC_HASH_L1_SIZE (1 << ATOMIC_HASH_L1_SHIFT)
-
-/* Number of locks in each struct pointed to by atomic_lock_ptr[]. */
-#define ATOMIC_HASH_L2_SHIFT (CHIP_L2_LOG_LINE_SIZE() - 2)
-#define ATOMIC_HASH_L2_SIZE (1 << ATOMIC_HASH_L2_SHIFT)
-
-#else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* Number of atomic locks in atomic_locks[]. Must be a power of two.
* There is no reason for more than PAGE_SIZE / 8 entries, since that
@@ -280,8 +190,6 @@ static inline void atomic64_set(atomic64_t *v, u64 n)
extern int atomic_locks[];
#endif
-#endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
-
/*
* All the code that may fault while holding an atomic lock must
* place the pointer to the lock in ATOMIC_LOCK_REG so the fault code
@@ -302,7 +210,14 @@ void __init_atomic_per_cpu(void);
void __atomic_fault_unlock(int *lock_ptr);
#endif
+/* Return a pointer to the lock for the given address. */
+int *__atomic_hashed_lock(volatile void *v);
+
/* Private helper routines in lib/atomic_asm_32.S */
+struct __get_user {
+ unsigned long val;
+ int err;
+};
extern struct __get_user __atomic_cmpxchg(volatile int *p,
int *lock, int o, int n);
extern struct __get_user __atomic_xchg(volatile int *p, int *lock, int n);
@@ -312,11 +227,16 @@ extern struct __get_user __atomic_xchg_add_unless(volatile int *p,
extern struct __get_user __atomic_or(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_andn(volatile int *p, int *lock, int n);
extern struct __get_user __atomic_xor(volatile int *p, int *lock, int n);
-extern u64 __atomic64_cmpxchg(volatile u64 *p, int *lock, u64 o, u64 n);
-extern u64 __atomic64_xchg(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add(volatile u64 *p, int *lock, u64 n);
-extern u64 __atomic64_xchg_add_unless(volatile u64 *p,
- int *lock, u64 o, u64 n);
+extern long long __atomic64_cmpxchg(volatile long long *p, int *lock,
+ long long o, long long n);
+extern long long __atomic64_xchg(volatile long long *p, int *lock, long long n);
+extern long long __atomic64_xchg_add(volatile long long *p, int *lock,
+ long long n);
+extern long long __atomic64_xchg_add_unless(volatile long long *p,
+ int *lock, long long o, long long n);
+
+/* Return failure from the atomic wrappers. */
+struct __get_user __atomic_bad_address(int __user *addr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/atomic_64.h b/arch/tile/include/asm/atomic_64.h
index 27fe667fddf..7b11c5fadd4 100644
--- a/arch/tile/include/asm/atomic_64.h
+++ b/arch/tile/include/asm/atomic_64.h
@@ -19,6 +19,7 @@
#ifndef __ASSEMBLY__
+#include <asm/barrier.h>
#include <arch/spr_def.h>
/* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
@@ -31,25 +32,6 @@
* on any routine which updates memory and returns a value.
*/
-static inline int atomic_cmpxchg(atomic_t *v, int o, int n)
-{
- int val;
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- smp_mb(); /* barrier for proper semantics */
- val = __insn_cmpexch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline int atomic_xchg(atomic_t *v, int n)
-{
- int val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch4((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic_add(int i, atomic_t *v)
{
__insn_fetchadd4((void *)&v->counter, i);
@@ -71,7 +53,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval;
}
@@ -83,25 +65,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
#define atomic64_read(v) ((v)->counter)
#define atomic64_set(v, i) ((v)->counter = (i))
-static inline long atomic64_cmpxchg(atomic64_t *v, long o, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- __insn_mtspr(SPR_CMPEXCH_VALUE, o);
- val = __insn_cmpexch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
-static inline long atomic64_xchg(atomic64_t *v, long n)
-{
- long val;
- smp_mb(); /* barrier for proper semantics */
- val = __insn_exch((void *)&v->counter, n);
- smp_mb(); /* barrier for proper semantics */
- return val;
-}
-
static inline void atomic64_add(long i, atomic64_t *v)
{
__insn_fetchadd((void *)&v->counter, i);
@@ -123,7 +86,7 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
if (oldval == u)
break;
guess = oldval;
- oldval = atomic64_cmpxchg(v, guess, guess + a);
+ oldval = cmpxchg(&v->counter, guess, guess + a);
} while (guess != oldval);
return oldval != u;
}
@@ -142,12 +105,6 @@ static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
-/* Atomic dec and inc don't implement barrier, so provide them if needed. */
-#define smp_mb__before_atomic_dec() smp_mb()
-#define smp_mb__after_atomic_dec() smp_mb()
-#define smp_mb__before_atomic_inc() smp_mb()
-#define smp_mb__after_atomic_inc() smp_mb()
-
/* Define this to indicate that cmpxchg is an efficient operation. */
#define __HAVE_ARCH_CMPXCHG
diff --git a/arch/tile/include/asm/barrier.h b/arch/tile/include/asm/barrier.h
new file mode 100644
index 00000000000..96a42ae79f4
--- /dev/null
+++ b/arch/tile/include/asm/barrier.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_BARRIER_H
+#define _ASM_TILE_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <arch/chip.h>
+#include <arch/spr_def.h>
+#include <asm/timex.h>
+
+#define __sync() __insn_mf()
+
+#include <hv/syscall_public.h>
+/*
+ * Issue an uncacheable load to each memory controller, then
+ * wait until those loads have completed.
+ */
+static inline void __mb_incoherent(void)
+{
+ long clobber_r10;
+ asm volatile("swint2"
+ : "=R10" (clobber_r10)
+ : "R10" (HV_SYS_fence_incoherent)
+ : "r0", "r1", "r2", "r3", "r4",
+ "r5", "r6", "r7", "r8", "r9",
+ "r11", "r12", "r13", "r14",
+ "r15", "r16", "r17", "r18", "r19",
+ "r20", "r21", "r22", "r23", "r24",
+ "r25", "r26", "r27", "r28", "r29");
+}
+
+/* Fence to guarantee visibility of stores to incoherent memory. */
+static inline void
+mb_incoherent(void)
+{
+ __insn_mf();
+
+ {
+#if CHIP_HAS_TILE_WRITE_PENDING()
+ const unsigned long WRITE_TIMEOUT_CYCLES = 400;
+ unsigned long start = get_cycles_low();
+ do {
+ if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
+ return;
+ } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
+#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
+ (void) __mb_incoherent();
+ }
+}
+
+#define fast_wmb() __sync()
+#define fast_rmb() __sync()
+#define fast_mb() __sync()
+#define fast_iob() mb_incoherent()
+
+#define wmb() fast_wmb()
+#define rmb() fast_rmb()
+#define mb() fast_mb()
+#define iob() fast_iob()
+
+#ifndef __tilegx__ /* 32 bit */
+/*
+ * We need to barrier before modifying the word, since the _atomic_xxx()
+ * routines just tns the lock and then read/modify/write of the word.
+ * But after the word is updated, the routine issues an "mf" before returning,
+ * and since it's a function call, we don't even need a compiler barrier.
+ */
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() do { } while (0)
+#else /* 64 bit */
+#define smp_mb__before_atomic() smp_mb()
+#define smp_mb__after_atomic() smp_mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* !__ASSEMBLY__ */
+#endif /* _ASM_TILE_BARRIER_H */
diff --git a/arch/tile/include/asm/bitops.h b/arch/tile/include/asm/bitops.h
index 16f1fa51fea..20caa346ac0 100644
--- a/arch/tile/include/asm/bitops.h
+++ b/arch/tile/include/asm/bitops.h
@@ -17,6 +17,7 @@
#define _ASM_TILE_BITOPS_H
#include <linux/types.h>
+#include <asm/barrier.h>
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
@@ -29,17 +30,6 @@
#endif
/**
- * __ffs - find first set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __ffs(unsigned long word)
-{
- return __builtin_ctzl(word);
-}
-
-/**
* ffz - find first zero bit in word
* @word: The word to search
*
@@ -50,31 +40,9 @@ static inline unsigned long ffz(unsigned long word)
return __builtin_ctzl(~word);
}
-/**
- * __fls - find last set bit in word
- * @word: The word to search
- *
- * Undefined if no set bit exists, so code should check against 0 first.
- */
-static inline unsigned long __fls(unsigned long word)
-{
- return (sizeof(word) * 8) - 1 - __builtin_clzl(word);
-}
-
-/**
- * ffs - find first set bit in word
- * @x: the word to search
- *
- * This is defined the same way as the libc and compiler builtin ffs
- * routines, therefore differs in spirit from the other bitops.
- *
- * ffs(value) returns 0 if value is 0 or the position of the first
- * set bit if value is nonzero. The first (least significant) bit
- * is at position 1.
- */
-static inline int ffs(int x)
+static inline int fls64(__u64 w)
{
- return __builtin_ffs(x);
+ return (sizeof(__u64) * 8) - __builtin_clzll(w);
}
/**
@@ -90,12 +58,7 @@ static inline int ffs(int x)
*/
static inline int fls(int x)
{
- return (sizeof(int) * 8) - __builtin_clz(x);
-}
-
-static inline int fls64(__u64 w)
-{
- return (sizeof(__u64) * 8) - __builtin_clzll(w);
+ return fls64((unsigned int) x);
}
static inline unsigned int __arch_hweight32(unsigned int w)
@@ -118,6 +81,9 @@ static inline unsigned long __arch_hweight64(__u64 w)
return __builtin_popcountll(w);
}
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-ffs.h>
#include <asm-generic/bitops/const_hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/find.h>
diff --git a/arch/tile/include/asm/bitops_32.h b/arch/tile/include/asm/bitops_32.h
index 571b118bfd9..bbf7b666f21 100644
--- a/arch/tile/include/asm/bitops_32.h
+++ b/arch/tile/include/asm/bitops_32.h
@@ -16,8 +16,7 @@
#define _ASM_TILE_BITOPS_32_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
-#include <asm/system.h>
+#include <asm/barrier.h>
/* Tile-specific routines to support <asm/bitops.h>. */
unsigned long _atomic_or(volatile unsigned long *p, unsigned long mask);
@@ -50,8 +49,8 @@ static inline void set_bit(unsigned nr, volatile unsigned long *addr)
* restricted to acting on a single-word quantity.
*
* clear_bit() may not contain a memory barrier, so if it is used for
- * locking purposes, you should call smp_mb__before_clear_bit() and/or
- * smp_mb__after_clear_bit() to ensure changes are visible on other cpus.
+ * locking purposes, you should call smp_mb__before_atomic() and/or
+ * smp_mb__after_atomic() to ensure changes are visible on other cpus.
*/
static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
{
@@ -122,10 +121,6 @@ static inline int test_and_change_bit(unsigned nr,
return (_atomic_xor(addr, mask) & mask) != 0;
}
-/* See discussion at smp_mb__before_atomic_dec() in <asm/atomic_32.h>. */
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() do {} while (0)
-
#include <asm-generic/bitops/ext2-atomic.h>
#endif /* _ASM_TILE_BITOPS_32_H */
diff --git a/arch/tile/include/asm/bitops_64.h b/arch/tile/include/asm/bitops_64.h
index e9c8e381ee0..bb1a29221fc 100644
--- a/arch/tile/include/asm/bitops_64.h
+++ b/arch/tile/include/asm/bitops_64.h
@@ -16,8 +16,7 @@
#define _ASM_TILE_BITOPS_64_H
#include <linux/compiler.h>
-#include <linux/atomic.h>
-#include <asm/system.h>
+#include <asm/cmpxchg.h>
/* See <asm/bitops.h> for API comments. */
@@ -33,20 +32,15 @@ static inline void clear_bit(unsigned nr, volatile unsigned long *addr)
__insn_fetchand((void *)(addr + nr / BITS_PER_LONG), ~mask);
}
-#define smp_mb__before_clear_bit() smp_mb()
-#define smp_mb__after_clear_bit() smp_mb()
-
-
static inline void change_bit(unsigned nr, volatile unsigned long *addr)
{
- unsigned long old, mask = (1UL << (nr % BITS_PER_LONG));
- long guess, oldval;
+ unsigned long mask = (1UL << (nr % BITS_PER_LONG));
+ unsigned long guess, oldval;
addr += nr / BITS_PER_LONG;
- old = *addr;
+ oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
}
@@ -86,13 +80,12 @@ static inline int test_and_change_bit(unsigned nr,
volatile unsigned long *addr)
{
unsigned long mask = (1UL << (nr % BITS_PER_LONG));
- long guess, oldval = *addr;
+ unsigned long guess, oldval;
addr += nr / BITS_PER_LONG;
oldval = *addr;
do {
guess = oldval;
- oldval = atomic64_cmpxchg((atomic64_t *)addr,
- guess, guess ^ mask);
+ oldval = cmpxchg(addr, guess, guess ^ mask);
} while (guess != oldval);
return (oldval & mask) != 0;
}
diff --git a/arch/tile/include/asm/bitsperlong.h b/arch/tile/include/asm/bitsperlong.h
deleted file mode 100644
index 58c771f2af2..00000000000
--- a/arch/tile/include/asm/bitsperlong.h
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_BITSPERLONG_H
-#define _ASM_TILE_BITSPERLONG_H
-
-#ifdef __LP64__
-# define __BITS_PER_LONG 64
-#else
-# define __BITS_PER_LONG 32
-#endif
-
-#include <asm-generic/bitsperlong.h>
-
-#endif /* _ASM_TILE_BITSPERLONG_H */
diff --git a/arch/tile/include/asm/byteorder.h b/arch/tile/include/asm/byteorder.h
deleted file mode 100644
index 9558416d578..00000000000
--- a/arch/tile/include/asm/byteorder.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <linux/byteorder/little_endian.h>
diff --git a/arch/tile/include/asm/cache.h b/arch/tile/include/asm/cache.h
index 392e5333dd8..6160761d5f6 100644
--- a/arch/tile/include/asm/cache.h
+++ b/arch/tile/include/asm/cache.h
@@ -27,11 +27,17 @@
#define L2_CACHE_ALIGN(x) (((x)+(L2_CACHE_BYTES-1)) & -L2_CACHE_BYTES)
/*
- * TILE-Gx is fully coherent so we don't need to define ARCH_DMA_MINALIGN.
+ * TILEPro I/O is not always coherent (networking typically uses coherent
+ * I/O, but PCI traffic does not) and setting ARCH_DMA_MINALIGN to the
+ * L2 cacheline size helps ensure that kernel heap allocations are aligned.
+ * TILE-Gx I/O is always coherent when used on hash-for-home pages.
+ *
+ * However, it's possible at runtime to request not to use hash-for-home
+ * for the kernel heap, in which case the kernel will use flush-and-inval
+ * to manage coherence. As a result, we use L2_CACHE_BYTES for the
+ * DMA minimum alignment to avoid false sharing in the kernel heap.
*/
-#ifndef __tilegx__
#define ARCH_DMA_MINALIGN L2_CACHE_BYTES
-#endif
/* use the cache line size for the L2, which is where it counts */
#define SMP_CACHE_BYTES_SHIFT L2_CACHE_SHIFT
@@ -43,9 +49,16 @@
#define __read_mostly __attribute__((__section__(".data..read_mostly")))
/*
- * Attribute for data that is kept read/write coherent until the end of
- * initialization, then bumped to read/only incoherent for performance.
+ * Originally we used small TLB pages for kernel data and grouped some
+ * things together as "write once", enforcing the property at the end
+ * of initialization by making those pages read-only and non-coherent.
+ * This allowed better cache utilization since cache inclusion did not
+ * need to be maintained. However, to do this requires an extra TLB
+ * entry, which on balance is more of a performance hit than the
+ * non-coherence is a performance gain, so we now just make "read
+ * mostly" and "write once" be synonyms. We keep the attribute
+ * separate in case we change our minds at a future date.
*/
-#define __write_once __attribute__((__section__(".w1data")))
+#define __write_once __read_mostly
#endif /* _ASM_TILE_CACHE_H */
diff --git a/arch/tile/include/asm/cacheflush.h b/arch/tile/include/asm/cacheflush.h
index e925f4bb498..92ee4c8a4f7 100644
--- a/arch/tile/include/asm/cacheflush.h
+++ b/arch/tile/include/asm/cacheflush.h
@@ -20,7 +20,6 @@
/* Keep includes the same across arches. */
#include <linux/mm.h>
#include <linux/cache.h>
-#include <asm/system.h>
#include <arch/icache.h>
/* Caches are physically-indexed and so don't need special treatment */
@@ -76,23 +75,6 @@ static inline void copy_to_user_page(struct vm_area_struct *vma,
#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
memcpy((dst), (src), (len))
-/*
- * Invalidate a VA range; pads to L2 cacheline boundaries.
- *
- * Note that on TILE64, __inv_buffer() actually flushes modified
- * cache lines in addition to invalidating them, i.e., it's the
- * same as __finv_buffer().
- */
-static inline void __inv_buffer(void *buffer, size_t size)
-{
- char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
- char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
- while (next < finish) {
- __insn_inv(next);
- next += CHIP_INV_STRIDE();
- }
-}
-
/* Flush a VA range; pads to L2 cacheline boundaries. */
static inline void __flush_buffer(void *buffer, size_t size)
{
@@ -116,13 +98,6 @@ static inline void __finv_buffer(void *buffer, size_t size)
}
-/* Invalidate a VA range and wait for it to be complete. */
-static inline void inv_buffer(void *buffer, size_t size)
-{
- __inv_buffer(buffer, size);
- mb();
-}
-
/*
* Flush a locally-homecached VA range and wait for the evicted
* cachelines to hit memory.
@@ -143,6 +118,26 @@ static inline void finv_buffer_local(void *buffer, size_t size)
mb_incoherent();
}
+#ifdef __tilepro__
+/* Invalidate a VA range; pads to L2 cacheline boundaries. */
+static inline void __inv_buffer(void *buffer, size_t size)
+{
+ char *next = (char *)((long)buffer & -L2_CACHE_BYTES);
+ char *finish = (char *)L2_CACHE_ALIGN((long)buffer + size);
+ while (next < finish) {
+ __insn_inv(next);
+ next += CHIP_INV_STRIDE();
+ }
+}
+
+/* Invalidate a VA range and wait for it to be complete. */
+static inline void inv_buffer(void *buffer, size_t size)
+{
+ __inv_buffer(buffer, size);
+ mb();
+}
+#endif
+
/*
* Flush and invalidate a VA range that is homed remotely, waiting
* until the memory controller holds the flushed values. If "hfh" is
@@ -152,4 +147,14 @@ static inline void finv_buffer_local(void *buffer, size_t size)
*/
void finv_buffer_remote(void *buffer, size_t size, int hfh);
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible:
+ *
+ * TODO: fill this in!
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
#endif /* _ASM_TILE_CACHEFLUSH_H */
diff --git a/arch/tile/include/asm/checksum.h b/arch/tile/include/asm/checksum.h
index a120766c726..b21a2fdec9f 100644
--- a/arch/tile/include/asm/checksum.h
+++ b/arch/tile/include/asm/checksum.h
@@ -21,4 +21,22 @@
__wsum do_csum(const unsigned char *buff, int len);
#define do_csum do_csum
+/*
+ * Return the sum of all the 16-bit subwords in a long.
+ * This sums two subwords on a 32-bit machine, and four on 64 bits.
+ * The implementation does two vector adds to capture any overflow.
+ */
+static inline unsigned int csum_long(unsigned long x)
+{
+ unsigned long ret;
+#ifdef __tilegx__
+ ret = __insn_v2sadu(x, 0);
+ ret = __insn_v2sadu(ret, 0);
+#else
+ ret = __insn_sadh_u(x, 0);
+ ret = __insn_sadh_u(ret, 0);
+#endif
+ return ret;
+}
+
#endif /* _ASM_TILE_CHECKSUM_H */
diff --git a/arch/tile/include/asm/cmpxchg.h b/arch/tile/include/asm/cmpxchg.h
new file mode 100644
index 00000000000..0ccda3c425b
--- /dev/null
+++ b/arch/tile/include/asm/cmpxchg.h
@@ -0,0 +1,134 @@
+/*
+ * cmpxchg.h -- forked from asm/atomic.h with this copyright:
+ *
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _ASM_TILE_CMPXCHG_H
+#define _ASM_TILE_CMPXCHG_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+
+/* Nonexistent functions intended to cause compile errors. */
+extern void __xchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for xchg");
+extern void __cmpxchg_called_with_bad_pointer(void)
+ __compiletime_error("Bad argument size for cmpxchg");
+
+#ifndef __tilegx__
+
+/* Note the _atomic_xxx() routines include a final mb(). */
+int _atomic_xchg(int *ptr, int n);
+int _atomic_xchg_add(int *v, int i);
+int _atomic_xchg_add_unless(int *v, int a, int u);
+int _atomic_cmpxchg(int *ptr, int o, int n);
+long long _atomic64_xchg(long long *v, long long n);
+long long _atomic64_xchg_add(long long *v, long long i);
+long long _atomic64_xchg_add_unless(long long *v, long long a, long long u);
+long long _atomic64_cmpxchg(long long *v, long long o, long long n);
+
+#define xchg(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_xchg((int *)(ptr), (int)(n)); \
+ })
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 4) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic_cmpxchg((int *)ptr, (int)o, \
+ (int)n); \
+ })
+
+#define xchg64(ptr, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __xchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_xchg((long long *)(ptr), \
+ (long long)(n)); \
+ })
+
+#define cmpxchg64(ptr, o, n) \
+ ({ \
+ if (sizeof(*(ptr)) != 8) \
+ __cmpxchg_called_with_bad_pointer(); \
+ smp_mb(); \
+ (typeof(*(ptr)))_atomic64_cmpxchg((long long *)ptr, \
+ (long long)o, (long long)n); \
+ })
+
+#else
+
+#define xchg(ptr, n) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ smp_mb(); \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_exch4((ptr), \
+ (u32)(unsigned long)(n)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x)) \
+ __insn_exch((ptr), (unsigned long)(n)); \
+ break; \
+ default: \
+ __xchg_called_with_bad_pointer(); \
+ break; \
+ } \
+ smp_mb(); \
+ __x; \
+ })
+
+#define cmpxchg(ptr, o, n) \
+ ({ \
+ typeof(*(ptr)) __x; \
+ __insn_mtspr(SPR_CMPEXCH_VALUE, (unsigned long)(o)); \
+ smp_mb(); \
+ switch (sizeof(*(ptr))) { \
+ case 4: \
+ __x = (typeof(__x))(unsigned long) \
+ __insn_cmpexch4((ptr), \
+ (u32)(unsigned long)(n)); \
+ break; \
+ case 8: \
+ __x = (typeof(__x))__insn_cmpexch((ptr), \
+ (long long)(n)); \
+ break; \
+ default: \
+ __cmpxchg_called_with_bad_pointer(); \
+ break; \
+ } \
+ smp_mb(); \
+ __x; \
+ })
+
+#define xchg64 xchg
+#define cmpxchg64 cmpxchg
+
+#endif
+
+#define tas(ptr) xchg((ptr), 1)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_TILE_CMPXCHG_H */
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index bf95f55b82b..ffd4493efc7 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t;
typedef __kernel_mode_t compat_mode_t;
typedef __kernel_dev_t compat_dev_t;
typedef __kernel_loff_t compat_loff_t;
-typedef __kernel_nlink_t compat_nlink_t;
typedef __kernel_ipc_pid_t compat_ipc_pid_t;
typedef __kernel_daddr_t compat_daddr_t;
typedef __kernel_fsid_t compat_fsid_t;
@@ -111,6 +110,68 @@ struct compat_flock64 {
typedef u32 compat_sigset_word;
+typedef union compat_sigval {
+ compat_int_t sival_int;
+ compat_uptr_t sival_ptr;
+} compat_sigval_t;
+
+#define COMPAT_SI_PAD_SIZE (128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+ int si_signo;
+ int si_errno;
+ int si_code;
+
+ union {
+ int _pad[COMPAT_SI_PAD_SIZE];
+
+ /* kill() */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ } _kill;
+
+ /* POSIX.1b timers */
+ struct {
+ compat_timer_t _tid; /* timer id */
+ int _overrun; /* overrun count */
+ compat_sigval_t _sigval; /* same as below */
+ int _sys_private; /* not to be passed to user */
+ int _overrun_incr; /* amount to add to overrun */
+ } _timer;
+
+ /* POSIX.1b signals */
+ struct {
+ unsigned int _pid; /* sender's pid */
+ unsigned int _uid; /* sender's uid */
+ compat_sigval_t _sigval;
+ } _rt;
+
+ /* SIGCHLD */
+ struct {
+ unsigned int _pid; /* which child */
+ unsigned int _uid; /* sender's uid */
+ int _status; /* exit code */
+ compat_clock_t _utime;
+ compat_clock_t _stime;
+ } _sigchld;
+
+ /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+ struct {
+ unsigned int _addr; /* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+ int _trapno; /* TRAP # which caused the signal */
+#endif
+ } _sigfault;
+
+ /* SIGPOLL */
+ struct {
+ int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
+ int _fd;
+ } _sigpoll;
+ } _sifields;
+} compat_siginfo_t;
+
#define COMPAT_OFF_T_MAX 0x7fffffff
#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
@@ -211,57 +272,26 @@ extern int compat_setup_rt_frame(int sig, struct k_sigaction *ka,
struct pt_regs *regs);
/* Compat syscalls. */
-struct compat_sigaction;
struct compat_siginfo;
struct compat_sigaltstack;
-long compat_sys_execve(const char __user *path,
- compat_uptr_t __user *argv,
- compat_uptr_t __user *envp, struct pt_regs *);
-long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
- struct compat_sigaction __user *oact,
- size_t sigsetsize);
-long compat_sys_rt_sigqueueinfo(int pid, int sig,
- struct compat_siginfo __user *uinfo);
-long compat_sys_rt_sigreturn(struct pt_regs *);
-long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
- struct compat_sigaltstack __user *uoss_ptr,
- struct pt_regs *);
+long compat_sys_rt_sigreturn(void);
long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
u32 dummy, u32 low, u32 high);
long compat_sys_pwrite64(unsigned int fd, char __user *ubuf, size_t count,
u32 dummy, u32 low, u32 high);
-long compat_sys_lookup_dcookie(u32 low, u32 high, char __user *buf, size_t len);
long compat_sys_sync_file_range2(int fd, unsigned int flags,
u32 offset_lo, u32 offset_hi,
u32 nbytes_lo, u32 nbytes_hi);
long compat_sys_fallocate(int fd, int mode,
u32 offset_lo, u32 offset_hi,
u32 len_lo, u32 len_hi);
-long compat_sys_sched_rr_get_interval(compat_pid_t pid,
- struct compat_timespec __user *interval);
-
-/* Versions of compat functions that differ from generic Linux. */
-struct compat_msgbuf;
-long tile_compat_sys_msgsnd(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, int msgflg);
-long tile_compat_sys_msgrcv(int msqid,
- struct compat_msgbuf __user *msgp,
- size_t msgsz, long msgtyp, int msgflg);
-long tile_compat_sys_ptrace(compat_long_t request, compat_long_t pid,
- compat_long_t addr, compat_long_t data);
-
-/* Tilera Linux syscalls that don't have "compat" versions. */
-#define compat_sys_flush_cache sys_flush_cache
-
-/* These are the intvec_64.S trampolines. */
-long _compat_sys_execve(const char __user *path,
- const compat_uptr_t __user *argv,
- const compat_uptr_t __user *envp);
-long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
- struct compat_sigaltstack __user *uoss_ptr);
+long compat_sys_llseek(unsigned int fd, unsigned int offset_high,
+ unsigned int offset_low, loff_t __user * result,
+ unsigned int origin);
+
+/* Assembly trampoline to avoid clobbering r0. */
long _compat_sys_rt_sigreturn(void);
#endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/siginfo.h b/arch/tile/include/asm/device.h
index 56d661bb010..6ab8bf146d4 100644
--- a/arch/tile/include/asm/siginfo.h
+++ b/arch/tile/include/asm/device.h
@@ -10,25 +10,27 @@
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
+ * Arch specific extensions to struct device
*/
-#ifndef _ASM_TILE_SIGINFO_H
-#define _ASM_TILE_SIGINFO_H
+#ifndef _ASM_TILE_DEVICE_H
+#define _ASM_TILE_DEVICE_H
-#define __ARCH_SI_TRAPNO
+struct dev_archdata {
+ /* DMA operations on that device */
+ struct dma_map_ops *dma_ops;
-#ifdef __LP64__
-# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
-#endif
+ /* Offset of the DMA address from the PA. */
+ dma_addr_t dma_offset;
-#include <asm-generic/siginfo.h>
+ /*
+ * Highest DMA address that can be generated by devices that
+ * have limited DMA capability, i.e. non 64-bit capable.
+ */
+ dma_addr_t max_direct_dma_addr;
+};
-/*
- * Additional Tile-specific SIGILL si_codes
- */
-#define ILL_DBLFLT (__SI_FAULT|9) /* double fault */
-#define ILL_HARDWALL (__SI_FAULT|10) /* user networks hardwall violation */
-#undef NSIGILL
-#define NSIGILL 10
+struct pdev_archdata {
+};
-#endif /* _ASM_TILE_SIGINFO_H */
+#endif /* _ASM_TILE_DEVICE_H */
diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h
index eaa06d175b3..1eae359d831 100644
--- a/arch/tile/include/asm/dma-mapping.h
+++ b/arch/tile/include/asm/dma-mapping.h
@@ -20,69 +20,94 @@
#include <linux/cache.h>
#include <linux/io.h>
-/*
- * Note that on x86 and powerpc, there is a "struct dma_mapping_ops"
- * that is used for all the DMA operations. For now, we don't have an
- * equivalent on tile, because we only have a single way of doing DMA.
- * (Tilera bug 7994 to use dma_mapping_ops.)
- */
+#ifdef __tilegx__
+#define ARCH_HAS_DMA_GET_REQUIRED_MASK
+#endif
+
+extern struct dma_map_ops *tile_dma_map_ops;
+extern struct dma_map_ops *gx_pci_dma_map_ops;
+extern struct dma_map_ops *gx_legacy_pci_dma_map_ops;
+extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops;
+
+static inline struct dma_map_ops *get_dma_ops(struct device *dev)
+{
+ if (dev && dev->archdata.dma_ops)
+ return dev->archdata.dma_ops;
+ else
+ return tile_dma_map_ops;
+}
+
+static inline dma_addr_t get_dma_offset(struct device *dev)
+{
+ return dev->archdata.dma_offset;
+}
+
+static inline void set_dma_offset(struct device *dev, dma_addr_t off)
+{
+ dev->archdata.dma_offset = off;
+}
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr;
+}
+
+static inline void dma_mark_clean(void *addr, size_t size) {}
+
+#include <asm-generic/dma-mapping-common.h>
+
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+ dev->archdata.dma_ops = ops;
+}
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-extern dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_single(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction);
-extern int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
- enum dma_data_direction);
-extern void dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nhwentries, enum dma_data_direction);
-extern dma_addr_t dma_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction);
-extern void dma_unmap_page(struct device *dev, dma_addr_t dma_address,
- size_t size, enum dma_data_direction);
-extern void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-extern void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction);
-
-
-void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag);
-
-void dma_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle);
-
-extern void dma_sync_single_for_cpu(struct device *, dma_addr_t, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_for_device(struct device *, dma_addr_t,
- size_t, enum dma_data_direction);
-extern void dma_sync_single_range_for_cpu(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_sync_single_range_for_device(struct device *, dma_addr_t,
- unsigned long offset, size_t,
- enum dma_data_direction);
-extern void dma_cache_sync(struct device *dev, void *vaddr, size_t,
- enum dma_data_direction);
+static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
+{
+ if (!dev->dma_mask)
+ return 0;
+
+ return addr + size - 1 <= *dev->dma_mask;
+}
static inline int
dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
- return 0;
+ debug_dma_mapping_error(dev, dma_addr);
+ return get_dma_ops(dev)->mapping_error(dev, dma_addr);
}
static inline int
dma_supported(struct device *dev, u64 mask)
{
- return 1;
+ return get_dma_ops(dev)->dma_supported(dev, mask);
}
static inline int
dma_set_mask(struct device *dev, u64 mask)
{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ /*
+ * For PCI devices with 64-bit DMA addressing capability, promote
+ * the dma_ops to hybrid, with the consistent memory DMA space limited
+ * to 32-bit. For 32-bit capable devices, limit the streaming DMA
+ * address range to max_direct_dma_addr.
+ */
+ if (dma_ops == gx_pci_dma_map_ops ||
+ dma_ops == gx_hybrid_pci_dma_map_ops ||
+ dma_ops == gx_legacy_pci_dma_map_ops) {
+ if (mask == DMA_BIT_MASK(64) &&
+ dma_ops == gx_legacy_pci_dma_map_ops)
+ set_dma_ops(dev, gx_hybrid_pci_dma_map_ops);
+ else if (mask > dev->archdata.max_direct_dma_addr)
+ mask = dev->archdata.max_direct_dma_addr;
+ }
+
if (!dev->dma_mask || !dma_supported(dev, mask))
return -EIO;
@@ -91,4 +116,43 @@ dma_set_mask(struct device *dev, u64 mask)
return 0;
}
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ void *cpu_addr;
+
+ cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
+
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+ return cpu_addr;
+}
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
+{
+ struct dma_map_ops *dma_ops = get_dma_ops(dev);
+
+ debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+ dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
+}
+
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+#define dma_free_coherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+#define dma_free_noncoherent(d, s, v, h) dma_free_attrs(d, s, v, h, NULL)
+
+/*
+ * dma_alloc_noncoherent() is #defined to return coherent memory,
+ * so there's no need to do any flushing here.
+ */
+static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+{
+}
+
#endif /* _ASM_TILE_DMA_MAPPING_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index 623a6bb741c..41d9878a968 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -30,7 +30,6 @@ typedef unsigned long elf_greg_t;
#define ELF_NGREG (sizeof(struct pt_regs) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
-#define EM_TILE64 187
#define EM_TILEPRO 188
#define EM_TILEGX 191
@@ -44,7 +43,11 @@ typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG];
#else
#define ELF_CLASS ELFCLASS32
#endif
+#ifdef __BIG_ENDIAN__
+#define ELF_DATA ELFDATA2MSB
+#else
#define ELF_DATA ELFDATA2LSB
+#endif
/*
* There seems to be a bug in how compat_binfmt_elf.c works: it
@@ -59,6 +62,7 @@ enum { ELF_ARCH = CHIP_ELF_TYPE() };
*/
#define elf_check_arch(x) \
((x)->e_ident[EI_CLASS] == ELF_CLASS && \
+ (x)->e_ident[EI_DATA] == ELF_DATA && \
(x)->e_machine == CHIP_ELF_TYPE())
/* The module loader only handles a few relocation types. */
@@ -127,6 +131,15 @@ extern int dump_task_regs(struct task_struct *, elf_gregset_t *);
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
+#define ARCH_DLINFO \
+do { \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_BASE); \
+} while (0)
+
+struct mm_struct;
+extern unsigned long arch_randomize_brk(struct mm_struct *mm);
+#define arch_randomize_brk arch_randomize_brk
+
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_PLATFORM "tilegx-m32"
@@ -143,6 +156,7 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#define compat_start_thread(regs, ip, usp) do { \
regs->pc = ptr_to_compat_reg((void *)(ip)); \
regs->sp = ptr_to_compat_reg((void *)(usp)); \
+ single_step_execve(); \
} while (0)
/*
@@ -151,12 +165,12 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
#undef SET_PERSONALITY
#define SET_PERSONALITY(ex) \
do { \
- current->personality = PER_LINUX; \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \
current_thread_info()->status &= ~TS_COMPAT; \
} while (0)
#define COMPAT_SET_PERSONALITY(ex) \
do { \
- current->personality = PER_LINUX_32BIT; \
+ set_personality(PER_LINUX | (current->personality & (~PER_MASK))); \
current_thread_info()->status |= TS_COMPAT; \
} while (0)
@@ -164,4 +178,6 @@ do { \
#endif /* CONFIG_COMPAT */
+#define CORE_DUMP_USE_REGSET
+
#endif /* _ASM_TILE_ELF_H */
diff --git a/arch/tile/include/asm/fixmap.h b/arch/tile/include/asm/fixmap.h
index c66f7933bea..ffe2637aeb3 100644
--- a/arch/tile/include/asm/fixmap.h
+++ b/arch/tile/include/asm/fixmap.h
@@ -25,9 +25,6 @@
#include <asm/kmap_types.h>
#endif
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
@@ -45,15 +42,23 @@
*
* TLB entries of such buffers will not be flushed across
* task switches.
- *
- * We don't bother with a FIX_HOLE since above the fixmaps
- * is unmapped memory in any case.
*/
enum fixed_addresses {
+#ifdef __tilegx__
+ /*
+ * TILEPro has unmapped memory above so the hole isn't needed,
+ * and in any case the hole pushes us over a single 16MB pmd.
+ */
+ FIX_HOLE,
+#endif
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
+#ifdef __tilegx__ /* see homecache.c */
+ FIX_HOMECACHE_BEGIN,
+ FIX_HOMECACHE_END = FIX_HOMECACHE_BEGIN+(NR_CPUS)-1,
+#endif
__end_of_permanent_fixed_addresses,
/*
@@ -70,48 +75,12 @@ enum fixed_addresses {
#endif
};
-extern void __set_fixmap(enum fixed_addresses idx,
- unsigned long phys, pgprot_t flags);
-
-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
-#define clear_fixmap(idx) \
- __set_fixmap(idx, 0, __pgprot(0))
-
#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_SIZE)
#define FIXADDR_BOOT_START (FIXADDR_TOP + PAGE_SIZE - __FIXADDR_BOOT_SIZE)
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
- return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
- BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
- return __virt_to_fix(vaddr);
-}
+#include <asm-generic/fixmap.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/ftrace.h b/arch/tile/include/asm/ftrace.h
index 461459b06d9..13a9bb81a8a 100644
--- a/arch/tile/include/asm/ftrace.h
+++ b/arch/tile/include/asm/ftrace.h
@@ -15,6 +15,26 @@
#ifndef _ASM_TILE_FTRACE_H
#define _ASM_TILE_FTRACE_H
-/* empty */
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_ADDR ((unsigned long)(__mcount))
+#define MCOUNT_INSN_SIZE 8 /* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+extern void __mcount(void);
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
#endif /* _ASM_TILE_FTRACE_H */
diff --git a/arch/tile/include/asm/futex.h b/arch/tile/include/asm/futex.h
index d03ec124a59..1a6ef1b69cb 100644
--- a/arch/tile/include/asm/futex.h
+++ b/arch/tile/include/asm/futex.h
@@ -28,29 +28,82 @@
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
+#include <asm/atomic.h>
-extern struct __get_user futex_set(u32 __user *v, int i);
-extern struct __get_user futex_add(u32 __user *v, int n);
-extern struct __get_user futex_or(u32 __user *v, int n);
-extern struct __get_user futex_andn(u32 __user *v, int n);
-extern struct __get_user futex_cmpxchg(u32 __user *v, int o, int n);
+/*
+ * Support macros for futex operations. Do not use these macros directly.
+ * They assume "ret", "val", "oparg", and "uaddr" in the lexical context.
+ * __futex_cmpxchg() additionally assumes "oldval".
+ */
+
+#ifdef __tilegx__
+
+#define __futex_asm(OP) \
+ asm("1: {" #OP " %1, %3, %4; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %5; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 8\n" \
+ ".quad 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (val), "+m" (*(uaddr)) \
+ : "r" (uaddr), "r" (oparg), "i" (-EFAULT))
+
+#define __futex_set() __futex_asm(exch4)
+#define __futex_add() __futex_asm(fetchadd4)
+#define __futex_or() __futex_asm(fetchor4)
+#define __futex_andn() ({ oparg = ~oparg; __futex_asm(fetchand4); })
+#define __futex_cmpxchg() \
+ ({ __insn_mtspr(SPR_CMPEXCH_VALUE, oldval); __futex_asm(cmpexch4); })
+
+#define __futex_xor() \
+ ({ \
+ u32 oldval, n = oparg; \
+ if ((ret = __get_user(oldval, uaddr)) == 0) { \
+ do { \
+ oparg = oldval ^ n; \
+ __futex_cmpxchg(); \
+ } while (ret == 0 && oldval != val); \
+ } \
+ })
+
+/* No need to prefetch, since the atomic ops go to the home cache anyway. */
+#define __futex_prolog()
-#ifndef __tilegx__
-extern struct __get_user futex_xor(u32 __user *v, int n);
#else
-static inline struct __get_user futex_xor(u32 __user *uaddr, int n)
-{
- struct __get_user asm_ret = __get_user_4(uaddr);
- if (!asm_ret.err) {
- int oldval, newval;
- do {
- oldval = asm_ret.val;
- newval = oldval ^ n;
- asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- } while (asm_ret.err == 0 && oldval != asm_ret.val);
+
+#define __futex_call(FN) \
+ { \
+ struct __get_user gu = FN((u32 __force *)uaddr, lock, oparg); \
+ val = gu.val; \
+ ret = gu.err; \
}
- return asm_ret;
-}
+
+#define __futex_set() __futex_call(__atomic_xchg)
+#define __futex_add() __futex_call(__atomic_xchg_add)
+#define __futex_or() __futex_call(__atomic_or)
+#define __futex_andn() __futex_call(__atomic_andn)
+#define __futex_xor() __futex_call(__atomic_xor)
+
+#define __futex_cmpxchg() \
+ { \
+ struct __get_user gu = __atomic_cmpxchg((u32 __force *)uaddr, \
+ lock, oldval, oparg); \
+ val = gu.val; \
+ ret = gu.err; \
+ }
+
+/*
+ * Find the lock pointer for the atomic calls to use, and issue a
+ * prefetch to the user address to bring it into cache. Similar to
+ * __atomic_setup(), but we can't do a read into the L1 since it might
+ * fault; instead we do a prefetch into the L2.
+ */
+#define __futex_prolog() \
+ int *lock; \
+ __insn_prefetch(uaddr); \
+ lock = __atomic_hashed_lock((int __force *)uaddr)
#endif
static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
@@ -59,8 +112,12 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
- int ret;
- struct __get_user asm_ret;
+ int uninitialized_var(val), ret;
+
+ __futex_prolog();
+
+ /* The 32-bit futex code makes this assumption, so validate it here. */
+ BUILD_BUG_ON(sizeof(atomic_t) != sizeof(int));
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
@@ -71,46 +128,45 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
- asm_ret = futex_set(uaddr, oparg);
+ __futex_set();
break;
case FUTEX_OP_ADD:
- asm_ret = futex_add(uaddr, oparg);
+ __futex_add();
break;
case FUTEX_OP_OR:
- asm_ret = futex_or(uaddr, oparg);
+ __futex_or();
break;
case FUTEX_OP_ANDN:
- asm_ret = futex_andn(uaddr, oparg);
+ __futex_andn();
break;
case FUTEX_OP_XOR:
- asm_ret = futex_xor(uaddr, oparg);
+ __futex_xor();
break;
default:
- asm_ret.err = -ENOSYS;
+ ret = -ENOSYS;
+ break;
}
pagefault_enable();
- ret = asm_ret.err;
-
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
- ret = (asm_ret.val == cmparg);
+ ret = (val == cmparg);
break;
case FUTEX_OP_CMP_NE:
- ret = (asm_ret.val != cmparg);
+ ret = (val != cmparg);
break;
case FUTEX_OP_CMP_LT:
- ret = (asm_ret.val < cmparg);
+ ret = (val < cmparg);
break;
case FUTEX_OP_CMP_GE:
- ret = (asm_ret.val >= cmparg);
+ ret = (val >= cmparg);
break;
case FUTEX_OP_CMP_LE:
- ret = (asm_ret.val <= cmparg);
+ ret = (val <= cmparg);
break;
case FUTEX_OP_CMP_GT:
- ret = (asm_ret.val > cmparg);
+ ret = (val > cmparg);
break;
default:
ret = -ENOSYS;
@@ -120,22 +176,20 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
}
static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
- u32 oldval, u32 newval)
+ u32 oldval, u32 oparg)
{
- struct __get_user asm_ret;
+ int ret, val;
+
+ __futex_prolog();
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
- asm_ret = futex_cmpxchg(uaddr, oldval, newval);
- *uval = asm_ret.val;
- return asm_ret.err;
-}
+ __futex_cmpxchg();
-#ifndef __tilegx__
-/* Return failure from the atomic wrappers. */
-struct __get_user __atomic_bad_address(int __user *addr);
-#endif
+ *uval = val;
+ return ret;
+}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/hardirq.h b/arch/tile/include/asm/hardirq.h
index 822390f9a15..54110af2398 100644
--- a/arch/tile/include/asm/hardirq.h
+++ b/arch/tile/include/asm/hardirq.h
@@ -42,6 +42,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
-#define HARDIRQ_BITS 8
-
#endif /* _ASM_TILE_HARDIRQ_H */
diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h
index 2ac422848c7..2f572b6b7bc 100644
--- a/arch/tile/include/asm/hardwall.h
+++ b/arch/tile/include/asm/hardwall.h
@@ -11,45 +11,13 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*
- * Provide methods for the HARDWALL_FILE for accessing the UDN.
+ * Provide methods for access control of per-cpu resources like
+ * UDN, IDN, or IPI.
*/
-
#ifndef _ASM_TILE_HARDWALL_H
#define _ASM_TILE_HARDWALL_H
-#include <linux/ioctl.h>
-
-#define HARDWALL_IOCTL_BASE 0xa2
-
-/*
- * The HARDWALL_CREATE() ioctl is a macro with a "size" argument.
- * The resulting ioctl value is passed to the kernel in conjunction
- * with a pointer to a little-endian bitmask of cpus, which must be
- * physically in a rectangular configuration on the chip.
- * The "size" is the number of bytes of cpu mask data.
- */
-#define _HARDWALL_CREATE 1
-#define HARDWALL_CREATE(size) \
- _IOC(_IOC_READ, HARDWALL_IOCTL_BASE, _HARDWALL_CREATE, (size))
-
-#define _HARDWALL_ACTIVATE 2
-#define HARDWALL_ACTIVATE \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_ACTIVATE)
-
-#define _HARDWALL_DEACTIVATE 3
-#define HARDWALL_DEACTIVATE \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE)
-
-#define _HARDWALL_GET_ID 4
-#define HARDWALL_GET_ID \
- _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID)
-
-#ifndef __KERNEL__
-
-/* This is the canonical name expected by userspace. */
-#define HARDWALL_FILE "/dev/hardwall"
-
-#else
+#include <uapi/asm/hardwall.h>
/* /proc hooks for hardwall. */
struct proc_dir_entry;
@@ -59,7 +27,4 @@ int proc_pid_hardwall(struct task_struct *task, char *buffer);
#else
static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {}
#endif
-
-#endif
-
#endif /* _ASM_TILE_HARDWALL_H */
diff --git a/arch/tile/include/asm/highmem.h b/arch/tile/include/asm/highmem.h
index b2a6c5de79a..fc8429a31c8 100644
--- a/arch/tile/include/asm/highmem.h
+++ b/arch/tile/include/asm/highmem.h
@@ -59,7 +59,7 @@ void *kmap_fix_kpte(struct page *page, int finished);
/* This macro is used only in map_new_virtual() to map "page". */
#define kmap_prot page_to_kpgprot(page)
-void *__kmap_atomic(struct page *page);
+void *kmap_atomic(struct page *page);
void __kunmap_atomic(void *kvaddr);
void *kmap_atomic_pfn(unsigned long pfn);
void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
diff --git a/arch/tile/include/asm/homecache.h b/arch/tile/include/asm/homecache.h
index a8243865d49..7ddd1b8d691 100644
--- a/arch/tile/include/asm/homecache.h
+++ b/arch/tile/include/asm/homecache.h
@@ -33,8 +33,7 @@ struct zone;
/*
* Is this page immutable (unwritable) and thus able to be cached more
- * widely than would otherwise be possible? On tile64 this means we
- * mark the PTE to cache locally; on tilepro it means we have "nc" set.
+ * widely than would otherwise be possible? This means we have "nc" set.
*/
#define PAGE_HOME_IMMUTABLE -2
@@ -44,16 +43,8 @@ struct zone;
*/
#define PAGE_HOME_INCOHERENT -3
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Home for the page is distributed via hash-for-home. */
#define PAGE_HOME_HASH -4
-#endif
-
-/* Homing is unknown or unspecified. Not valid for page_home(). */
-#define PAGE_HOME_UNKNOWN -5
-
-/* Home on the current cpu. Not valid for page_home(). */
-#define PAGE_HOME_HERE -6
/* Support wrapper to use instead of explicit hv_flush_remote(). */
extern void flush_remote(unsigned long cache_pfn, unsigned long cache_length,
@@ -79,10 +70,17 @@ extern void homecache_change_page_home(struct page *, int order, int home);
/*
* Flush a page out of whatever cache(s) it is in.
* This is more than just finv, since it properly handles waiting
- * for the data to reach memory on tilepro, but it can be quite
- * heavyweight, particularly on hash-for-home memory.
+ * for the data to reach memory, but it can be quite
+ * heavyweight, particularly on incoherent or immutable memory.
+ */
+extern void homecache_finv_page(struct page *);
+
+/*
+ * Flush a page out of the specified home cache.
+ * Note that the specified home need not be the actual home of the page,
+ * as for example might be the case when coordinating with I/O devices.
*/
-extern void homecache_flush_cache(struct page *, int order);
+extern void homecache_finv_map_page(struct page *, int home);
/*
* Allocate a page with the given GFP flags, home, and optionally
@@ -104,10 +102,10 @@ extern struct page *homecache_alloc_pages_node(int nid, gfp_t gfp_mask,
* routines use homecache_change_page_home() to reset the home
* back to the default before returning the page to the allocator.
*/
+void __homecache_free_pages(struct page *, unsigned int order);
void homecache_free_pages(unsigned long addr, unsigned int order);
-#define homecache_free_page(page) \
- homecache_free_pages((page), 0)
-
+#define __homecache_free_page(page) __homecache_free_pages((page), 0)
+#define homecache_free_page(page) homecache_free_pages((page), 0)
/*
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index d396d180516..3257733003f 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -16,6 +16,7 @@
#define _ASM_TILE_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
@@ -106,4 +107,29 @@ static inline void arch_release_hugepage(struct page *page)
{
}
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
+#ifdef CONFIG_HUGETLB_SUPER_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
+ struct page *page, int writable)
+{
+ size_t pagesize = huge_page_size(hstate_vma(vma));
+ if (pagesize != PUD_SIZE && pagesize != PMD_SIZE)
+ entry = pte_mksuper(entry);
+ return entry;
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+/* Sizes to scale up page size for PTEs with HV_PTE_SUPER bit. */
+enum {
+ HUGE_SHIFT_PGDIR = 0,
+ HUGE_SHIFT_PMD = 1,
+ HUGE_SHIFT_PAGE = 2,
+ HUGE_SHIFT_ENTRIES
+};
+extern int huge_shift[HUGE_SHIFT_ENTRIES];
+#endif
+
#endif /* _ASM_TILE_HUGETLB_H */
diff --git a/arch/tile/include/asm/io.h b/arch/tile/include/asm/io.h
index d2152deb1f3..9fe434969fa 100644
--- a/arch/tile/include/asm/io.h
+++ b/arch/tile/include/asm/io.h
@@ -19,7 +19,8 @@
#include <linux/bug.h>
#include <asm/page.h>
-#define IO_SPACE_LIMIT 0xfffffffful
+/* Maximum PCI I/O space address supported. */
+#define IO_SPACE_LIMIT 0xffffffff
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
@@ -62,6 +63,92 @@ extern void iounmap(volatile void __iomem *addr);
#define mm_ptov(addr) ((void *)phys_to_virt(addr))
#define mm_vtop(addr) ((unsigned long)virt_to_phys(addr))
+#if CHIP_HAS_MMIO()
+
+/*
+ * We use inline assembly to guarantee that the compiler does not
+ * split an access into multiple byte-sized accesses as it might
+ * sometimes do if a register data structure is marked "packed".
+ * Obviously on tile we can't tolerate such an access being
+ * actually unaligned, but we want to avoid the case where the
+ * compiler conservatively would generate multiple accesses even
+ * for an aligned read or write.
+ */
+
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ u16 ret;
+ asm volatile("ld2u %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le16_to_cpu(ret);
+}
+
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ u32 ret;
+ /* Sign-extend to conform to u32 ABI sign-extension convention. */
+ asm volatile("ld4s %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le32_to_cpu(ret);
+}
+
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ u64 ret;
+ asm volatile("ld %0, %1" : "=r" (ret) : "r" (addr));
+ barrier();
+ return le64_to_cpu(ret);
+}
+
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = val;
+}
+
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+ asm volatile("st2 %0, %1" :: "r" (addr), "r" (cpu_to_le16(val)));
+}
+
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+ asm volatile("st4 %0, %1" :: "r" (addr), "r" (cpu_to_le32(val)));
+}
+
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+ asm volatile("st %0, %1" :: "r" (addr), "r" (cpu_to_le64(val)));
+}
+
+/*
+ * The on-chip I/O hardware on tilegx is configured with VA=PA for the
+ * kernel's PA range. The low-level APIs and field names use "va" and
+ * "void *" nomenclature, to be consistent with the general notion
+ * that the addresses in question are virtualizable, but in the kernel
+ * context we are actually manipulating PA values. (In other contexts,
+ * e.g. access from user space, we do in fact use real virtual addresses
+ * in the va fields.) To allow readers of the code to understand what's
+ * happening, we direct their attention to this comment by using the
+ * following two functions that just duplicate __va() and __pa().
+ */
+typedef unsigned long tile_io_addr_t;
+static inline tile_io_addr_t va_to_tile_io_addr(void *va)
+{
+ BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(tile_io_addr_t));
+ return __pa(va);
+}
+static inline void *tile_io_addr_to_va(tile_io_addr_t tile_io_addr)
+{
+ return __va(tile_io_addr);
+}
+
+#else /* CHIP_HAS_MMIO() */
+
#ifdef CONFIG_PCI
extern u8 _tile_readb(unsigned long addr);
@@ -73,10 +160,19 @@ extern void _tile_writew(u16 val, unsigned long addr);
extern void _tile_writel(u32 val, unsigned long addr);
extern void _tile_writeq(u64 val, unsigned long addr);
-#else
+#define __raw_readb(addr) _tile_readb((unsigned long)addr)
+#define __raw_readw(addr) _tile_readw((unsigned long)addr)
+#define __raw_readl(addr) _tile_readl((unsigned long)addr)
+#define __raw_readq(addr) _tile_readq((unsigned long)addr)
+#define __raw_writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
+#define __raw_writew(val, addr) _tile_writew(val, (unsigned long)addr)
+#define __raw_writel(val, addr) _tile_writel(val, (unsigned long)addr)
+#define __raw_writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
+
+#else /* CONFIG_PCI */
/*
- * The Tile architecture does not support IOMEM unless PCI is enabled.
+ * The tilepro architecture does not support IOMEM unless PCI is enabled.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -88,65 +184,58 @@ static inline int iomem_panic(void)
return 0;
}
-static inline u8 _tile_readb(unsigned long addr)
+static inline u8 readb(unsigned long addr)
{
return iomem_panic();
}
-static inline u16 _tile_readw(unsigned long addr)
+static inline u16 _readw(unsigned long addr)
{
return iomem_panic();
}
-static inline u32 _tile_readl(unsigned long addr)
+static inline u32 readl(unsigned long addr)
{
return iomem_panic();
}
-static inline u64 _tile_readq(unsigned long addr)
+static inline u64 readq(unsigned long addr)
{
return iomem_panic();
}
-static inline void _tile_writeb(u8 val, unsigned long addr)
+static inline void writeb(u8 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writew(u16 val, unsigned long addr)
+static inline void writew(u16 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writel(u32 val, unsigned long addr)
+static inline void writel(u32 val, unsigned long addr)
{
iomem_panic();
}
-static inline void _tile_writeq(u64 val, unsigned long addr)
+static inline void writeq(u64 val, unsigned long addr)
{
iomem_panic();
}
-#endif
+#endif /* CONFIG_PCI */
-#define readb(addr) _tile_readb((unsigned long)addr)
-#define readw(addr) _tile_readw((unsigned long)addr)
-#define readl(addr) _tile_readl((unsigned long)addr)
-#define readq(addr) _tile_readq((unsigned long)addr)
-#define writeb(val, addr) _tile_writeb(val, (unsigned long)addr)
-#define writew(val, addr) _tile_writew(val, (unsigned long)addr)
-#define writel(val, addr) _tile_writel(val, (unsigned long)addr)
-#define writeq(val, addr) _tile_writeq(val, (unsigned long)addr)
-
-#define __raw_readb readb
-#define __raw_readw readw
-#define __raw_readl readl
-#define __raw_readq readq
-#define __raw_writeb writeb
-#define __raw_writew writew
-#define __raw_writel writel
-#define __raw_writeq writeq
+#endif /* CHIP_HAS_MMIO() */
+
+#define readb __raw_readb
+#define readw __raw_readw
+#define readl __raw_readl
+#define readq __raw_readq
+#define writeb __raw_writeb
+#define writew __raw_writew
+#define writel __raw_writel
+#define writeq __raw_writeq
#define readb_relaxed readb
#define readw_relaxed readw
@@ -162,9 +251,11 @@ static inline void _tile_writeq(u64 val, unsigned long addr)
#define iowrite32 writel
#define iowrite64 writeq
-static inline void memset_io(void *dst, int val, size_t len)
+#if CHIP_HAS_MMIO() || defined(CONFIG_PCI)
+
+static inline void memset_io(volatile void *dst, int val, size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
val = (val & 0xff) * 0x01010101;
for (x = 0; x < len; x += 4)
@@ -174,7 +265,7 @@ static inline void memset_io(void *dst, int val, size_t len)
static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)src & 0x3);
for (x = 0; x < len; x += 4)
*(u32 *)(dst + x) = readl(src + x);
@@ -183,14 +274,116 @@ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src,
static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
size_t len)
{
- int x;
+ size_t x;
BUG_ON((unsigned long)dst & 0x3);
for (x = 0; x < len; x += 4)
writel(*(u32 *)(src + x), dst + x);
}
+#endif
+
+#if CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO)
+
+static inline u8 inb(unsigned long addr)
+{
+ return readb((volatile void __iomem *) addr);
+}
+
+static inline u16 inw(unsigned long addr)
+{
+ return readw((volatile void __iomem *) addr);
+}
+
+static inline u32 inl(unsigned long addr)
+{
+ return readl((volatile void __iomem *) addr);
+}
+
+static inline void outb(u8 b, unsigned long addr)
+{
+ writeb(b, (volatile void __iomem *) addr);
+}
+
+static inline void outw(u16 b, unsigned long addr)
+{
+ writew(b, (volatile void __iomem *) addr);
+}
+
+static inline void outl(u32 b, unsigned long addr)
+{
+ writel(b, (volatile void __iomem *) addr);
+}
+
+static inline void insb(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u8 *buf = buffer;
+ do {
+ u8 x = inb(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insw(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u16 *buf = buffer;
+ do {
+ u16 x = inw(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void insl(unsigned long addr, void *buffer, int count)
+{
+ if (count) {
+ u32 *buf = buffer;
+ do {
+ u32 x = inl(addr);
+ *buf++ = x;
+ } while (--count);
+ }
+}
+
+static inline void outsb(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u8 *buf = buffer;
+ do {
+ outb(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsw(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u16 *buf = buffer;
+ do {
+ outw(*buf++, addr);
+ } while (--count);
+ }
+}
+
+static inline void outsl(unsigned long addr, const void *buffer, int count)
+{
+ if (count) {
+ const u32 *buf = buffer;
+ do {
+ outl(*buf++, addr);
+ } while (--count);
+ }
+}
+
+extern void __iomem *ioport_map(unsigned long port, unsigned int len);
+extern void ioport_unmap(void __iomem *addr);
+
+#else
+
/*
- * The Tile architecture does not support IOPORT, even with PCI.
+ * The TilePro architecture does not support IOPORT, even with PCI.
* Unfortunately we can't yet simply not declare these methods,
* since some generic code that compiles into the kernel, but
* we never run, uses them unconditionally.
@@ -198,7 +391,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
static inline long ioport_panic(void)
{
+#ifdef __tilegx__
+ panic("PCI IO space support is disabled. Configure the kernel with"
+ " CONFIG_TILE_PCI_IO to enable it");
+#else
panic("inb/outb and friends do not exist on tile");
+#endif
return 0;
}
@@ -243,13 +441,6 @@ static inline void outl(u32 b, unsigned long addr)
ioport_panic();
}
-#define inb_p(addr) inb(addr)
-#define inw_p(addr) inw(addr)
-#define inl_p(addr) inl(addr)
-#define outb_p(x, addr) outb((x), (addr))
-#define outw_p(x, addr) outw((x), (addr))
-#define outl_p(x, addr) outl((x), (addr))
-
static inline void insb(unsigned long addr, void *buffer, int count)
{
ioport_panic();
@@ -280,6 +471,15 @@ static inline void outsl(unsigned long addr, const void *buffer, int count)
ioport_panic();
}
+#endif /* CHIP_HAS_MMIO() && defined(CONFIG_TILE_PCI_IO) */
+
+#define inb_p(addr) inb(addr)
+#define inw_p(addr) inw(addr)
+#define inl_p(addr) inl(addr)
+#define outb_p(x, addr) outb((x), (addr))
+#define outw_p(x, addr) outw((x), (addr))
+#define outl_p(x, addr) outl((x), (addr))
+
#define ioread16be(addr) be16_to_cpu(ioread16(addr))
#define ioread32be(addr) be32_to_cpu(ioread32(addr))
#define iowrite16be(v, addr) iowrite16(be16_to_cpu(v), (addr))
diff --git a/arch/tile/include/asm/irq.h b/arch/tile/include/asm/irq.h
index f80f8ceabc6..1fe86911838 100644
--- a/arch/tile/include/asm/irq.h
+++ b/arch/tile/include/asm/irq.h
@@ -18,10 +18,12 @@
#include <linux/hardirq.h>
/* The hypervisor interface provides 32 IRQs. */
-#define NR_IRQS 32
+#define NR_IRQS 32
/* IRQ numbers used for linux IPIs. */
-#define IRQ_RESCHEDULE 1
+#define IRQ_RESCHEDULE 0
+/* Interrupts for dynamic allocation start at 1. Let the core allocate irq0 */
+#define NR_IRQS_LEGACY 1
#define irq_canonicalize(irq) (irq)
diff --git a/arch/tile/include/asm/irqflags.h b/arch/tile/include/asm/irqflags.h
index 5db0ce54284..71af5747874 100644
--- a/arch/tile/include/asm/irqflags.h
+++ b/arch/tile/include/asm/irqflags.h
@@ -18,32 +18,20 @@
#include <arch/interrupts.h>
#include <arch/chip.h>
-#if !defined(__tilegx__) && defined(__ASSEMBLY__)
-
/*
* The set of interrupts we want to allow when interrupts are nominally
* disabled. The remainder are effectively "NMI" interrupts from
* the point of view of the generic Linux code. Note that synchronous
* interrupts (aka "non-queued") are not blocked by the mask in any case.
*/
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS_HI \
- (~(INT_MASK_HI(INT_PERF_COUNT) | INT_MASK_HI(INT_AUX_PERF_COUNT)))
-#else
-#define LINUX_MASKABLE_INTERRUPTS_HI \
- (~(INT_MASK_HI(INT_PERF_COUNT)))
-#endif
-
-#else
-
-#if CHIP_HAS_AUX_PERF_COUNTERS()
-#define LINUX_MASKABLE_INTERRUPTS \
- (~(INT_MASK(INT_PERF_COUNT) | INT_MASK(INT_AUX_PERF_COUNT)))
-#else
#define LINUX_MASKABLE_INTERRUPTS \
- (~(INT_MASK(INT_PERF_COUNT)))
-#endif
+ (~((_AC(1,ULL) << INT_PERF_COUNT) | (_AC(1,ULL) << INT_AUX_PERF_COUNT)))
+#if CHIP_HAS_SPLIT_INTR_MASK()
+/* The same macro, but for the two 32-bit SPRs separately. */
+#define LINUX_MASKABLE_INTERRUPTS_LO (-1)
+#define LINUX_MASKABLE_INTERRUPTS_HI \
+ (~((1 << (INT_PERF_COUNT - 32)) | (1 << (INT_AUX_PERF_COUNT - 32))))
#endif
#ifndef __ASSEMBLY__
@@ -52,7 +40,15 @@
#include <asm/percpu.h>
#include <arch/spr_def.h>
-/* Set and clear kernel interrupt masks. */
+/*
+ * Set and clear kernel interrupt masks.
+ *
+ * NOTE: __insn_mtspr() is a compiler builtin marked as a memory
+ * clobber. We rely on it being equivalent to a compiler barrier in
+ * this code since arch_local_irq_save() and friends must act as
+ * compiler barriers. This compiler semantic is baked into enough
+ * places that the compiler will maintain it going forward.
+ */
#if CHIP_HAS_SPLIT_INTR_MASK()
#if INT_PERF_COUNT < 32 || INT_AUX_PERF_COUNT < 32 || INT_MEM_ERROR >= 32
# error Fix assumptions about which word various interrupts are in
@@ -90,6 +86,14 @@
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_0, (unsigned long)(__m)); \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K_1, (unsigned long)(__m>>32)); \
} while (0)
+#define interrupt_mask_save_mask() \
+ (__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_0) | \
+ (((unsigned long long)__insn_mfspr(SPR_INTERRUPT_MASK_SET_K_1))<<32))
+#define interrupt_mask_restore_mask(mask) do { \
+ unsigned long long __m = (mask); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K_0, (unsigned long)(__m)); \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K_1, (unsigned long)(__m>>32)); \
+} while (0)
#else
#define interrupt_mask_set(n) \
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (1UL << (n)))
@@ -101,6 +105,10 @@
__insn_mtspr(SPR_INTERRUPT_MASK_SET_K, (mask))
#define interrupt_mask_reset_mask(mask) \
__insn_mtspr(SPR_INTERRUPT_MASK_RESET_K, (mask))
+#define interrupt_mask_save_mask() \
+ __insn_mfspr(SPR_INTERRUPT_MASK_K)
+#define interrupt_mask_restore_mask(mask) \
+ __insn_mtspr(SPR_INTERRUPT_MASK_K, (mask))
#endif
/*
@@ -114,7 +122,13 @@
* to know our current state.
*/
DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
-#define INITIAL_INTERRUPTS_ENABLED INT_MASK(INT_MEM_ERROR)
+#define INITIAL_INTERRUPTS_ENABLED (1ULL << INT_MEM_ERROR)
+
+#ifdef CONFIG_DEBUG_PREEMPT
+/* Due to inclusion issues, we can't rely on <linux/smp.h> here. */
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#endif
/* Disable interrupts. */
#define arch_local_irq_disable() \
@@ -122,11 +136,20 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Disable all interrupts, including NMIs. */
#define arch_local_irq_disable_all() \
- interrupt_mask_set_mask(-1UL)
+ interrupt_mask_set_mask(-1ULL)
+
+/*
+ * Read the set of maskable interrupts.
+ * We avoid the preemption warning here via __this_cpu_ptr since even
+ * if irqs are already enabled, it's harmless to read the wrong cpu's
+ * enabled mask.
+ */
+#define arch_local_irqs_enabled() \
+ (*__this_cpu_ptr(&interrupts_enabled_mask))
/* Re-enable all maskable interrupts. */
#define arch_local_irq_enable() \
- interrupt_mask_reset_mask(__get_cpu_var(interrupts_enabled_mask))
+ interrupt_mask_reset_mask(arch_local_irqs_enabled())
/* Disable or enable interrupts based on flag argument. */
#define arch_local_irq_restore(disabled) do { \
@@ -153,7 +176,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Prevent the given interrupt from being enabled next time we enable irqs. */
#define arch_local_irq_mask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) &= ~INT_MASK(interrupt))
+ this_cpu_and(interrupts_enabled_mask, ~(1ULL << (interrupt)))
/* Prevent the given interrupt from being enabled immediately. */
#define arch_local_irq_mask_now(interrupt) do { \
@@ -163,7 +186,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Allow the given interrupt to be enabled next time we enable irqs. */
#define arch_local_irq_unmask(interrupt) \
- (__get_cpu_var(interrupts_enabled_mask) |= INT_MASK(interrupt))
+ this_cpu_or(interrupts_enabled_mask, (1ULL << (interrupt)))
/* Allow the given interrupt to be enabled immediately, if !irqs_disabled. */
#define arch_local_irq_unmask_now(interrupt) do { \
@@ -179,7 +202,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
#ifdef __tilegx__
#if INT_MEM_ERROR != 0
-# error Fix IRQ_DISABLED() macro
+# error Fix IRQS_DISABLED() macro
#endif
/* Return 0 or 1 to indicate whether interrupts are currently disabled. */
@@ -207,9 +230,10 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
mtspr SPR_INTERRUPT_MASK_SET_K, tmp
/* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
- ld tmp0, tmp0; \
+ ld tmp0, tmp0
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K, tmp0
#else /* !__tilegx__ */
@@ -237,7 +261,7 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
/* Disable interrupts. */
#define IRQ_DISABLE(tmp0, tmp1) \
{ \
- movei tmp0, -1; \
+ movei tmp0, LINUX_MASKABLE_INTERRUPTS_LO; \
moveli tmp1, lo16(LINUX_MASKABLE_INTERRUPTS_HI) \
}; \
{ \
@@ -253,17 +277,22 @@ DECLARE_PER_CPU(unsigned long long, interrupts_enabled_mask);
mtspr SPR_INTERRUPT_MASK_SET_K_1, tmp
/* Enable interrupts. */
-#define IRQ_ENABLE(tmp0, tmp1) \
+#define IRQ_ENABLE_LOAD(tmp0, tmp1) \
GET_INTERRUPTS_ENABLED_MASK_PTR(tmp0); \
{ \
lw tmp0, tmp0; \
addi tmp1, tmp0, 4 \
}; \
- lw tmp1, tmp1; \
+ lw tmp1, tmp1
+#define IRQ_ENABLE_APPLY(tmp0, tmp1) \
mtspr SPR_INTERRUPT_MASK_RESET_K_0, tmp0; \
mtspr SPR_INTERRUPT_MASK_RESET_K_1, tmp1
#endif
+#define IRQ_ENABLE(tmp0, tmp1) \
+ IRQ_ENABLE_LOAD(tmp0, tmp1); \
+ IRQ_ENABLE_APPLY(tmp0, tmp1)
+
/*
* Do the CPU's IRQ-state tracing from assembly code. We call a
* C function, but almost everywhere we do, we don't mind clobbering
diff --git a/arch/tile/include/asm/hw_irq.h b/arch/tile/include/asm/kdebug.h
index 4fac5fbf333..5bbbfa904c2 100644
--- a/arch/tile/include/asm/hw_irq.h
+++ b/arch/tile/include/asm/kdebug.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -12,7 +12,17 @@
* more details.
*/
-#ifndef _ASM_TILE_HW_IRQ_H
-#define _ASM_TILE_HW_IRQ_H
+#ifndef _ASM_TILE_KDEBUG_H
+#define _ASM_TILE_KDEBUG_H
-#endif /* _ASM_TILE_HW_IRQ_H */
+#include <linux/notifier.h>
+
+enum die_val {
+ DIE_OOPS = 1,
+ DIE_BREAK,
+ DIE_SSTEPBP,
+ DIE_PAGE_FAULT,
+ DIE_COMPILED_BPT
+};
+
+#endif /* _ASM_TILE_KDEBUG_H */
diff --git a/arch/tile/include/asm/kexec.h b/arch/tile/include/asm/kexec.h
index c11a6cc73bb..fc98ccfc98a 100644
--- a/arch/tile/include/asm/kexec.h
+++ b/arch/tile/include/asm/kexec.h
@@ -19,12 +19,24 @@
#include <asm/page.h>
+#ifndef __tilegx__
/* Maximum physical address we can use pages from. */
#define KEXEC_SOURCE_MEMORY_LIMIT TASK_SIZE
/* Maximum address we can reach in physical address mode. */
#define KEXEC_DESTINATION_MEMORY_LIMIT TASK_SIZE
/* Maximum address we can use for the control code buffer. */
#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+#else
+/* We need to limit the memory below PGDIR_SIZE since
+ * we only setup page table for [0, PGDIR_SIZE) before final kexec.
+ */
+/* Maximum physical address we can use pages from. */
+#define KEXEC_SOURCE_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can reach in physical address mode. */
+#define KEXEC_DESTINATION_MEMORY_LIMIT PGDIR_SIZE
+/* Maximum address we can use for the control code buffer. */
+#define KEXEC_CONTROL_MEMORY_LIMIT PGDIR_SIZE
+#endif
#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
diff --git a/arch/tile/include/asm/kgdb.h b/arch/tile/include/asm/kgdb.h
new file mode 100644
index 00000000000..280c181cf0d
--- /dev/null
+++ b/arch/tile/include/asm/kgdb.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2013 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ *
+ * TILE-Gx KGDB support.
+ */
+
+#ifndef __TILE_KGDB_H__
+#define __TILE_KGDB_H__
+
+#include <linux/kdebug.h>
+#include <arch/opcode.h>
+
+#define GDB_SIZEOF_REG sizeof(unsigned long)
+
+/*
+ * TILE-Gx gdb is expecting the following register layout:
+ * 56 GPRs(R0 - R52, TP, SP, LR), 8 special GPRs(networks and ZERO),
+ * plus the PC and the faultnum.
+ *
+ * Even though kernel not use the 8 special GPRs, they need to be present
+ * in the registers sent for correct processing in the host-side gdb.
+ *
+ */
+#define DBG_MAX_REG_NUM (56+8+2)
+#define NUMREGBYTES (DBG_MAX_REG_NUM * GDB_SIZEOF_REG)
+
+/*
+ * BUFMAX defines the maximum number of characters in inbound/outbound
+ * buffers at least NUMREGBYTES*2 are needed for register packets,
+ * Longer buffer is needed to list all threads.
+ */
+#define BUFMAX 2048
+
+#define BREAK_INSTR_SIZE TILEGX_BUNDLE_SIZE_IN_BYTES
+
+/*
+ * Require cache flush for set/clear a software breakpoint or write memory.
+ */
+#define CACHE_FLUSH_IS_SAFE 1
+
+/*
+ * The compiled-in breakpoint instruction can be used to "break" into
+ * the debugger via magic system request key (sysrq-G).
+ */
+static tile_bundle_bits compiled_bpt = TILEGX_BPT_BUNDLE | DIE_COMPILED_BPT;
+
+enum tilegx_regnum {
+ TILEGX_PC_REGNUM = TREG_LAST_GPR + 9,
+ TILEGX_FAULTNUM_REGNUM,
+};
+
+/*
+ * Generate a breakpoint exception to "break" into the debugger.
+ */
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm volatile (".quad %0\n\t"
+ ::""(compiled_bpt));
+}
+
+#endif /* __TILE_KGDB_H__ */
diff --git a/arch/tile/include/asm/kmap_types.h b/arch/tile/include/asm/kmap_types.h
index 3d0f2024626..92b28e3e997 100644
--- a/arch/tile/include/asm/kmap_types.h
+++ b/arch/tile/include/asm/kmap_types.h
@@ -23,35 +23,6 @@
* adds 4MB of required address-space. For now we leave KM_TYPE_NR
* set to depth 8.
*/
-enum km_type {
- KM_TYPE_NR = 8
-};
-
-/*
- * We provide dummy definitions of all the stray values that used to be
- * required for kmap_atomic() and no longer are.
- */
-enum {
- KM_BOUNCE_READ,
- KM_SKB_SUNRPC_DATA,
- KM_SKB_DATA_SOFTIRQ,
- KM_USER0,
- KM_USER1,
- KM_BIO_SRC_IRQ,
- KM_BIO_DST_IRQ,
- KM_PTE0,
- KM_PTE1,
- KM_IRQ0,
- KM_IRQ1,
- KM_SOFTIRQ0,
- KM_SOFTIRQ1,
- KM_SYNC_ICACHE,
- KM_SYNC_DCACHE,
- KM_UML_USERCOPY,
- KM_IRQ_PTE,
- KM_NMI,
- KM_NMI_PTE,
- KM_KDB
-};
+#define KM_TYPE_NR 8
#endif /* _ASM_TILE_KMAP_TYPES_H */
diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h
new file mode 100644
index 00000000000..d8f9a83943b
--- /dev/null
+++ b/arch/tile/include/asm/kprobes.h
@@ -0,0 +1,79 @@
+/*
+ * arch/tile/include/asm/kprobes.h
+ *
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_KPROBES_H
+#define _ASM_TILE_KPROBES_H
+
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#include <arch/opcode.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define kretprobe_blacklist_size 0
+
+typedef tile_bundle_bits kprobe_opcode_t;
+
+#define flush_insn_slot(p) \
+ flush_icache_range((unsigned long)p->addr, \
+ (unsigned long)p->addr + \
+ (MAX_INSN_SIZE * sizeof(kprobe_opcode_t)))
+
+struct kprobe;
+
+/* Architecture specific copy of original instruction. */
+struct arch_specific_insn {
+ kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+ unsigned long saved_pc;
+};
+
+#define MAX_JPROBES_STACK_SIZE 128
+#define MAX_JPROBES_STACK_ADDR \
+ (((unsigned long)current_thread_info()) + THREAD_SIZE - 32 \
+ - sizeof(struct pt_regs))
+
+#define MIN_JPROBES_STACK_SIZE(ADDR) \
+ ((((ADDR) + MAX_JPROBES_STACK_SIZE) > MAX_JPROBES_STACK_ADDR) \
+ ? MAX_JPROBES_STACK_ADDR - (ADDR) \
+ : MAX_JPROBES_STACK_SIZE)
+
+/* per-cpu kprobe control block. */
+struct kprobe_ctlblk {
+ unsigned long kprobe_status;
+ unsigned long kprobe_saved_pc;
+ unsigned long jprobe_saved_sp;
+ struct prev_kprobe prev_kprobe;
+ struct pt_regs jprobe_saved_regs;
+ char jprobes_stack[MAX_JPROBES_STACK_SIZE];
+};
+
+extern tile_bundle_bits breakpoint2_insn;
+extern tile_bundle_bits breakpoint_insn;
+
+void arch_remove_kprobe(struct kprobe *);
+
+extern int kprobe_exceptions_notify(struct notifier_block *self,
+ unsigned long val, void *data);
+
+#endif /* _ASM_TILE_KPROBES_H */
diff --git a/arch/tile/include/asm/memprof.h b/arch/tile/include/asm/memprof.h
deleted file mode 100644
index 359949be28c..00000000000
--- a/arch/tile/include/asm/memprof.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- *
- * The hypervisor's memory controller profiling infrastructure allows
- * the programmer to find out what fraction of the available memory
- * bandwidth is being consumed at each memory controller. The
- * profiler provides start, stop, and clear operations to allows
- * profiling over a specific time window, as well as an interface for
- * reading the most recent profile values.
- *
- * This header declares IOCTL codes necessary to control memprof.
- */
-#ifndef _ASM_TILE_MEMPROF_H
-#define _ASM_TILE_MEMPROF_H
-
-#include <linux/ioctl.h>
-
-#define MEMPROF_IOCTL_TYPE 0xB4
-#define MEMPROF_IOCTL_START _IO(MEMPROF_IOCTL_TYPE, 0)
-#define MEMPROF_IOCTL_STOP _IO(MEMPROF_IOCTL_TYPE, 1)
-#define MEMPROF_IOCTL_CLEAR _IO(MEMPROF_IOCTL_TYPE, 2)
-
-#endif /* _ASM_TILE_MEMPROF_H */
diff --git a/arch/tile/include/asm/mman.h b/arch/tile/include/asm/mman.h
deleted file mode 100644
index 81b8fc348d6..00000000000
--- a/arch/tile/include/asm/mman.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_MMAN_H
-#define _ASM_TILE_MMAN_H
-
-#include <asm-generic/mman-common.h>
-#include <arch/chip.h>
-
-/* Standard Linux flags */
-
-#define MAP_POPULATE 0x0040 /* populate (prefault) pagetables */
-#define MAP_NONBLOCK 0x0080 /* do not block on IO */
-#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
-#define MAP_STACK MAP_GROWSDOWN /* provide convenience alias */
-#define MAP_LOCKED 0x0200 /* pages are locked */
-#define MAP_NORESERVE 0x0400 /* don't check for reservations */
-#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
-#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
-#define MAP_HUGETLB 0x4000 /* create a huge page mapping */
-
-
-/*
- * Flags for mlockall
- */
-#define MCL_CURRENT 1 /* lock all current mappings */
-#define MCL_FUTURE 2 /* lock all future mappings */
-
-
-#endif /* _ASM_TILE_MMAN_H */
diff --git a/arch/tile/include/asm/mmu.h b/arch/tile/include/asm/mmu.h
index 92f94c77b6e..0cab1182bde 100644
--- a/arch/tile/include/asm/mmu.h
+++ b/arch/tile/include/asm/mmu.h
@@ -21,7 +21,8 @@ struct mm_context {
* Written under the mmap_sem semaphore; read without the
* semaphore but atomically, but it is conservatively set.
*/
- unsigned int priority_cached;
+ unsigned long priority_cached;
+ unsigned long vdso_base;
};
typedef struct mm_context mm_context_t;
diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h
index 15fb2464112..4734215e2ad 100644
--- a/arch/tile/include/asm/mmu_context.h
+++ b/arch/tile/include/asm/mmu_context.h
@@ -30,18 +30,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm)
return 0;
}
-/* Note that arch/tile/kernel/head.S also calls hv_install_context() */
+/*
+ * Note that arch/tile/kernel/head_NN.S and arch/tile/mm/migrate_NN.S
+ * also call hv_install_context().
+ */
static inline void __install_page_table(pgd_t *pgdir, int asid, pgprot_t prot)
{
/* FIXME: DIRECTIO should not always be set. FIXME. */
- int rc = hv_install_context(__pa(pgdir), prot, asid, HV_CTX_DIRECTIO);
+ int rc = hv_install_context(__pa(pgdir), prot, asid,
+ HV_CTX_DIRECTIO | CTX_PAGE_FLAG);
if (rc < 0)
panic("hv_install_context failed: %d", rc);
}
static inline void install_page_table(pgd_t *pgdir, int asid)
{
- pte_t *ptep = virt_to_pte(NULL, (unsigned long)pgdir);
+ pte_t *ptep = virt_to_kpte((unsigned long)pgdir);
__install_page_table(pgdir, asid, *ptep);
}
diff --git a/arch/tile/include/asm/mmzone.h b/arch/tile/include/asm/mmzone.h
index 9d3dbce8f95..804f1098b6c 100644
--- a/arch/tile/include/asm/mmzone.h
+++ b/arch/tile/include/asm/mmzone.h
@@ -42,7 +42,7 @@ static inline int pfn_to_nid(unsigned long pfn)
#define kern_addr_valid(kaddr) virt_addr_valid((void *)kaddr)
-static inline int pfn_valid(int pfn)
+static inline int pfn_valid(unsigned long pfn)
{
int nid = pfn_to_nid(pfn);
diff --git a/arch/tile/include/asm/module.h b/arch/tile/include/asm/module.h
new file mode 100644
index 00000000000..44ed07ccd3d
--- /dev/null
+++ b/arch/tile/include/asm/module.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2011 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_MODULE_H
+#define _ASM_TILE_MODULE_H
+
+#include <arch/chip.h>
+
+#include <asm-generic/module.h>
+
+/* We can't use modules built with different page sizes. */
+#if defined(CONFIG_PAGE_SIZE_16KB)
+# define MODULE_PGSZ " 16KB"
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+# define MODULE_PGSZ " 64KB"
+#else
+# define MODULE_PGSZ ""
+#endif
+
+/* We don't really support no-SMP so tag if someone tries. */
+#ifdef CONFIG_SMP
+#define MODULE_NOSMP ""
+#else
+#define MODULE_NOSMP " nosmp"
+#endif
+
+#define MODULE_ARCH_VERMAGIC CHIP_ARCH_NAME MODULE_PGSZ MODULE_NOSMP
+
+#endif /* _ASM_TILE_MODULE_H */
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index db93518fac0..67276800861 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -20,8 +20,17 @@
#include <arch/chip.h>
/* PAGE_SHIFT and HPAGE_SHIFT determine the page sizes. */
-#define PAGE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
-#define HPAGE_SHIFT HV_LOG2_PAGE_SIZE_LARGE
+#if defined(CONFIG_PAGE_SIZE_16KB)
+#define PAGE_SHIFT 14
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_16K
+#elif defined(CONFIG_PAGE_SIZE_64KB)
+#define PAGE_SHIFT 16
+#define CTX_PAGE_FLAG HV_CTX_PG_SM_64K
+#else
+#define PAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_SMALL
+#define CTX_PAGE_FLAG 0
+#endif
+#define HPAGE_SHIFT HV_LOG2_DEFAULT_PAGE_SIZE_LARGE
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
@@ -30,6 +39,12 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
/*
+ * We do define AT_SYSINFO_EHDR to support vDSO,
+ * but don't use the gate mechanism.
+ */
+#define __HAVE_ARCH_GATE_AREA 1
+
+/*
* If the Kconfig doesn't specify, set a maximum zone order that
* is enough so that we can create huge pages from small pages given
* the respective sizes of the two page types. See <linux/mmzone.h>.
@@ -78,8 +93,7 @@ typedef HV_PTE pgprot_t;
/*
* User L2 page tables are managed as one L2 page table per page,
* because we use the page allocator for them. This keeps the allocation
- * simple and makes it potentially useful to implement HIGHPTE at some point.
- * However, it's also inefficient, since L2 page tables are much smaller
+ * simple, but it's also inefficient, since L2 page tables are much smaller
* than pages (currently 2KB vs 64KB). So we should revisit this.
*/
typedef struct page *pgtable_t;
@@ -128,14 +142,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT)
-#define HUGE_MAX_HSTATE 2
+#define HUGE_MAX_HSTATE 6
#ifdef CONFIG_HUGETLB_PAGE
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif
+/* Allow overriding how much VA or PA the kernel will use. */
+#define MAX_PA_WIDTH CHIP_PA_WIDTH()
+#define MAX_VA_WIDTH CHIP_VA_WIDTH()
+
/* Each memory controller has PAs distinct in their high bits. */
-#define NR_PA_HIGHBIT_SHIFT (CHIP_PA_WIDTH() - CHIP_LOG_NUM_MSHIMS())
+#define NR_PA_HIGHBIT_SHIFT (MAX_PA_WIDTH - CHIP_LOG_NUM_MSHIMS())
#define NR_PA_HIGHBIT_VALUES (1 << CHIP_LOG_NUM_MSHIMS())
#define __pa_to_highbits(pa) ((phys_addr_t)(pa) >> NR_PA_HIGHBIT_SHIFT)
#define __pfn_to_highbits(pfn) ((pfn) >> (NR_PA_HIGHBIT_SHIFT - PAGE_SHIFT))
@@ -146,7 +164,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
* We reserve the lower half of memory for user-space programs, and the
* upper half for system code. We re-map all of physical memory in the
* upper half, which takes a quarter of our VA space. Then we have
- * the vmalloc regions. The supervisor code lives at 0xfffffff700000000,
+ * the vmalloc regions. The supervisor code lives at the highest address,
* with the hypervisor above that.
*
* Loadable kernel modules are placed immediately after the static
@@ -158,27 +176,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
* Similarly, for now we don't play any struct page mapping games.
*/
-#if CHIP_PA_WIDTH() + 2 > CHIP_VA_WIDTH()
+#if MAX_PA_WIDTH + 2 > MAX_VA_WIDTH
# error Too much PA to map with the VA available!
#endif
-#define HALF_VA_SPACE (_AC(1, UL) << (CHIP_VA_WIDTH() - 1))
-
-#define MEM_LOW_END (HALF_VA_SPACE - 1) /* low half */
-#define MEM_HIGH_START (-HALF_VA_SPACE) /* high half */
-#define PAGE_OFFSET MEM_HIGH_START
-#define _VMALLOC_START _AC(0xfffffff500000000, UL) /* 4 GB */
-#define HUGE_VMAP_BASE _AC(0xfffffff600000000, UL) /* 4 GB */
-#define MEM_SV_START _AC(0xfffffff700000000, UL) /* 256 MB */
-#define MEM_SV_INTRPT MEM_SV_START
-#define MEM_MODULE_START _AC(0xfffffff710000000, UL) /* 256 MB */
-#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
-#define MEM_HV_START _AC(0xfffffff800000000, UL) /* 32 GB */
-
-/* Highest DTLB address we will use */
-#define KERNEL_HIGH_VADDR MEM_SV_START
-/* Since we don't currently provide any fixmaps, we use an impossible VA. */
-#define FIXADDR_TOP MEM_HV_START
+#define PAGE_OFFSET (-(_AC(1, UL) << (MAX_VA_WIDTH - 1)))
+#define KERNEL_HIGH_VADDR _AC(0xfffffff800000000, UL) /* high 32GB */
+#define FIXADDR_BASE (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_TOP (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
+#define _VMALLOC_START FIXADDR_TOP
+#define MEM_SV_START (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */
+#define MEM_MODULE_START (MEM_SV_START + (256*1024*1024)) /* 256 MB */
+#define MEM_MODULE_END (MEM_MODULE_START + (256*1024*1024))
#else /* !__tilegx__ */
@@ -200,25 +209,18 @@ static inline __attribute_const__ int get_order(unsigned long size)
* values, and after that, we show "typical" values, since the actual
* addresses depend on kernel #defines.
*
- * MEM_HV_INTRPT 0xfe000000
- * MEM_SV_INTRPT (kernel code) 0xfd000000
+ * MEM_HV_START 0xfe000000
+ * MEM_SV_START (kernel code) 0xfd000000
* MEM_USER_INTRPT (user vector) 0xfc000000
- * FIX_KMAP_xxx 0xf8000000 (via NR_CPUS * KM_TYPE_NR)
- * PKMAP_BASE 0xf7000000 (via LAST_PKMAP)
- * HUGE_VMAP 0xf3000000 (via CONFIG_NR_HUGE_VMAPS)
- * VMALLOC_START 0xf0000000 (via __VMALLOC_RESERVE)
+ * FIX_KMAP_xxx 0xfa000000 (via NR_CPUS * KM_TYPE_NR)
+ * PKMAP_BASE 0xf9000000 (via LAST_PKMAP)
+ * VMALLOC_START 0xf7000000 (via VMALLOC_RESERVE)
* mapped LOWMEM 0xc0000000
*/
#define MEM_USER_INTRPT _AC(0xfc000000, UL)
-#if CONFIG_KERNEL_PL == 1
-#define MEM_SV_INTRPT _AC(0xfd000000, UL)
-#define MEM_HV_INTRPT _AC(0xfe000000, UL)
-#else
-#define MEM_GUEST_INTRPT _AC(0xfd000000, UL)
-#define MEM_SV_INTRPT _AC(0xfe000000, UL)
-#define MEM_HV_INTRPT _AC(0xff000000, UL)
-#endif
+#define MEM_SV_START _AC(0xfd000000, UL)
+#define MEM_HV_START _AC(0xfe000000, UL)
#define INTRPT_SIZE 0x4000
@@ -239,7 +241,7 @@ static inline __attribute_const__ int get_order(unsigned long size)
#endif /* __tilegx__ */
-#ifndef __ASSEMBLY__
+#if !defined(__ASSEMBLY__) && !defined(VDSO_BUILD)
#ifdef CONFIG_HIGHMEM
@@ -325,6 +327,7 @@ static inline int pfn_valid(unsigned long pfn)
struct mm_struct;
extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+extern pte_t *virt_to_kpte(unsigned long kaddr);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pci.h b/arch/tile/include/asm/pci.h
index 5d5a635530b..dfedd7ac729 100644
--- a/arch/tile/include/asm/pci.h
+++ b/arch/tile/include/asm/pci.h
@@ -15,9 +15,12 @@
#ifndef _ASM_TILE_PCI_H
#define _ASM_TILE_PCI_H
+#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <asm-generic/pci_iomap.h>
+#ifndef __tilegx__
+
/*
* Structure of a PCI controller (host bridge)
*/
@@ -25,7 +28,6 @@ struct pci_controller {
int index; /* PCI domain number */
struct pci_bus *root_bus;
- int first_busno;
int last_busno;
int hv_cfg_fd[2]; /* config{0,1} fds for this PCIe controller */
@@ -41,20 +43,161 @@ struct pci_controller {
};
/*
+ * This flag tells if the platform is TILEmpower that needs
+ * special configuration for the PLX switch chip.
+ */
+extern int tile_plx_gen1;
+
+static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+
+#define TILE_NUM_PCIE 2
+
+/*
* The hypervisor maps the entirety of CPA-space as bus addresses, so
* bus addresses are physical addresses. The networking and block
* device layers use this boolean for bounce buffer decisions.
*/
#define PCI_DMA_BUS_IS_PHYS 1
-int __devinit tile_pci_init(void);
-int __devinit pcibios_init(void);
+/* generic pci stuff */
+#include <asm-generic/pci.h>
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
+#else
-void __devinit pcibios_fixup_bus(struct pci_bus *bus);
+#include <asm/page.h>
+#include <gxio/trio.h>
-#define TILE_NUM_PCIE 2
+/**
+ * We reserve the hugepage-size address range at the top of the 64-bit address
+ * space to serve as the PCI window, emulating the BAR0 space of an endpoint
+ * device. This window is used by the chip-to-chip applications running on
+ * the RC node. The reason for carving out this window is that Mem-Maps that
+ * back up this window will not overlap with those that map the real physical
+ * memory.
+ */
+#define PCIE_HOST_BAR0_SIZE HPAGE_SIZE
+#define PCIE_HOST_BAR0_START HPAGE_MASK
+
+/**
+ * The first PAGE_SIZE of the above "BAR" window is mapped to the
+ * gxpci_host_regs structure.
+ */
+#define PCIE_HOST_REGS_SIZE PAGE_SIZE
+
+/*
+ * This is the PCI address where the Mem-Map interrupt regions start.
+ * We use the 2nd to the last huge page of the 64-bit address space.
+ * The last huge page is used for the rootcomplex "bar", for C2C purpose.
+ */
+#define MEM_MAP_INTR_REGIONS_BASE (HPAGE_MASK - HPAGE_SIZE)
+
+/*
+ * Each Mem-Map interrupt region occupies 4KB.
+ */
+#define MEM_MAP_INTR_REGION_SIZE (1 << TRIO_MAP_MEM_LIM__ADDR_SHIFT)
+
+/*
+ * Allocate the PCI BAR window right below 4GB.
+ */
+#define TILE_PCI_BAR_WINDOW_TOP (1ULL << 32)
+
+/*
+ * Allocate 1GB for the PCI BAR window.
+ */
+#define TILE_PCI_BAR_WINDOW_SIZE (1 << 30)
+
+/*
+ * This is the highest bus address targeting the host memory that
+ * can be generated by legacy PCI devices with 32-bit or less
+ * DMA capability, dictated by the BAR window size and location.
+ */
+#define TILE_PCI_MAX_DIRECT_DMA_ADDRESS \
+ (TILE_PCI_BAR_WINDOW_TOP - TILE_PCI_BAR_WINDOW_SIZE - 1)
+
+/*
+ * We shift the PCI bus range for all the physical memory up by the whole PA
+ * range. The corresponding CPA of an incoming PCI request will be the PCI
+ * address minus TILE_PCI_MEM_MAP_BASE_OFFSET. This also implies
+ * that the 64-bit capable devices will be given DMA addresses as
+ * the CPA plus TILE_PCI_MEM_MAP_BASE_OFFSET. To support 32-bit
+ * devices, we create a separate map region that handles the low
+ * 4GB.
+ *
+ * This design lets us avoid the "PCI hole" problem where the host bridge
+ * won't pass DMA traffic with target addresses that happen to fall within the
+ * BAR space. This enables us to use all the physical memory for DMA, instead
+ * of wasting the same amount of physical memory as the BAR window size.
+ */
+#define TILE_PCI_MEM_MAP_BASE_OFFSET (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Start of the PCI memory resource, which starts at the end of the
+ * maximum system physical RAM address.
+ */
+#define TILE_PCI_MEM_START (1ULL << CHIP_PA_WIDTH())
+
+/*
+ * Structure of a PCI controller (host bridge) on Gx.
+ */
+struct pci_controller {
+
+ /* Pointer back to the TRIO that this PCIe port is connected to. */
+ gxio_trio_context_t *trio;
+ int mac; /* PCIe mac index on the TRIO shim */
+ int trio_index; /* Index of TRIO shim that contains the MAC. */
+
+ int pio_mem_index; /* PIO region index for memory access */
+
+#ifdef CONFIG_TILE_PCI_IO
+ int pio_io_index; /* PIO region index for I/O space access */
+#endif
+
+ /*
+ * Mem-Map regions for all the memory controllers so that Linux can
+ * map all of its physical memory space to the PCI bus.
+ */
+ int mem_maps[MAX_NUMNODES];
+
+ int index; /* PCI domain number */
+ struct pci_bus *root_bus;
+
+ /* PCI I/O space resource for this controller. */
+ struct resource io_space;
+ char io_space_name[32];
+
+ /* PCI memory space resource for this controller. */
+ struct resource mem_space;
+ char mem_space_name[32];
+
+ uint64_t mem_offset; /* cpu->bus memory mapping offset. */
+
+ int first_busno;
+
+ struct pci_ops *ops;
+
+ /* Table that maps the INTx numbers to Linux irq numbers. */
+ int irq_intx_table[4];
+};
+
+extern struct pci_controller pci_controllers[TILEGX_NUM_TRIO * TILEGX_TRIO_PCIES];
+extern gxio_trio_context_t trio_contexts[TILEGX_NUM_TRIO];
+extern int num_trio_shims;
+
+extern void pci_iounmap(struct pci_dev *dev, void __iomem *);
+
+/*
+ * The PCI address space does not equal the physical memory address
+ * space (we have an IOMMU). The IDE and SCSI device layers use this
+ * boolean for bounce buffer decisions.
+ */
+#define PCI_DMA_BUS_IS_PHYS 0
+
+#endif /* __tilegx__ */
+
+int __init tile_pci_init(void);
+int __init pcibios_init(void);
+
+void pcibios_fixup_bus(struct pci_bus *bus);
#define pci_domain_nr(bus) (((struct pci_controller *)(bus)->sysdata)->index)
@@ -77,13 +220,8 @@ static inline int pcibios_assign_all_busses(void)
}
#define PCIBIOS_MIN_MEM 0
-#define PCIBIOS_MIN_IO 0
-
-/*
- * This flag tells if the platform is TILEmpower that needs
- * special configuration for the PLX switch chip.
- */
-extern int tile_plx_gen1;
+/* Minimum PCI I/O address, starting at the page boundary. */
+#define PCIBIOS_MIN_IO PAGE_SIZE
/* Use any cpu for PCI. */
#define cpumask_of_pcibus(bus) cpu_online_mask
@@ -91,7 +229,4 @@ extern int tile_plx_gen1;
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>
-/* generic pci stuff */
-#include <asm-generic/pci.h>
-
#endif /* _ASM_TILE_PCI_H */
diff --git a/arch/tile/include/asm/percpu.h b/arch/tile/include/asm/percpu.h
index 63294f5a8ef..4f7ae39fa20 100644
--- a/arch/tile/include/asm/percpu.h
+++ b/arch/tile/include/asm/percpu.h
@@ -15,9 +15,37 @@
#ifndef _ASM_TILE_PERCPU_H
#define _ASM_TILE_PERCPU_H
-register unsigned long __my_cpu_offset __asm__("tp");
-#define __my_cpu_offset __my_cpu_offset
-#define set_my_cpu_offset(tp) (__my_cpu_offset = (tp))
+register unsigned long my_cpu_offset_reg asm("tp");
+
+#ifdef CONFIG_PREEMPT
+/*
+ * For full preemption, we can't just use the register variable
+ * directly, since we need barrier() to hazard against it, causing the
+ * compiler to reload anything computed from a previous "tp" value.
+ * But we also don't want to use volatile asm, since we'd like the
+ * compiler to be able to cache the value across multiple percpu reads.
+ * So we use a fake stack read as a hazard against barrier().
+ * The 'U' constraint is like 'm' but disallows postincrement.
+ */
+static inline unsigned long __my_cpu_offset(void)
+{
+ unsigned long tp;
+ register unsigned long *sp asm("sp");
+ asm("move %0, tp" : "=r" (tp) : "U" (*sp));
+ return tp;
+}
+#define __my_cpu_offset __my_cpu_offset()
+#else
+/*
+ * We don't need to hazard against barrier() since "tp" doesn't ever
+ * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
+ * changes at function call points, at which we are already re-reading
+ * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
+ */
+#define __my_cpu_offset my_cpu_offset_reg
+#endif
+
+#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
#include <asm-generic/percpu.h>
diff --git a/arch/tile/include/asm/auxvec.h b/arch/tile/include/asm/perf_event.h
index 1d393edb064..59c5b164e5b 100644
--- a/arch/tile/include/asm/auxvec.h
+++ b/arch/tile/include/asm/perf_event.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -12,9 +12,11 @@
* more details.
*/
-#ifndef _ASM_TILE_AUXVEC_H
-#define _ASM_TILE_AUXVEC_H
+#ifndef _ASM_TILE_PERF_EVENT_H
+#define _ASM_TILE_PERF_EVENT_H
-/* No extensions to auxvec */
+#include <linux/percpu.h>
+DECLARE_PER_CPU(u64, perf_irqs);
-#endif /* _ASM_TILE_AUXVEC_H */
+unsigned long handle_syscall_link_address(void);
+#endif /* _ASM_TILE_PERF_EVENT_H */
diff --git a/arch/tile/include/asm/pgalloc.h b/arch/tile/include/asm/pgalloc.h
index e919c0bdc22..1b902508b66 100644
--- a/arch/tile/include/asm/pgalloc.h
+++ b/arch/tile/include/asm/pgalloc.h
@@ -19,24 +19,24 @@
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <asm/fixmap.h>
+#include <asm/page.h>
#include <hv/hypervisor.h>
/* Bits for the size of the second-level page table. */
-#define L2_KERNEL_PGTABLE_SHIFT \
- (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL + HV_LOG2_PTE_SIZE)
+#define L2_KERNEL_PGTABLE_SHIFT _HV_LOG2_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L2_KERNEL_PGTABLE_SIZE (1UL << L2_KERNEL_PGTABLE_SHIFT)
/* We currently allocate user L2 page tables by page (unlike kernel L2s). */
-#if L2_KERNEL_PGTABLE_SHIFT < HV_LOG2_PAGE_SIZE_SMALL
-#define L2_USER_PGTABLE_SHIFT HV_LOG2_PAGE_SIZE_SMALL
+#if L2_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L2_USER_PGTABLE_SHIFT PAGE_SHIFT
#else
#define L2_USER_PGTABLE_SHIFT L2_KERNEL_PGTABLE_SHIFT
#endif
/* How many pages do we need, as an "order", for a user L2 page table? */
-#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - HV_LOG2_PAGE_SIZE_SMALL)
-
-/* How big is a kernel L2 page table? */
-#define L2_KERNEL_PGTABLE_SIZE (1 << L2_KERNEL_PGTABLE_SHIFT)
+#define L2_USER_PGTABLE_ORDER (L2_USER_PGTABLE_SHIFT - PAGE_SHIFT)
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
@@ -50,14 +50,14 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *ptep)
{
- set_pmd(pmd, ptfn_pmd(__pa(ptep) >> HV_LOG2_PAGE_TABLE_ALIGN,
+ set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(__pa(ptep)),
__pgprot(_PAGE_PRESENT)));
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t page)
{
- set_pmd(pmd, ptfn_pmd(HV_PFN_TO_PTFN(page_to_pfn(page)),
+ set_pmd(pmd, ptfn_pmd(HV_CPA_TO_PTFN(PFN_PHYS(page_to_pfn(page))),
__pgprot(_PAGE_PRESENT)));
}
@@ -68,8 +68,20 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
extern pgd_t *pgd_alloc(struct mm_struct *mm);
extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address);
-extern void pte_free(struct mm_struct *mm, struct page *pte);
+extern pgtable_t pgtable_alloc_one(struct mm_struct *mm, unsigned long address,
+ int order);
+extern void pgtable_free(struct mm_struct *mm, struct page *pte, int order);
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
+ unsigned long address)
+{
+ return pgtable_alloc_one(mm, address, L2_USER_PGTABLE_ORDER);
+}
+
+static inline void pte_free(struct mm_struct *mm, struct page *pte)
+{
+ pgtable_free(mm, pte, L2_USER_PGTABLE_ORDER);
+}
#define pmd_pgtable(pmd) pmd_page(pmd)
@@ -85,8 +97,13 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
pte_free(mm, virt_to_page(pte));
}
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
- unsigned long address);
+extern void __pgtable_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address, int order);
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address)
+{
+ __pgtable_free_tlb(tlb, pte, address, L2_USER_PGTABLE_ORDER);
+}
#define check_pgt_cache() do { } while (0)
@@ -104,19 +121,44 @@ void shatter_pmd(pmd_t *pmd);
void shatter_huge_page(unsigned long addr);
#ifdef __tilegx__
-/* We share a single page allocator for both L1 and L2 page tables. */
-#if HV_L1_SIZE != HV_L2_SIZE
-# error Rework assumption that L1 and L2 page tables are same size.
-#endif
-#define L1_USER_PGTABLE_ORDER L2_USER_PGTABLE_ORDER
+
#define pud_populate(mm, pud, pmd) \
pmd_populate_kernel((mm), (pmd_t *)(pud), (pte_t *)(pmd))
-#define pmd_alloc_one(mm, addr) \
- ((pmd_t *)page_to_virt(pte_alloc_one((mm), (addr))))
-#define pmd_free(mm, pmdp) \
- pte_free((mm), virt_to_page(pmdp))
-#define __pmd_free_tlb(tlb, pmdp, address) \
- __pte_free_tlb((tlb), virt_to_page(pmdp), (address))
+
+/* Bits for the size of the L1 (intermediate) page table. */
+#define L1_KERNEL_PGTABLE_SHIFT _HV_LOG2_L1_SIZE(HPAGE_SHIFT)
+
+/* How big is a kernel L2 page table? */
+#define L1_KERNEL_PGTABLE_SIZE (1UL << L1_KERNEL_PGTABLE_SHIFT)
+
+/* We currently allocate L1 page tables by page. */
+#if L1_KERNEL_PGTABLE_SHIFT < PAGE_SHIFT
+#define L1_USER_PGTABLE_SHIFT PAGE_SHIFT
+#else
+#define L1_USER_PGTABLE_SHIFT L1_KERNEL_PGTABLE_SHIFT
#endif
+/* How many pages do we need, as an "order", for an L1 page table? */
+#define L1_USER_PGTABLE_ORDER (L1_USER_PGTABLE_SHIFT - PAGE_SHIFT)
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+ struct page *p = pgtable_alloc_one(mm, address, L1_USER_PGTABLE_ORDER);
+ return (pmd_t *)page_to_virt(p);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_free(mm, virt_to_page(pmdp), L1_USER_PGTABLE_ORDER);
+}
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
+ unsigned long address)
+{
+ __pgtable_free_tlb(tlb, virt_to_page(pmdp), address,
+ L1_USER_PGTABLE_ORDER);
+}
+
+#endif /* __tilegx__ */
+
#endif /* _ASM_TILE_PGALLOC_H */
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 1a20b7ef8ea..33587f16c15 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -27,9 +27,10 @@
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
+#include <linux/pfn.h>
#include <asm/processor.h>
#include <asm/fixmap.h>
-#include <asm/system.h>
+#include <asm/page.h>
struct mm_struct;
struct vm_area_struct;
@@ -70,6 +71,7 @@ extern void set_page_homes(void);
#define _PAGE_PRESENT HV_PTE_PRESENT
#define _PAGE_HUGE_PAGE HV_PTE_PAGE
+#define _PAGE_SUPER_PAGE HV_PTE_SUPER
#define _PAGE_READABLE HV_PTE_READABLE
#define _PAGE_WRITABLE HV_PTE_WRITABLE
#define _PAGE_EXECUTABLE HV_PTE_EXECUTABLE
@@ -86,6 +88,7 @@ extern void set_page_homes(void);
#define _PAGE_ALL (\
_PAGE_PRESENT | \
_PAGE_HUGE_PAGE | \
+ _PAGE_SUPER_PAGE | \
_PAGE_READABLE | \
_PAGE_WRITABLE | \
_PAGE_EXECUTABLE | \
@@ -163,7 +166,7 @@ extern void set_page_homes(void);
(pgprot_t) { ((oldprot).val & ~_PAGE_ALL) | (newprot).val }
/* Just setting the PFN to zero suffices. */
-#define pte_pgprot(x) hv_pte_set_pfn((x), 0)
+#define pte_pgprot(x) hv_pte_set_pa((x), 0)
/*
* For PTEs and PDEs, we must clear the Present bit first when
@@ -188,6 +191,7 @@ static inline void __pte_clear(pte_t *ptep)
* Undefined behaviour if not..
*/
#define pte_present hv_pte_get_present
+#define pte_mknotpresent hv_pte_clear_present
#define pte_user hv_pte_get_user
#define pte_read hv_pte_get_readable
#define pte_dirty hv_pte_get_dirty
@@ -195,6 +199,7 @@ static inline void __pte_clear(pte_t *ptep)
#define pte_write hv_pte_get_writable
#define pte_exec hv_pte_get_executable
#define pte_huge hv_pte_get_page
+#define pte_super hv_pte_get_super
#define pte_rdprotect hv_pte_clear_readable
#define pte_exprotect hv_pte_clear_executable
#define pte_mkclean hv_pte_clear_dirty
@@ -207,6 +212,7 @@ static inline void __pte_clear(pte_t *ptep)
#define pte_mkyoung hv_pte_set_accessed
#define pte_mkwrite hv_pte_set_writable
#define pte_mkhuge hv_pte_set_page
+#define pte_mksuper hv_pte_set_super
#define pte_special(pte) 0
#define pte_mkspecial(pte) (pte)
@@ -262,7 +268,7 @@ static inline int pte_none(pte_t pte)
static inline unsigned long pte_pfn(pte_t pte)
{
- return hv_pte_get_pfn(pte);
+ return PFN_DOWN(hv_pte_get_pa(pte));
}
/* Set or get the remote cache cpu in a pgprot with remote caching. */
@@ -271,7 +277,7 @@ extern int get_remote_cache_cpu(pgprot_t prot);
static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
{
- return hv_pte_set_pfn(prot, pfn);
+ return hv_pte_set_pa(prot, PFN_PHYS(pfn));
}
/* Support for priority mappings. */
@@ -313,7 +319,7 @@ extern void check_mm_caching(struct mm_struct *prev, struct mm_struct *next);
*/
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
- return pfn_pte(hv_pte_get_pfn(pte), newprot);
+ return pfn_pte(pte_pfn(pte), newprot);
}
/*
@@ -336,13 +342,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
*/
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-#if defined(CONFIG_HIGHPTE)
-extern pte_t *pte_offset_map(pmd_t *, unsigned long address);
-#define pte_unmap(pte) kunmap_atomic(pte)
-#else
#define pte_offset_map(dir, address) pte_offset_kernel(dir, address)
#define pte_unmap(pte) do { } while (0)
-#endif
/* Clear a non-executable kernel PTE and flush it from the TLB. */
#define kpte_clear_flush(ptep, vaddr) \
@@ -361,9 +362,6 @@ do { \
#define kern_addr_valid(addr) (1)
#endif /* CONFIG_FLATMEM */
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
- remap_pfn_range(vma, vaddr, pfn, size, prot)
-
extern void vmalloc_sync_all(void);
#endif /* !__ASSEMBLY__ */
@@ -411,6 +409,46 @@ static inline unsigned long pmd_index(unsigned long address)
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return ptep_test_and_clear_young(vma, address, pmdp_ptep(pmdp));
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
+{
+ ptep_set_wrprotect(mm, address, pmdp_ptep(pmdp));
+}
+
+
+#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
+static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm,
+ unsigned long address,
+ pmd_t *pmdp)
+{
+ return pte_pmd(ptep_get_and_clear(mm, address, pmdp_ptep(pmdp)));
+}
+
+static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ set_pte(pmdp_ptep(pmdp), pmd_pte(pmdval));
+}
+
+#define set_pmd_at(mm, addr, pmdp, pmdval) __set_pmd(pmdp, pmdval)
+
+/* Create a pmd from a PTFN. */
+static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
+{
+ return pte_pmd(hv_pte_set_ptfn(prot, ptfn));
+}
+
+/* Return the page-table frame number (ptfn) that a pmd_t points at. */
+#define pmd_ptfn(pmd) hv_pte_get_ptfn(pmd_pte(pmd))
+
/*
* A given kernel pmd_t maps to a specific virtual address (either a
* kernel huge page or a kernel pte_t table). Since kernel pte_t
@@ -431,7 +469,48 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
* OK for pte_lockptr(), since we just end up with potentially one
* lock being used for several pte_t arrays.
*/
-#define pmd_page(pmd) pfn_to_page(HV_PTFN_TO_PFN(pmd_ptfn(pmd)))
+#define pmd_page(pmd) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pmd_ptfn(pmd))))
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ __pte_clear(pmdp_ptep(pmdp));
+}
+
+#define pmd_mknotpresent(pmd) pte_pmd(pte_mknotpresent(pmd_pte(pmd)))
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd))
+#define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd)))
+#define __HAVE_ARCH_PMD_WRITE
+
+#define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot)))
+#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
+#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
+
+static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
+{
+ return pfn_pmd(pmd_pfn(pmd), newprot);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define has_transparent_hugepage() 1
+#define pmd_trans_huge pmd_huge_page
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+ return pte_pmd(hv_pte_set_client2(pmd_pte(pmd)));
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return hv_pte_get_client2(pmd_pte(pmd));
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
* The pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
@@ -449,17 +528,13 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
}
-static inline int pmd_huge_page(pmd_t pmd)
-{
- return pmd_val(pmd) & _PAGE_HUGE_PAGE;
-}
-
#include <asm-generic/pgtable.h>
/* Support /proc/NN/pgtable API. */
struct seq_file;
int arch_proc_pgtable_show(struct seq_file *m, struct mm_struct *mm,
- unsigned long vaddr, pte_t *ptep, void **datap);
+ unsigned long vaddr, unsigned long pagesize,
+ pte_t *ptep, void **datap);
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h
index 9f98529761f..d26a4227903 100644
--- a/arch/tile/include/asm/pgtable_32.h
+++ b/arch/tile/include/asm/pgtable_32.h
@@ -20,11 +20,12 @@
* The level-1 index is defined by the huge page size. A PGD is composed
* of PTRS_PER_PGD pgd_t's and is the top level of the page table.
*/
-#define PGDIR_SHIFT HV_LOG2_PAGE_SIZE_LARGE
-#define PGDIR_SIZE HV_PAGE_SIZE_LARGE
+#define PGDIR_SHIFT HPAGE_SHIFT
+#define PGDIR_SIZE HPAGE_SIZE
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
-#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+#define PTRS_PER_PGD _HV_L1_ENTRIES(HPAGE_SHIFT)
+#define PGD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
+#define SIZEOF_PGD _HV_L1_SIZE(HPAGE_SHIFT)
/*
* The level-2 index is defined by the difference between the huge
@@ -33,8 +34,9 @@
* Note that the hypervisor docs use PTE for what we call pte_t, so
* this nomenclature is somewhat confusing.
*/
-#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
-#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
+#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
+#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
#ifndef __ASSEMBLY__
@@ -53,17 +55,9 @@
#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
#ifdef CONFIG_HIGHMEM
-# define __VMAPPING_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
+# define _VMALLOC_END (PKMAP_BASE & ~(HPAGE_SIZE-1))
#else
-# define __VMAPPING_END (FIXADDR_START & ~(HPAGE_SIZE-1))
-#endif
-
-#ifdef CONFIG_HUGEVMAP
-#define HUGE_VMAP_END __VMAPPING_END
-#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
-#define _VMALLOC_END HUGE_VMAP_BASE
-#else
-#define _VMALLOC_END __VMAPPING_END
+# define _VMALLOC_END (FIXADDR_START & ~(HPAGE_SIZE-1))
#endif
/*
@@ -82,10 +76,12 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */;
/* We have no pmd or pud since we are strictly a two-level page table */
#include <asm-generic/pgtable-nopmd.h>
+static inline int pud_huge_page(pud_t pud) { return 0; }
+
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_INTRPT;
+ return addr >= MEM_HV_START;
}
/*
@@ -111,24 +107,14 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_pte(&pmdp->pud.pgd, pmdval.pud.pgd);
-}
-
-/* Create a pmd from a PTFN. */
-static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
-{
- return (pmd_t){ { hv_pte_set_ptfn(prot, ptfn) } };
-}
-
-/* Return the page-table frame number (ptfn) that a pmd_t points at. */
-#define pmd_ptfn(pmd) hv_pte_get_ptfn((pmd).pud.pgd)
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- __pte_clear(&pmdp->pud.pgd);
-}
+/*
+ * pmds are wrappers around pgds, which are the same as ptes.
+ * It's often convenient to "cast" back and forth and use the pte methods,
+ * which are the methods supplied by the hypervisor.
+ */
+#define pmd_pte(pmd) ((pmd).pud.pgd)
+#define pmdp_ptep(pmdp) (&(pmdp)->pud.pgd)
+#define pte_pmd(pte) ((pmd_t){ { (pte) } })
#endif /* __ASSEMBLY__ */
diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h
index fd80328523b..2c8a9cd102d 100644
--- a/arch/tile/include/asm/pgtable_64.h
+++ b/arch/tile/include/asm/pgtable_64.h
@@ -21,17 +21,19 @@
#define PGDIR_SIZE HV_L1_SPAN
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD HV_L0_ENTRIES
-#define SIZEOF_PGD (PTRS_PER_PGD * sizeof(pgd_t))
+#define PGD_INDEX(va) HV_L0_INDEX(va)
+#define SIZEOF_PGD HV_L0_SIZE
/*
* The level-1 index is defined by the huge page size. A PMD is composed
* of PTRS_PER_PMD pgd_t's and is the middle level of the page table.
*/
-#define PMD_SHIFT HV_LOG2_PAGE_SIZE_LARGE
-#define PMD_SIZE HV_PAGE_SIZE_LARGE
+#define PMD_SHIFT HPAGE_SHIFT
+#define PMD_SIZE HPAGE_SIZE
#define PMD_MASK (~(PMD_SIZE-1))
-#define PTRS_PER_PMD (1 << (PGDIR_SHIFT - PMD_SHIFT))
-#define SIZEOF_PMD (PTRS_PER_PMD * sizeof(pmd_t))
+#define PTRS_PER_PMD _HV_L1_ENTRIES(HPAGE_SHIFT)
+#define PMD_INDEX(va) _HV_L1_INDEX(va, HPAGE_SHIFT)
+#define SIZEOF_PMD _HV_L1_SIZE(HPAGE_SHIFT)
/*
* The level-2 index is defined by the difference between the huge
@@ -40,25 +42,34 @@
* Note that the hypervisor docs use PTE for what we call pte_t, so
* this nomenclature is somewhat confusing.
*/
-#define PTRS_PER_PTE (1 << (HV_LOG2_PAGE_SIZE_LARGE - HV_LOG2_PAGE_SIZE_SMALL))
-#define SIZEOF_PTE (PTRS_PER_PTE * sizeof(pte_t))
+#define PTRS_PER_PTE _HV_L2_ENTRIES(HPAGE_SHIFT, PAGE_SHIFT)
+#define PTE_INDEX(va) _HV_L2_INDEX(va, HPAGE_SHIFT, PAGE_SHIFT)
+#define SIZEOF_PTE _HV_L2_SIZE(HPAGE_SHIFT, PAGE_SHIFT)
/*
- * Align the vmalloc area to an L2 page table, and leave a guard page
- * at the beginning and end. The vmalloc code also puts in an internal
+ * Align the vmalloc area to an L2 page table. Omit guard pages at
+ * the beginning and end for simplicity (particularly in the per-cpu
+ * memory allocation code). The vmalloc code puts in an internal
* guard page between each allocation.
*/
-#define _VMALLOC_END HUGE_VMAP_BASE
-#define VMALLOC_END (_VMALLOC_END - PAGE_SIZE)
-#define VMALLOC_START (_VMALLOC_START + PAGE_SIZE)
-
-#define HUGE_VMAP_END (HUGE_VMAP_BASE + PGDIR_SIZE)
+#define _VMALLOC_END MEM_SV_START
+#define VMALLOC_END _VMALLOC_END
+#define VMALLOC_START _VMALLOC_START
#ifndef __ASSEMBLY__
/* We have no pud since we are a three-level page table. */
#include <asm-generic/pgtable-nopud.h>
+/*
+ * pmds are the same as pgds and ptes, so converting is a no-op.
+ */
+#define pmd_pte(pmd) (pmd)
+#define pmdp_ptep(pmdp) (pmdp)
+#define pte_pmd(pte) (pte)
+
+#define pud_pte(pud) ((pud).pgd)
+
static inline int pud_none(pud_t pud)
{
return pud_val(pud) == 0;
@@ -69,6 +80,11 @@ static inline int pud_present(pud_t pud)
return pud_val(pud) & _PAGE_PRESENT;
}
+static inline int pud_huge_page(pud_t pud)
+{
+ return pud_val(pud) & _PAGE_HUGE_PAGE;
+}
+
#define pmd_ERROR(e) \
pr_err("%s:%d: bad pmd 0x%016llx.\n", __FILE__, __LINE__, pmd_val(e))
@@ -85,6 +101,9 @@ static inline int pud_bad(pud_t pud)
/* Return the page-table frame number (ptfn) that a pud_t points at. */
#define pud_ptfn(pud) hv_pte_get_ptfn((pud).pgd)
+/* Return the page frame number (pfn) that a pud_t points at. */
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+
/*
* A given kernel pud_t maps to a kernel pmd_t table at a specific
* virtual address. Since kernel pmd_t tables can be aligned at
@@ -98,7 +117,7 @@ static inline int pud_bad(pud_t pud)
* A pud_t points to a pmd_t array. Since we can have multiple per
* page, we don't have a one-to-one mapping of pud_t's to pages.
*/
-#define pud_page(pud) pfn_to_page(HV_PTFN_TO_PFN(pud_ptfn(pud)))
+#define pud_page(pud) pfn_to_page(PFN_DOWN(HV_PTFN_TO_CPA(pud_ptfn(pud))))
static inline unsigned long pud_index(unsigned long address)
{
@@ -108,28 +127,6 @@ static inline unsigned long pud_index(unsigned long address)
#define pmd_offset(pud, address) \
((pmd_t *)pud_page_vaddr(*(pud)) + pmd_index(address))
-static inline void __set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_pte(pmdp, pmdval);
-}
-
-/* Create a pmd from a PTFN and pgprot. */
-static inline pmd_t ptfn_pmd(unsigned long ptfn, pgprot_t prot)
-{
- return hv_pte_set_ptfn(prot, ptfn);
-}
-
-/* Return the page-table frame number (ptfn) that a pmd_t points at. */
-static inline unsigned long pmd_ptfn(pmd_t pmd)
-{
- return hv_pte_get_ptfn(pmd);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
- __pte_clear(pmdp);
-}
-
/* Normalize an address to having the correct high bits set. */
#define pgd_addr_normalize pgd_addr_normalize
static inline unsigned long pgd_addr_normalize(unsigned long addr)
@@ -141,8 +138,7 @@ static inline unsigned long pgd_addr_normalize(unsigned long addr)
/* We don't define any pgds for these addresses. */
static inline int pgd_addr_invalid(unsigned long addr)
{
- return addr >= MEM_HV_START ||
- (addr > MEM_LOW_END && addr < MEM_HIGH_START);
+ return addr >= KERNEL_HIGH_VADDR || addr != pgd_addr_normalize(addr);
}
/*
diff --git a/arch/tile/include/asm/pmc.h b/arch/tile/include/asm/pmc.h
new file mode 100644
index 00000000000..7ae3956d900
--- /dev/null
+++ b/arch/tile/include/asm/pmc.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2014 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_PMC_H
+#define _ASM_TILE_PMC_H
+
+#include <linux/ptrace.h>
+
+#define TILE_BASE_COUNTERS 2
+
+/* Bitfields below are derived from SPR PERF_COUNT_CTL*/
+#ifndef __tilegx__
+/* PERF_COUNT_CTL on TILEPro */
+#define TILE_CTL_EXCL_USER (1 << 7) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL (1 << 8) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV (1 << 9) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK 0x7f /* 7 bits for event SEL,
+ COUNT_0_SEL */
+#define TILE_PLM_MASK 0x780 /* 4 bits priv level msks,
+ COUNT_0_MASK*/
+#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_PLM_MASK)
+
+#else /* __tilegx__*/
+/* PERF_COUNT_CTL on TILEGx*/
+#define TILE_CTL_EXCL_USER (1 << 10) /* exclude user level */
+#define TILE_CTL_EXCL_KERNEL (1 << 11) /* exclude kernel level */
+#define TILE_CTL_EXCL_HV (1 << 12) /* exclude hypervisor level */
+
+#define TILE_SEL_MASK 0x3f /* 6 bits for event SEL,
+ COUNT_0_SEL*/
+#define TILE_BOX_MASK 0x1c0 /* 3 bits box msks,
+ COUNT_0_BOX */
+#define TILE_PLM_MASK 0x3c00 /* 4 bits priv level msks,
+ COUNT_0_MASK */
+#define TILE_EVENT_MASK (TILE_SEL_MASK | TILE_BOX_MASK | TILE_PLM_MASK)
+#endif /* __tilegx__*/
+
+/* Takes register and fault number. Returns error to disable the interrupt. */
+typedef int (*perf_irq_t)(struct pt_regs *, int);
+
+int userspace_perf_handler(struct pt_regs *regs, int fault);
+
+perf_irq_t reserve_pmc_hardware(perf_irq_t new_perf_irq);
+void release_pmc_hardware(void);
+
+unsigned long pmc_get_overflow(void);
+void pmc_ack_overflow(unsigned long status);
+
+void unmask_pmc_interrupts(void);
+void mask_pmc_interrupts(void);
+
+#endif /* _ASM_TILE_PMC_H */
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 34c1e01ffb5..42323636c45 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -15,6 +15,8 @@
#ifndef _ASM_TILE_PROCESSOR_H
#define _ASM_TILE_PROCESSOR_H
+#include <arch/chip.h>
+
#ifndef __ASSEMBLY__
/*
@@ -25,7 +27,6 @@
#include <asm/ptrace.h>
#include <asm/percpu.h>
-#include <arch/chip.h>
#include <arch/spr_def.h>
struct task_struct;
@@ -76,6 +77,17 @@ struct async_tlb {
#ifdef CONFIG_HARDWALL
struct hardwall_info;
+struct hardwall_task {
+ /* Which hardwall is this task tied to? (or NULL if none) */
+ struct hardwall_info *info;
+ /* Chains this task into the list at info->task_head. */
+ struct list_head list;
+};
+#ifdef __tilepro__
+#define HARDWALL_TYPES 1 /* udn */
+#else
+#define HARDWALL_TYPES 3 /* udn, idn, and ipi */
+#endif
#endif
struct thread_struct {
@@ -99,47 +111,38 @@ struct thread_struct {
unsigned long long interrupt_mask;
/* User interrupt-control 0 state */
unsigned long intctrl_0;
-#if CHIP_HAS_PROC_STATUS_SPR()
+ /* Is this task currently doing a backtrace? */
+ bool in_backtrace;
/* Any other miscellaneous processor state bits */
unsigned long proc_status;
-#endif
#if !CHIP_HAS_FIXED_INTVEC_BASE()
/* Interrupt base for PL0 interrupts */
unsigned long interrupt_vector_base;
#endif
-#if CHIP_HAS_TILE_RTF_HWM()
/* Tile cache retry fifo high-water mark */
unsigned long tile_rtf_hwm;
-#endif
#if CHIP_HAS_DSTREAM_PF()
/* Data stream prefetch control */
unsigned long dstream_pf;
#endif
#ifdef CONFIG_HARDWALL
- /* Is this task tied to an activated hardwall? */
- struct hardwall_info *hardwall;
- /* Chains this task into the list at hardwall->list. */
- struct list_head hardwall_list;
+ /* Hardwall information for various resources. */
+ struct hardwall_task hardwall[HARDWALL_TYPES];
#endif
#if CHIP_HAS_TILE_DMA()
/* Async DMA TLB fault information */
struct async_tlb dma_async_tlb;
#endif
-#if CHIP_HAS_SN_PROC()
- /* Was static network processor when we were switched out? */
- int sn_proc_running;
- /* Async SNI TLB fault information */
- struct async_tlb sn_async_tlb;
-#endif
};
#endif /* !__ASSEMBLY__ */
/*
* Start with "sp" this many bytes below the top of the kernel stack.
- * This preserves the invariant that a called function may write to *sp.
+ * This allows us to be cache-aware when handling the initial save
+ * of the pt_regs value to the stack.
*/
-#define STACK_TOP_DELTA 8
+#define STACK_TOP_DELTA 64
/*
* When entering the kernel via a fault, start with the top of the
@@ -155,7 +158,7 @@ struct thread_struct {
#ifndef __ASSEMBLY__
#ifdef __tilegx__
-#define TASK_SIZE_MAX (MEM_LOW_END + 1)
+#define TASK_SIZE_MAX (_AC(1, UL) << (MAX_VA_WIDTH - 1))
#else
#define TASK_SIZE_MAX PAGE_OFFSET
#endif
@@ -169,10 +172,10 @@ struct thread_struct {
#define TASK_SIZE TASK_SIZE_MAX
#endif
-/* We provide a minimal "vdso" a la x86; just the sigreturn code for now. */
-#define VDSO_BASE (TASK_SIZE - PAGE_SIZE)
+#define VDSO_BASE ((unsigned long)current->active_mm->context.vdso_base)
+#define VDSO_SYM(x) (VDSO_BASE + (unsigned long)(x))
-#define STACK_TOP VDSO_BASE
+#define STACK_TOP TASK_SIZE
/* STACK_TOP_MAX is used temporarily in execve and should not check COMPAT. */
#define STACK_TOP_MAX TASK_SIZE_MAX
@@ -202,6 +205,7 @@ static inline void start_thread(struct pt_regs *regs,
{
regs->pc = pc;
regs->sp = usp;
+ single_step_execve();
}
/* Free all resources held by a thread. */
@@ -210,35 +214,40 @@ static inline void release_thread(struct task_struct *dead_task)
/* Nothing for now */
}
-/* Prepare to copy thread state - unlazy all lazy status. */
-#define prepare_to_copy(tsk) do { } while (0)
-
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
extern int do_work_pending(struct pt_regs *regs, u32 flags);
/*
* Return saved (kernel) PC of a blocked thread.
- * Only used in a printk() in kernel/sched.c, so don't work too hard.
+ * Only used in a printk() in kernel/sched/core.c, so don't work too hard.
*/
#define thread_saved_pc(t) ((t)->thread.pc)
unsigned long get_wchan(struct task_struct *p);
/* Return initial ksp value for given task. */
-#define task_ksp0(task) ((unsigned long)(task)->stack + THREAD_SIZE)
+#define task_ksp0(task) \
+ ((unsigned long)(task)->stack + THREAD_SIZE - STACK_TOP_DELTA)
/* Return some info about the user process TASK. */
-#define KSTK_TOP(task) (task_ksp0(task) - STACK_TOP_DELTA)
#define task_pt_regs(task) \
- ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+ ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define current_pt_regs() \
+ ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+ STACK_TOP_DELTA - (KSTK_PTREGS_GAP - 1)) - 1)
#define task_sp(task) (task_pt_regs(task)->sp)
#define task_pc(task) (task_pt_regs(task)->pc)
/* Aliases for pc and sp (used in fs/proc/array.c) */
#define KSTK_EIP(task) task_pc(task)
#define KSTK_ESP(task) task_sp(task)
+/* Fine-grained unaligned JIT support */
+#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
+#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
+
+extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
+extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
+
/* Standard format for printing registers and other word-size data. */
#ifdef __tilegx__
# define REGFMT "0x%016lx"
@@ -267,7 +276,6 @@ extern char chip_model[64];
/* Data on which physical memory controller corresponds to which NUMA node. */
extern int node_controller[];
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Does the heap allocator return hash-for-home pages by default? */
extern int hash_default;
@@ -277,11 +285,6 @@ extern int kstack_hash;
/* Does MAP_ANONYMOUS return hash-for-home pages by default? */
#define uheap_hash hash_default
-#else
-#define hash_default 0
-#define kstack_hash 0
-#define uheap_hash 0
-#endif
/* Are we using huge pages in the TLB for kernel data? */
extern int kdata_huge;
@@ -329,7 +332,6 @@ extern int kdata_huge;
/*
* Provide symbolic constants for PLs.
- * Note that assembly code assumes that USER_PL is zero.
*/
#define USER_PL 0
#if CONFIG_KERNEL_PL == 2
@@ -338,20 +340,38 @@ extern int kdata_huge;
#define KERNEL_PL CONFIG_KERNEL_PL
/* SYSTEM_SAVE_K_0 holds the current cpu number ORed with ksp0. */
-#define CPU_LOG_MASK_VALUE 12
-#define CPU_MASK_VALUE ((1 << CPU_LOG_MASK_VALUE) - 1)
-#if CONFIG_NR_CPUS > CPU_MASK_VALUE
-# error Too many cpus!
+#ifdef __tilegx__
+#define CPU_SHIFT 48
+#if CHIP_VA_WIDTH() > CPU_SHIFT
+# error Too many VA bits!
#endif
+#define MAX_CPU_ID ((1 << (64 - CPU_SHIFT)) - 1)
#define raw_smp_processor_id() \
- ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & CPU_MASK_VALUE)
+ ((int)(__insn_mfspr(SPR_SYSTEM_SAVE_K_0) >> CPU_SHIFT))
#define get_current_ksp0() \
- (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~CPU_MASK_VALUE)
+ ((unsigned long)(((long)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) << \
+ (64 - CPU_SHIFT)) >> (64 - CPU_SHIFT)))
+#define next_current_ksp0(task) ({ \
+ unsigned long __ksp0 = task_ksp0(task) & ((1UL << CPU_SHIFT) - 1); \
+ unsigned long __cpu = (long)raw_smp_processor_id() << CPU_SHIFT; \
+ __ksp0 | __cpu; \
+})
+#else
+#define LOG2_NR_CPU_IDS 6
+#define MAX_CPU_ID ((1 << LOG2_NR_CPU_IDS) - 1)
+#define raw_smp_processor_id() \
+ ((int)__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & MAX_CPU_ID)
+#define get_current_ksp0() \
+ (__insn_mfspr(SPR_SYSTEM_SAVE_K_0) & ~MAX_CPU_ID)
#define next_current_ksp0(task) ({ \
unsigned long __ksp0 = task_ksp0(task); \
int __cpu = raw_smp_processor_id(); \
- BUG_ON(__ksp0 & CPU_MASK_VALUE); \
+ BUG_ON(__ksp0 & MAX_CPU_ID); \
__ksp0 | __cpu; \
})
+#endif
+#if CONFIG_NR_CPUS > (MAX_CPU_ID + 1)
+# error Too many cpus!
+#endif
#endif /* _ASM_TILE_PROCESSOR_H */
diff --git a/arch/tile/include/asm/ptrace.h b/arch/tile/include/asm/ptrace.h
index c6cddd7e8d5..b9620c077ab 100644
--- a/arch/tile/include/asm/ptrace.h
+++ b/arch/tile/include/asm/ptrace.h
@@ -11,87 +11,20 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_PTRACE_H
#define _ASM_TILE_PTRACE_H
-#include <arch/chip.h>
-#include <arch/abi.h>
-
-/* These must match struct pt_regs, below. */
-#if CHIP_WORD_SIZE() == 32
-#define PTREGS_OFFSET_REG(n) ((n)*4)
-#else
-#define PTREGS_OFFSET_REG(n) ((n)*8)
-#endif
-#define PTREGS_OFFSET_BASE 0
-#define PTREGS_OFFSET_TP PTREGS_OFFSET_REG(53)
-#define PTREGS_OFFSET_SP PTREGS_OFFSET_REG(54)
-#define PTREGS_OFFSET_LR PTREGS_OFFSET_REG(55)
-#define PTREGS_NR_GPRS 56
-#define PTREGS_OFFSET_PC PTREGS_OFFSET_REG(56)
-#define PTREGS_OFFSET_EX1 PTREGS_OFFSET_REG(57)
-#define PTREGS_OFFSET_FAULTNUM PTREGS_OFFSET_REG(58)
-#define PTREGS_OFFSET_ORIG_R0 PTREGS_OFFSET_REG(59)
-#define PTREGS_OFFSET_FLAGS PTREGS_OFFSET_REG(60)
-#if CHIP_HAS_CMPEXCH()
-#define PTREGS_OFFSET_CMPEXCH PTREGS_OFFSET_REG(61)
-#endif
-#define PTREGS_SIZE PTREGS_OFFSET_REG(64)
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
-
-#ifdef __KERNEL__
/* Benefit from consistent use of "long" on all chips. */
typedef unsigned long pt_reg_t;
-#else
-/* Provide appropriate length type to userspace regardless of -m32/-m64. */
-typedef uint_reg_t pt_reg_t;
-#endif
-
-/*
- * This struct defines the way the registers are stored on the stack during a
- * system call or exception. "struct sigcontext" has the same shape.
- */
-struct pt_regs {
- /* Saved main processor registers; 56..63 are special. */
- /* tp, sp, and lr must immediately follow regs[] for aliasing. */
- pt_reg_t regs[53];
- pt_reg_t tp; /* aliases regs[TREG_TP] */
- pt_reg_t sp; /* aliases regs[TREG_SP] */
- pt_reg_t lr; /* aliases regs[TREG_LR] */
-
- /* Saved special registers. */
- pt_reg_t pc; /* stored in EX_CONTEXT_K_0 */
- pt_reg_t ex1; /* stored in EX_CONTEXT_K_1 (PL and ICS bit) */
- pt_reg_t faultnum; /* fault number (INT_SWINT_1 for syscall) */
- pt_reg_t orig_r0; /* r0 at syscall entry, else zero */
- pt_reg_t flags; /* flags (see below) */
-#if !CHIP_HAS_CMPEXCH()
- pt_reg_t pad[3];
-#else
- pt_reg_t cmpexch; /* value of CMPEXCH_VALUE SPR at interrupt */
- pt_reg_t pad[2];
#endif
-};
-
-#endif /* __ASSEMBLY__ */
-#define PTRACE_GETREGS 12
-#define PTRACE_SETREGS 13
-#define PTRACE_GETFPREGS 14
-#define PTRACE_SETFPREGS 15
+#include <uapi/asm/ptrace.h>
-/* Support TILE-specific ptrace options, with events starting at 16. */
-#define PTRACE_O_TRACEMIGRATE 0x00010000
-#define PTRACE_EVENT_MIGRATE 16
-#ifdef __KERNEL__
#define PTRACE_O_MASK_TILE (PTRACE_O_TRACEMIGRATE)
-#define PT_TRACE_MIGRATE 0x00080000
-#define PT_TRACE_MASK_TILE (PT_TRACE_MIGRATE)
-#endif
-
-#ifdef __KERNEL__
+#define PT_TRACE_MIGRATE PT_EVENT_FLAG(PTRACE_EVENT_MIGRATE)
/* Flag bits in pt_regs.flags */
#define PT_FLAGS_DISABLE_IRQ 1 /* on return to kernel, disable irqs */
@@ -100,17 +33,20 @@ struct pt_regs {
#ifndef __ASSEMBLY__
+#define regs_return_value(regs) ((regs)->regs[0])
#define instruction_pointer(regs) ((regs)->pc)
#define profile_pc(regs) instruction_pointer(regs)
+#define user_stack_pointer(regs) ((regs)->sp)
/* Does the process account for user or for system time? */
-#define user_mode(regs) (EX1_PL((regs)->ex1) == USER_PL)
+#define user_mode(regs) (EX1_PL((regs)->ex1) < KERNEL_PL)
/* Fill in a struct pt_regs with the current kernel registers. */
struct pt_regs *get_pt_regs(struct pt_regs *);
/* Trace the current syscall. */
-extern void do_syscall_trace(void);
+extern int do_syscall_trace_enter(struct pt_regs *regs);
+extern void do_syscall_trace_exit(struct pt_regs *regs);
#define arch_has_single_step() (1)
@@ -144,8 +80,7 @@ extern void single_step_execve(void);
struct task_struct;
-extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
- int error_code);
+extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
#ifdef __tilegx__
/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
@@ -159,6 +94,4 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
#define SINGLESTEP_STATE_TARGET_LB 2
#define SINGLESTEP_STATE_TARGET_UB 7
-#endif /* !__KERNEL__ */
-
#endif /* _ASM_TILE_PTRACE_H */
diff --git a/arch/tile/include/asm/sections.h b/arch/tile/include/asm/sections.h
index d062d463fca..5d5d3b739a6 100644
--- a/arch/tile/include/asm/sections.h
+++ b/arch/tile/include/asm/sections.h
@@ -25,16 +25,22 @@ extern char _sinitdata[], _einitdata[];
/* Write-once data is writable only till the end of initialization. */
extern char __w1data_begin[], __w1data_end[];
+extern char vdso_start[], vdso_end[];
+#ifdef CONFIG_COMPAT
+extern char vdso32_start[], vdso32_end[];
+#endif
/* Not exactly sections, but PC comparison points in the code. */
extern char __rt_sigreturn[], __rt_sigreturn_end[];
-#ifndef __tilegx__
+#ifdef __tilegx__
+extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
+#else
extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
#endif
-/* Handle the discontiguity between _sdata and _stext. */
+/* Handle the discontiguity between _sdata and _text. */
static inline int arch_is_kernel_data(unsigned long addr)
{
return addr >= (unsigned long)_sdata &&
diff --git a/arch/tile/include/asm/setup.h b/arch/tile/include/asm/setup.h
index 7caf0f36b03..e98909033e5 100644
--- a/arch/tile/include/asm/setup.h
+++ b/arch/tile/include/asm/setup.h
@@ -11,26 +11,42 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_SETUP_H
#define _ASM_TILE_SETUP_H
-#define COMMAND_LINE_SIZE 2048
-
-#ifdef __KERNEL__
#include <linux/pfn.h>
#include <linux/init.h>
+#include <uapi/asm/setup.h>
/*
* Reserved space for vmalloc and iomap - defined in asm/page.h
*/
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
+int tile_console_write(const char *buf, int count);
void early_panic(const char *fmt, ...);
-void warn_early_printk(void);
-void __init disable_early_printk(void);
-#endif /* __KERNEL__ */
+/* Init-time routine to do tile-specific per-cpu setup. */
+void setup_cpu(int boot);
+
+/* User-level DMA management functions */
+void grant_dma_mpls(void);
+void restrict_dma_mpls(void);
+
+#ifdef CONFIG_HARDWALL
+/* User-level network management functions */
+void reset_network_state(void);
+struct task_struct;
+void hardwall_switch_tasks(struct task_struct *prev, struct task_struct *next);
+void hardwall_deactivate_all(struct task_struct *task);
+int hardwall_ipi_valid(int cpu);
+
+/* Hook hardwall code into changes in affinity. */
+#define arch_set_cpus_allowed(p, new_mask) do { \
+ if (!cpumask_equal(&p->cpus_allowed, new_mask)) \
+ hardwall_deactivate_all(p); \
+} while (0)
+#endif
#endif /* _ASM_TILE_SETUP_H */
diff --git a/arch/tile/include/asm/sigcontext.h b/arch/tile/include/asm/sigcontext.h
deleted file mode 100644
index 6348e59d372..00000000000
--- a/arch/tile/include/asm/sigcontext.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_SIGCONTEXT_H
-#define _ASM_TILE_SIGCONTEXT_H
-
-/* Don't pollute the namespace since <signal.h> includes this file. */
-#define __need_int_reg_t
-#include <arch/abi.h>
-
-/*
- * struct sigcontext has the same shape as struct pt_regs,
- * but is simplified since we know the fault is from userspace.
- */
-struct sigcontext {
- __uint_reg_t gregs[53]; /* General-purpose registers. */
- __uint_reg_t tp; /* Aliases gregs[TREG_TP]. */
- __uint_reg_t sp; /* Aliases gregs[TREG_SP]. */
- __uint_reg_t lr; /* Aliases gregs[TREG_LR]. */
- __uint_reg_t pc; /* Program counter. */
- __uint_reg_t ics; /* In Interrupt Critical Section? */
- __uint_reg_t faultnum; /* Fault number. */
- __uint_reg_t pad[5];
-};
-
-#endif /* _ASM_TILE_SIGCONTEXT_H */
diff --git a/arch/tile/include/asm/signal.h b/arch/tile/include/asm/signal.h
index 1e5e49aad54..10e183de96d 100644
--- a/arch/tile/include/asm/signal.h
+++ b/arch/tile/include/asm/signal.h
@@ -11,19 +11,11 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
#ifndef _ASM_TILE_SIGNAL_H
#define _ASM_TILE_SIGNAL_H
-/* Do not notify a ptracer when this signal is handled. */
-#define SA_NOPTRACE 0x02000000u
-
-/* Used in earlier Tilera releases, so keeping for binary compatibility. */
-#define SA_RESTORER 0x04000000u
-
-#include <asm-generic/signal.h>
+#include <uapi/asm/signal.h>
-#if defined(__KERNEL__)
#if !defined(__ASSEMBLY__)
struct pt_regs;
int restore_sigcontext(struct pt_regs *, struct sigcontext __user *);
@@ -34,6 +26,4 @@ void signal_fault(const char *type, struct pt_regs *,
void trace_unhandled_signal(const char *type, struct pt_regs *regs,
unsigned long address, int signo);
#endif
-#endif
-
#endif /* _ASM_TILE_SIGNAL_H */
diff --git a/arch/tile/include/asm/smp.h b/arch/tile/include/asm/smp.h
index 532124ae4b1..9a326b64f7a 100644
--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
@@ -43,10 +43,6 @@ void evaluate_message(int tag);
/* Boot a secondary cpu */
void online_secondary(void);
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
- void (*func)(void *), void *info, bool wait);
-
/* Topology of the supervisor tile grid, and coordinates of boot processor */
extern HV_Topology smp_topology;
@@ -91,9 +87,6 @@ void print_disabled_cpus(void);
#else /* !CONFIG_SMP */
-#define on_each_cpu_mask(mask, func, info, wait) \
- do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
#define smp_master_cpu 0
#define smp_height 1
#define smp_width 1
@@ -108,10 +101,8 @@ void print_disabled_cpus(void);
extern struct cpumask cpu_lotar_map;
#define cpu_is_valid_lotar(cpu) cpumask_test_cpu((cpu), &cpu_lotar_map)
-#if CHIP_HAS_CBOX_HOME_MAP()
/* Which processors are used for hash-for-home mapping */
extern struct cpumask hash_for_home_map;
-#endif
/* Which cpus can have their cache flushed by hv_flush_remote(). */
extern struct cpumask cpu_cacheable_map;
diff --git a/arch/tile/include/asm/spinlock_32.h b/arch/tile/include/asm/spinlock_32.h
index a5e4208d34f..c0a77b38d39 100644
--- a/arch/tile/include/asm/spinlock_32.h
+++ b/arch/tile/include/asm/spinlock_32.h
@@ -19,7 +19,6 @@
#include <linux/atomic.h>
#include <asm/page.h>
-#include <asm/system.h>
#include <linux/compiler.h>
/*
diff --git a/arch/tile/include/asm/spinlock_64.h b/arch/tile/include/asm/spinlock_64.h
index 72be5904e02..9a12b9c7e5d 100644
--- a/arch/tile/include/asm/spinlock_64.h
+++ b/arch/tile/include/asm/spinlock_64.h
@@ -27,7 +27,7 @@
* Return the "current" portion of a ticket lock value,
* i.e. the number that currently owns the lock.
*/
-static inline int arch_spin_current(u32 val)
+static inline u32 arch_spin_current(u32 val)
{
return val >> __ARCH_SPIN_CURRENT_SHIFT;
}
@@ -36,7 +36,7 @@ static inline int arch_spin_current(u32 val)
* Return the "next" portion of a ticket lock value,
* i.e. the number that the next task to try to acquire the lock will get.
*/
-static inline int arch_spin_next(u32 val)
+static inline u32 arch_spin_next(u32 val)
{
return val & __ARCH_SPIN_NEXT_MASK;
}
@@ -137,7 +137,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
__insn_mf();
- rw->lock = 0;
+ __insn_exch4(&rw->lock, 0); /* Avoid waiting in the write buffer. */
}
static inline int arch_read_trylock(arch_rwlock_t *rw)
diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h
index 4d97a2db932..0e9d382a2d4 100644
--- a/arch/tile/include/asm/stack.h
+++ b/arch/tile/include/asm/stack.h
@@ -25,7 +25,6 @@
struct KBacktraceIterator {
BacktraceIterator it;
struct task_struct *task; /* task we are backtracing */
- pte_t *pgtable; /* page table for user space access */
int end; /* iteration complete. */
int new_context; /* new context is starting */
int profile; /* profiling, so stop on async intrpt */
diff --git a/arch/tile/include/asm/stat.h b/arch/tile/include/asm/stat.h
deleted file mode 100644
index c0db34d56be..00000000000
--- a/arch/tile/include/asm/stat.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
-#define __ARCH_WANT_STAT64 /* Used for compat_sys_stat64() etc. */
-#endif
-#include <asm-generic/stat.h>
diff --git a/arch/tile/include/asm/string.h b/arch/tile/include/asm/string.h
index 7535cf1a30e..92b271bd9eb 100644
--- a/arch/tile/include/asm/string.h
+++ b/arch/tile/include/asm/string.h
@@ -21,8 +21,10 @@
#define __HAVE_ARCH_MEMMOVE
#define __HAVE_ARCH_STRCHR
#define __HAVE_ARCH_STRLEN
+#define __HAVE_ARCH_STRNLEN
extern __kernel_size_t strlen(const char *);
+extern __kernel_size_t strnlen(const char *, __kernel_size_t);
extern char *strchr(const char *s, int c);
extern void *memchr(const void *s, int c, size_t n);
extern void *memset(void *, int, __kernel_size_t);
diff --git a/arch/tile/include/asm/swab.h b/arch/tile/include/asm/swab.h
deleted file mode 100644
index 7c37b38f6c8..00000000000
--- a/arch/tile/include/asm/swab.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_SWAB_H
-#define _ASM_TILE_SWAB_H
-
-/* Tile gcc is always >= 4.3.0, so we use __builtin_bswap. */
-#define __arch_swab32(x) __builtin_bswap32(x)
-#define __arch_swab64(x) __builtin_bswap64(x)
-#define __arch_swab16(x) (__builtin_bswap32(x) >> 16)
-
-#endif /* _ASM_TILE_SWAB_H */
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
new file mode 100644
index 00000000000..b8f888cbe6b
--- /dev/null
+++ b/arch/tile/include/asm/switch_to.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _ASM_TILE_SWITCH_TO_H
+#define _ASM_TILE_SWITCH_TO_H
+
+#include <arch/sim_def.h>
+
+/*
+ * switch_to(n) should switch tasks to task nr n, first
+ * checking that n isn't the current task, in which case it does nothing.
+ * The number of callee-saved registers saved on the kernel stack
+ * is defined here for use in copy_thread() and must agree with __switch_to().
+ */
+#define CALLEE_SAVED_FIRST_REG 30
+#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
+
+#ifndef __ASSEMBLY__
+
+struct task_struct;
+
+/*
+ * Pause the DMA engine and static network before task switching.
+ */
+#define prepare_arch_switch(next) _prepare_arch_switch(next)
+void _prepare_arch_switch(struct task_struct *next);
+
+struct task_struct;
+#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
+extern struct task_struct *_switch_to(struct task_struct *prev,
+ struct task_struct *next);
+
+/* Helper function for _switch_to(). */
+extern struct task_struct *__switch_to(struct task_struct *prev,
+ struct task_struct *next,
+ unsigned long new_system_save_k_0);
+
+/* Address that switched-away from tasks are at. */
+extern unsigned long get_switch_to_pc(void);
+
+/*
+ * Kernel threads can check to see if they need to migrate their
+ * stack whenever they return from a context switch; for user
+ * threads, we defer until they are returning to user-space.
+ */
+#define finish_arch_switch(prev) do { \
+ if (unlikely((prev)->state == TASK_DEAD)) \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
+ ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
+ (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
+ if (current->mm == NULL && !kstack_hash && \
+ current_thread_info()->homecache_cpu != smp_processor_id()) \
+ homecache_migrate_kthread(); \
+} while (0)
+
+/* Support function for forking a new task. */
+void ret_from_fork(void);
+
+/* Support function for forking a new kernel thread. */
+void ret_from_kernel_thread(void *fn, void *arg);
+
+/* Called from ret_from_xxx() when a new process starts up. */
+struct task_struct *sim_notify_fork(struct task_struct *prev);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_TILE_SWITCH_TO_H */
diff --git a/arch/tile/include/asm/syscall.h b/arch/tile/include/asm/syscall.h
index d35e0dcb67b..9644b88f133 100644
--- a/arch/tile/include/asm/syscall.h
+++ b/arch/tile/include/asm/syscall.h
@@ -22,6 +22,12 @@
#include <linux/err.h>
#include <arch/abi.h>
+/* The array of function pointers for syscalls. */
+extern void *sys_call_table[];
+#ifdef CONFIG_COMPAT
+extern void *compat_sys_call_table[];
+#endif
+
/*
* Only the low 32 bits of orig_r0 are meaningful, so we return int.
* This importantly ignores the high bits on 64-bit, so comparisons
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 3b5507c31ea..07b298450ef 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -24,12 +24,6 @@
#include <linux/types.h>
#include <linux/compat.h>
-/* The array of function pointers for syscalls. */
-extern void *sys_call_table[];
-#ifdef CONFIG_COMPAT
-extern void *compat_sys_call_table[];
-#endif
-
/*
* Note that by convention, any syscall which requires the current
* register set takes an additional "struct pt_regs *" pointer; a
@@ -43,15 +37,15 @@ long sys32_fadvise64(int fd, u32 offset_lo, u32 offset_hi,
u32 len, int advice);
int sys32_fadvise64_64(int fd, u32 offset_lo, u32 offset_hi,
u32 len_lo, u32 len_hi, int advice);
-long sys_flush_cache(void);
+long sys_cacheflush(unsigned long addr, unsigned long len,
+ unsigned long flags);
#ifndef __tilegx__ /* No mmap() in the 32-bit kernel. */
#define sys_mmap sys_mmap
#endif
#ifndef __tilegx__
/* mm/fault.c */
-long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
-long _sys_cmpxchg_badaddr(unsigned long address);
+long sys_cmpxchg_badaddr(unsigned long address);
#endif
#ifdef CONFIG_COMPAT
@@ -62,14 +56,14 @@ long sys_truncate64(const char __user *path, loff_t length);
long sys_ftruncate64(unsigned int fd, loff_t length);
#endif
+/* Provide versions of standard syscalls that use current_pt_regs(). */
+long sys_rt_sigreturn(void);
+#define sys_rt_sigreturn sys_rt_sigreturn
+
/* These are the intvec*.S trampolines. */
-long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
long _sys_rt_sigreturn(void);
long _sys_clone(unsigned long clone_flags, unsigned long newsp,
void __user *parent_tid, void __user *child_tid);
-long _sys_execve(const char __user *filename,
- const char __user *const __user *argv,
- const char __user *const __user *envp);
#include <asm-generic/syscalls.h>
diff --git a/arch/tile/include/asm/system.h b/arch/tile/include/asm/system.h
deleted file mode 100644
index 23d1842f483..00000000000
--- a/arch/tile/include/asm/system.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Copyright 2010 Tilera Corporation. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, version 2.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for
- * more details.
- */
-
-#ifndef _ASM_TILE_SYSTEM_H
-#define _ASM_TILE_SYSTEM_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-#include <linux/irqflags.h>
-
-/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
-#include <asm/ptrace.h>
-
-#include <arch/chip.h>
-#include <arch/sim_def.h>
-#include <arch/spr_def.h>
-
-/*
- * read_barrier_depends - Flush all pending reads that subsequents reads
- * depend on.
- *
- * No data-dependent reads from memory-like regions are ever reordered
- * over this barrier. All reads preceding this primitive are guaranteed
- * to access memory (but not necessarily other CPUs' caches) before any
- * reads following this primitive that depend on the data return by
- * any of the preceding reads. This primitive is much lighter weight than
- * rmb() on most CPUs, and is never heavier weight than is
- * rmb().
- *
- * These ordering constraints are respected by both the local CPU
- * and the compiler.
- *
- * Ordering is not guaranteed by anything other than these primitives,
- * not even by data dependencies. See the documentation for
- * memory_barrier() for examples and URLs to more information.
- *
- * For example, the following code would force ordering (the initial
- * value of "a" is zero, "b" is one, and "p" is "&a"):
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * b = 2;
- * memory_barrier();
- * p = &b; q = p;
- * read_barrier_depends();
- * d = *q;
- * </programlisting>
- *
- * because the read of "*q" depends on the read of "p" and these
- * two reads are separated by a read_barrier_depends(). However,
- * the following code, with the same initial values for "a" and "b":
- *
- * <programlisting>
- * CPU 0 CPU 1
- *
- * a = 2;
- * memory_barrier();
- * b = 3; y = b;
- * read_barrier_depends();
- * x = a;
- * </programlisting>
- *
- * does not enforce ordering, since there is no data dependency between
- * the read of "a" and the read of "b". Therefore, on some CPUs, such
- * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
- * in cases like this where there are no data dependencies.
- */
-
-#define read_barrier_depends() do { } while (0)
-
-#define __sync() __insn_mf()
-
-#if CHIP_HAS_SPLIT_CYCLE()
-#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
-#else
-#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
-#endif
-
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
-#include <hv/syscall_public.h>
-/*
- * Issue an uncacheable load to each memory controller, then
- * wait until those loads have completed.
- */
-static inline void __mb_incoherent(void)
-{
- long clobber_r10;
- asm volatile("swint2"
- : "=R10" (clobber_r10)
- : "R10" (HV_SYS_fence_incoherent)
- : "r0", "r1", "r2", "r3", "r4",
- "r5", "r6", "r7", "r8", "r9",
- "r11", "r12", "r13", "r14",
- "r15", "r16", "r17", "r18", "r19",
- "r20", "r21", "r22", "r23", "r24",
- "r25", "r26", "r27", "r28", "r29");
-}
-#endif
-
-/* Fence to guarantee visibility of stores to incoherent memory. */
-static inline void
-mb_incoherent(void)
-{
- __insn_mf();
-
-#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
- {
-#if CHIP_HAS_TILE_WRITE_PENDING()
- const unsigned long WRITE_TIMEOUT_CYCLES = 400;
- unsigned long start = get_cycles_low();
- do {
- if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
- return;
- } while ((get_cycles_low() - start) < WRITE_TIMEOUT_CYCLES);
-#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
- (void) __mb_incoherent();
- }
-#endif /* CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
-}
-
-#define fast_wmb() __sync()
-#define fast_rmb() __sync()
-#define fast_mb() __sync()
-#define fast_iob() mb_incoherent()
-
-#define wmb() fast_wmb()
-#define rmb() fast_rmb()
-#define mb() fast_mb()
-#define iob() fast_iob()
-
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while (0)
-#endif
-
-#define set_mb(var, value) \
- do { var = value; mb(); } while (0)
-
-/*
- * Pause the DMA engine and static network before task switching.
- */
-#define prepare_arch_switch(next) _prepare_arch_switch(next)
-void _prepare_arch_switch(struct task_struct *next);
-
-
-/*
- * switch_to(n) should switch tasks to task nr n, first
- * checking that n isn't the current task, in which case it does nothing.
- * The number of callee-saved registers saved on the kernel stack
- * is defined here for use in copy_thread() and must agree with __switch_to().
- */
-#endif /* !__ASSEMBLY__ */
-#define CALLEE_SAVED_FIRST_REG 30
-#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
-#ifndef __ASSEMBLY__
-struct task_struct;
-#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
-extern struct task_struct *_switch_to(struct task_struct *prev,
- struct task_struct *next);
-
-/* Helper function for _switch_to(). */
-extern struct task_struct *__switch_to(struct task_struct *prev,
- struct task_struct *next,
- unsigned long new_system_save_k_0);
-
-/* Address that switched-away from tasks are at. */
-extern unsigned long get_switch_to_pc(void);
-
-/*
- * On SMP systems, when the scheduler does migration-cost autodetection,
- * it needs a way to flush as much of the CPU's caches as possible:
- *
- * TODO: fill this in!
- */
-static inline void sched_cacheflush(void)
-{
-}
-
-#define arch_align_stack(x) (x)
-
-/*
- * Is the kernel doing fixups of unaligned accesses? If <0, no kernel
- * intervention occurs and SIGBUS is delivered with no data address
- * info. If 0, the kernel single-steps the instruction to discover
- * the data address to provide with the SIGBUS. If 1, the kernel does
- * a fixup.
- */
-extern int unaligned_fixup;
-
-/* Is the kernel printing on each unaligned fixup? */
-extern int unaligned_printk;
-
-/* Number of unaligned fixups performed */
-extern unsigned int unaligned_fixup_count;
-
-/* Init-time routine to do tile-specific per-cpu setup. */
-void setup_cpu(int boot);
-
-/* User-level DMA management functions */
-void grant_dma_mpls(void);
-void restrict_dma_mpls(void);
-
-#ifdef CONFIG_HARDWALL
-/* User-level network management functions */
-void reset_network_state(void);
-void grant_network_mpls(void);
-void restrict_network_mpls(void);
-int hardwall_deactivate(struct task_struct *task);
-
-/* Hook hardwall code into changes in affinity. */
-#define arch_set_cpus_allowed(p, new_mask) do { \
- if (p->thread.hardwall && !cpumask_equal(&p->cpus_allowed, new_mask)) \
- hardwall_deactivate(p); \
-} while (0)
-#endif
-
-/*
- * Kernel threads can check to see if they need to migrate their
- * stack whenever they return from a context switch; for user
- * threads, we defer until they are returning to user-space.
- */
-#define finish_arch_switch(prev) do { \
- if (unlikely((prev)->state == TASK_DEAD)) \
- __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
- ((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
- __insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
- (current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
- if (current->mm == NULL && !kstack_hash && \
- current_thread_info()->homecache_cpu != smp_processor_id()) \
- homecache_migrate_kthread(); \
-} while (0)
-
-/* Support function for forking a new task. */
-void ret_from_fork(void);
-
-/* Called from ret_from_fork() when a new process starts up. */
-struct task_struct *sim_notify_fork(struct task_struct *prev);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_TILE_SYSTEM_H */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index bc4f562bd45..48e4fd0f38e 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -39,6 +39,11 @@ struct thread_info {
struct restart_block restart_block;
struct single_step_state *step_state; /* single step state
(if non-zero) */
+ int align_ctl; /* controls unaligned access */
+#ifdef __tilegx__
+ unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
+ void __user *unalign_jit_base; /* unalign fixup JIT base */
+#endif
};
/*
@@ -56,6 +61,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
.step_state = NULL, \
+ .align_ctl = 0, \
}
#define init_thread_info (init_thread_union.thread_info)
@@ -77,40 +83,36 @@ struct thread_info {
#ifndef __ASSEMBLY__
+void arch_release_thread_info(struct thread_info *info);
+
/* How to get the thread information struct from C. */
register unsigned long stack_pointer __asm__("sp");
#define current_thread_info() \
((struct thread_info *)(stack_pointer & -THREAD_SIZE))
-#define __HAVE_ARCH_THREAD_INFO_ALLOCATOR
-extern struct thread_info *alloc_thread_info_node(struct task_struct *task, int node);
-extern void free_thread_info(struct thread_info *info);
-
/* Sit on a nap instruction until interrupted. */
extern void smp_nap(void);
-/* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */
+/* Enable interrupts racelessly and nap forever: helper for arch_cpu_idle(). */
extern void _cpu_idle(void);
-/* Switch boot idle thread to a freshly-allocated stack and free old stack. */
-extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
- unsigned long new_sp,
- unsigned long new_ss10);
-
#else /* __ASSEMBLY__ */
-/* how to get the thread information struct from ASM */
+/*
+ * How to get the thread information struct from assembly.
+ * Note that we use different macros since different architectures
+ * have different semantics in their "mm" instruction and we would
+ * like to guarantee that the macro expands to exactly one instruction.
+ */
#ifdef __tilegx__
-#define GET_THREAD_INFO(reg) move reg, sp; mm reg, zero, LOG2_THREAD_SIZE, 63
+#define EXTRACT_THREAD_INFO(reg) mm reg, zero, LOG2_THREAD_SIZE, 63
#else
#define GET_THREAD_INFO(reg) mm reg, sp, zero, LOG2_THREAD_SIZE, 31
#endif
#endif /* !__ASSEMBLY__ */
-#define PREEMPT_ACTIVE 0x10000000
-
/*
* Thread information flags that various assembly files may need to access.
* Keep flags accessed frequently in low bits, particular since it makes
@@ -126,6 +128,8 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#define TIF_SECCOMP 6 /* secure computing */
#define TIF_MEMDIE 7 /* OOM killer at work */
#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
+#define TIF_SYSCALL_TRACEPOINT 9 /* syscall tracepoint instrumentation */
+#define TIF_POLLING_NRFLAG 10 /* idle is polling for TIF_NEED_RESCHED */
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
@@ -136,12 +140,20 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_MEMDIE (1<<TIF_MEMDIE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
/* Work to do on any return to user space. */
#define _TIF_ALLWORK_MASK \
(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SINGLESTEP|\
_TIF_ASYNC_TLB|_TIF_NOTIFY_RESUME)
+/* Work to do at syscall entry. */
+#define _TIF_SYSCALL_ENTRY_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+
+/* Work to do at syscall exit. */
+#define _TIF_SYSCALL_EXIT_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT)
+
/*
* Thread-synchronous status.
*
@@ -152,18 +164,31 @@ extern void cpu_idle_on_new_stack(struct thread_info *old_ti,
#ifdef __tilegx__
#define TS_COMPAT 0x0001 /* 32-bit compatibility mode */
#endif
-#define TS_POLLING 0x0004 /* in idle loop but not sleeping */
#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal */
-#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
-
#ifndef __ASSEMBLY__
#define HAVE_SET_RESTORE_SIGMASK 1
static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->status |= TS_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, &ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->status & TS_RESTORE_SIGMASK))
+ return false;
+ ti->status &= ~TS_RESTORE_SIGMASK;
+ return true;
}
#endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/timex.h b/arch/tile/include/asm/timex.h
index 29921f0b86d..dc987d53e2a 100644
--- a/arch/tile/include/asm/timex.h
+++ b/arch/tile/include/asm/timex.h
@@ -29,11 +29,13 @@ typedef unsigned long long cycles_t;
#if CHIP_HAS_SPLIT_CYCLE()
cycles_t get_cycles(void);
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
#else
static inline cycles_t get_cycles(void)
{
return __insn_mfspr(SPR_CYCLE);
}
+#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
#endif
cycles_t get_clock_rate(void);
diff --git a/arch/tile/include/asm/tlbflush.h b/arch/tile/include/asm/tlbflush.h
index 96199d214fb..dcf91b25a1e 100644
--- a/arch/tile/include/asm/tlbflush.h
+++ b/arch/tile/include/asm/tlbflush.h
@@ -38,16 +38,11 @@ DECLARE_PER_CPU(int, current_asid);
/* The hypervisor tells us what ASIDs are available to us. */
extern int min_asid, max_asid;
-static inline unsigned long hv_page_size(const struct vm_area_struct *vma)
-{
- return (vma->vm_flags & VM_HUGETLB) ? HPAGE_SIZE : PAGE_SIZE;
-}
-
/* Pass as vma pointer for non-executable mapping, if no vma available. */
-#define FLUSH_NONEXEC ((const struct vm_area_struct *)-1UL)
+#define FLUSH_NONEXEC ((struct vm_area_struct *)-1UL)
/* Flush a single user page on this cpu. */
-static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr,
unsigned long page_size)
{
@@ -60,7 +55,7 @@ static inline void local_flush_tlb_page(const struct vm_area_struct *vma,
}
/* Flush range of user pages on this cpu. */
-static inline void local_flush_tlb_pages(const struct vm_area_struct *vma,
+static inline void local_flush_tlb_pages(struct vm_area_struct *vma,
unsigned long addr,
unsigned long page_size,
unsigned long len)
@@ -117,10 +112,10 @@ extern void flush_tlb_all(void);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
extern void flush_tlb_current_task(void);
extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(const struct vm_area_struct *, unsigned long);
-extern void flush_tlb_page_mm(const struct vm_area_struct *,
+extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+extern void flush_tlb_page_mm(struct vm_area_struct *,
struct mm_struct *, unsigned long);
-extern void flush_tlb_range(const struct vm_area_struct *,
+extern void flush_tlb_range(struct vm_area_struct *,
unsigned long start, unsigned long end);
#define flush_tlb() flush_tlb_current_task()
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h
index 6fdd0c86019..93831184423 100644
--- a/arch/tile/include/asm/topology.h
+++ b/arch/tile/include/asm/topology.h
@@ -44,66 +44,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
/* For now, use numa node -1 for global allocation. */
#define pcibus_to_node(bus) ((void)(bus), -1)
-/*
- * TILE architecture has many cores integrated in one processor, so we need
- * setup bigger balance_interval for both CPU/NODE scheduling domains to
- * reduce process scheduling costs.
- */
-
-/* sched_domains SD_CPU_INIT for TILE architecture */
-#define SD_CPU_INIT (struct sched_domain) { \
- .min_interval = 4, \
- .max_interval = 128, \
- .busy_factor = 64, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 2, \
- .idle_idx = 1, \
- .newidle_idx = 0, \
- .wake_idx = 0, \
- .forkexec_idx = 0, \
- \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 0*SD_SERIALIZE \
- , \
- .last_balance = jiffies, \
- .balance_interval = 32, \
-}
-
-/* sched_domains SD_NODE_INIT for TILE architecture */
-#define SD_NODE_INIT (struct sched_domain) { \
- .min_interval = 16, \
- .max_interval = 512, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_nice_tries = 1, \
- .busy_idx = 3, \
- .idle_idx = 1, \
- .newidle_idx = 2, \
- .wake_idx = 1, \
- .flags = 1*SD_LOAD_BALANCE \
- | 1*SD_BALANCE_NEWIDLE \
- | 1*SD_BALANCE_EXEC \
- | 1*SD_BALANCE_FORK \
- | 0*SD_BALANCE_WAKE \
- | 0*SD_WAKE_AFFINE \
- | 0*SD_PREFER_LOCAL \
- | 0*SD_SHARE_CPUPOWER \
- | 0*SD_SHARE_PKG_RESOURCES \
- | 1*SD_SERIALIZE \
- , \
- .last_balance = jiffies, \
- .balance_interval = 128, \
-}
-
/* By definition, we create nodes based on online memory. */
#define node_has_online_mem(nid) 1
@@ -116,9 +56,6 @@ static inline const struct cpumask *cpumask_of_node(int node)
#define topology_core_id(cpu) (cpu)
#define topology_core_cpumask(cpu) ((void)(cpu), cpu_online_mask)
#define topology_thread_cpumask(cpu) cpumask_of(cpu)
-
-/* indicates that pointers to the topology struct cpumask maps are valid */
-#define arch_provides_topology_pointers yes
#endif
#endif /* _ASM_TILE_TOPOLOGY_H */
diff --git a/arch/tile/include/asm/traps.h b/arch/tile/include/asm/traps.h
index 5f20f920f93..4b99a1c3aab 100644
--- a/arch/tile/include/asm/traps.h
+++ b/arch/tile/include/asm/traps.h
@@ -15,12 +15,13 @@
#ifndef _ASM_TILE_TRAPS_H
#define _ASM_TILE_TRAPS_H
+#ifndef __ASSEMBLY__
#include <arch/chip.h>
/* mm/fault.c */
void do_page_fault(struct pt_regs *, int fault_num,
unsigned long address, unsigned long write);
-#if CHIP_HAS_TILE_DMA() || CHIP_HAS_SN_PROC()
+#if CHIP_HAS_TILE_DMA()
void do_async_page_fault(struct pt_regs *);
#endif
@@ -64,7 +65,21 @@ void do_breakpoint(struct pt_regs *, int fault_num);
#ifdef __tilegx__
+/* kernel/single_step.c */
void gx_singlestep_handle(struct pt_regs *, int fault_num);
+
+/* kernel/intvec_64.S */
+void fill_ra_stack(void);
+
+/* Handle unalign data fixup. */
+extern void do_unaligned(struct pt_regs *regs, int vecnum);
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#ifdef __tilegx__
+/* 128 byte JIT per unalign fixup. */
+#define UNALIGN_JIT_SHIFT 7
#endif
-#endif /* _ASM_TILE_SYSCALLS_H */
+#endif /* _ASM_TILE_TRAPS_H */
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index ef34d2caa5b..b6cde3209b9 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -114,45 +114,79 @@ struct exception_table_entry {
extern int fixup_exception(struct pt_regs *regs);
/*
- * We return the __get_user_N function results in a structure,
- * thus in r0 and r1. If "err" is zero, "val" is the result
- * of the read; otherwise, "err" is -EFAULT.
- *
- * We rarely need 8-byte values on a 32-bit architecture, but
- * we size the structure to accommodate. In practice, for the
- * the smaller reads, we can zero the high word for free, and
- * the caller will ignore it by virtue of casting anyway.
+ * Support macros for __get_user().
+ *
+ * Implementation note: The "case 8" logic of casting to the type of
+ * the result of subtracting the value from itself is basically a way
+ * of keeping all integer types the same, but casting any pointers to
+ * ptrdiff_t, i.e. also an integer type. This way there are no
+ * questionable casts seen by the compiler on an ILP32 platform.
+ *
+ * Note that __get_user() and __put_user() assume proper alignment.
*/
-struct __get_user {
- unsigned long long val;
- int err;
-};
-/*
- * FIXME: we should express these as inline extended assembler, since
- * they're fundamentally just a variable dereference and some
- * supporting exception_table gunk. Note that (a la i386) we can
- * extend the copy_to_user and copy_from_user routines to call into
- * such extended assembler routines, though we will have to use a
- * different return code in that case (1, 2, or 4, rather than -EFAULT).
- */
-extern struct __get_user __get_user_1(const void __user *);
-extern struct __get_user __get_user_2(const void __user *);
-extern struct __get_user __get_user_4(const void __user *);
-extern struct __get_user __get_user_8(const void __user *);
-extern int __put_user_1(long, void __user *);
-extern int __put_user_2(long, void __user *);
-extern int __put_user_4(long, void __user *);
-extern int __put_user_8(long long, void __user *);
-
-/* Unimplemented routines to cause linker failures */
-extern struct __get_user __get_user_bad(void);
-extern int __put_user_bad(void);
+#ifdef __LP64__
+#define _ASM_PTR ".quad"
+#define _ASM_ALIGN ".align 8"
+#else
+#define _ASM_PTR ".long"
+#define _ASM_ALIGN ".align 4"
+#endif
+
+#define __get_user_asm(OP, x, ptr, ret) \
+ asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %1, 0; movei %0, %3 }\n" \
+ "j 9f\n" \
+ ".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (x) \
+ : "r" (ptr), "i" (-EFAULT))
+
+#ifdef __tilegx__
+#define __get_user_1(x, ptr, ret) __get_user_asm(ld1u, x, ptr, ret)
+#define __get_user_2(x, ptr, ret) __get_user_asm(ld2u, x, ptr, ret)
+#define __get_user_4(x, ptr, ret) __get_user_asm(ld4s, x, ptr, ret)
+#define __get_user_8(x, ptr, ret) __get_user_asm(ld, x, ptr, ret)
+#else
+#define __get_user_1(x, ptr, ret) __get_user_asm(lb_u, x, ptr, ret)
+#define __get_user_2(x, ptr, ret) __get_user_asm(lh_u, x, ptr, ret)
+#define __get_user_4(x, ptr, ret) __get_user_asm(lw, x, ptr, ret)
+#ifdef __LITTLE_ENDIAN
+#define __lo32(a, b) a
+#define __hi32(a, b) b
+#else
+#define __lo32(a, b) b
+#define __hi32(a, b) a
+#endif
+#define __get_user_8(x, ptr, ret) \
+ ({ \
+ unsigned int __a, __b; \
+ asm volatile("1: { lw %1, %3; addi %2, %3, 4 }\n" \
+ "2: { lw %2, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %1, 0; movei %2, 0 }\n" \
+ "{ movei %0, %4; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".word 1b, 0b\n" \
+ ".word 2b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret), "=r" (__a), "=&r" (__b) \
+ : "r" (ptr), "i" (-EFAULT)); \
+ (x) = (__typeof(x))(__typeof((x)-(x))) \
+ (((u64)__hi32(__a, __b) << 32) | \
+ __lo32(__a, __b)); \
+ })
+#endif
+
+extern int __get_user_bad(void)
+ __attribute__((warning("sizeof __get_user argument not 1, 2, 4 or 8")));
-/*
- * Careful: we have to cast the result to the type of the pointer
- * for sign reasons.
- */
/**
* __get_user: - Get a simple variable from user space, with less checking.
* @x: Variable to store result.
@@ -174,30 +208,64 @@ extern int __put_user_bad(void);
* function.
*/
#define __get_user(x, ptr) \
-({ struct __get_user __ret; \
- __typeof__(*(ptr)) const __user *__gu_addr = (ptr); \
- __chk_user_ptr(__gu_addr); \
- switch (sizeof(*(__gu_addr))) { \
- case 1: \
- __ret = __get_user_1(__gu_addr); \
- break; \
- case 2: \
- __ret = __get_user_2(__gu_addr); \
- break; \
- case 4: \
- __ret = __get_user_4(__gu_addr); \
- break; \
- case 8: \
- __ret = __get_user_8(__gu_addr); \
- break; \
- default: \
- __ret = __get_user_bad(); \
- break; \
- } \
- (x) = (__typeof__(*__gu_addr)) (__typeof__(*__gu_addr - *__gu_addr)) \
- __ret.val; \
- __ret.err; \
-})
+ ({ \
+ int __ret; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: __get_user_1(x, ptr, __ret); break; \
+ case 2: __get_user_2(x, ptr, __ret); break; \
+ case 4: __get_user_4(x, ptr, __ret); break; \
+ case 8: __get_user_8(x, ptr, __ret); break; \
+ default: __ret = __get_user_bad(); break; \
+ } \
+ __ret; \
+ })
+
+/* Support macros for __put_user(). */
+
+#define __put_user_asm(OP, x, ptr, ret) \
+ asm volatile("1: {" #OP " %1, %2; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %3; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR " 1b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=r" (ret) \
+ : "r" (ptr), "r" (x), "i" (-EFAULT))
+
+#ifdef __tilegx__
+#define __put_user_1(x, ptr, ret) __put_user_asm(st1, x, ptr, ret)
+#define __put_user_2(x, ptr, ret) __put_user_asm(st2, x, ptr, ret)
+#define __put_user_4(x, ptr, ret) __put_user_asm(st4, x, ptr, ret)
+#define __put_user_8(x, ptr, ret) __put_user_asm(st, x, ptr, ret)
+#else
+#define __put_user_1(x, ptr, ret) __put_user_asm(sb, x, ptr, ret)
+#define __put_user_2(x, ptr, ret) __put_user_asm(sh, x, ptr, ret)
+#define __put_user_4(x, ptr, ret) __put_user_asm(sw, x, ptr, ret)
+#define __put_user_8(x, ptr, ret) \
+ ({ \
+ u64 __x = (__typeof((x)-(x)))(x); \
+ int __lo = (int) __x, __hi = (int) (__x >> 32); \
+ asm volatile("1: { sw %1, %2; addi %0, %1, 4 }\n" \
+ "2: { sw %0, %3; movei %0, 0 }\n" \
+ ".pushsection .fixup,\"ax\"\n" \
+ "0: { movei %0, %4; j 9f }\n" \
+ ".section __ex_table,\"a\"\n" \
+ ".align 4\n" \
+ ".word 1b, 0b\n" \
+ ".word 2b, 0b\n" \
+ ".popsection\n" \
+ "9:" \
+ : "=&r" (ret) \
+ : "r" (ptr), "r" (__lo32(__lo, __hi)), \
+ "r" (__hi32(__lo, __hi)), "i" (-EFAULT)); \
+ })
+#endif
+
+extern int __put_user_bad(void)
+ __attribute__((warning("sizeof __put_user argument not 1, 2, 4 or 8")));
/**
* __put_user: - Write a simple value into user space, with less checking.
@@ -217,39 +285,19 @@ extern int __put_user_bad(void);
* function.
*
* Returns zero on success, or -EFAULT on error.
- *
- * Implementation note: The "case 8" logic of casting to the type of
- * the result of subtracting the value from itself is basically a way
- * of keeping all integer types the same, but casting any pointers to
- * ptrdiff_t, i.e. also an integer type. This way there are no
- * questionable casts seen by the compiler on an ILP32 platform.
*/
#define __put_user(x, ptr) \
({ \
- int __pu_err = 0; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- typeof(*__pu_addr) __pu_val = (x); \
- __chk_user_ptr(__pu_addr); \
- switch (sizeof(__pu_val)) { \
- case 1: \
- __pu_err = __put_user_1((long)__pu_val, __pu_addr); \
- break; \
- case 2: \
- __pu_err = __put_user_2((long)__pu_val, __pu_addr); \
- break; \
- case 4: \
- __pu_err = __put_user_4((long)__pu_val, __pu_addr); \
- break; \
- case 8: \
- __pu_err = \
- __put_user_8((__typeof__(__pu_val - __pu_val))__pu_val,\
- __pu_addr); \
- break; \
- default: \
- __pu_err = __put_user_bad(); \
- break; \
+ int __ret; \
+ __chk_user_ptr(ptr); \
+ switch (sizeof(*(ptr))) { \
+ case 1: __put_user_1(x, ptr, __ret); break; \
+ case 2: __put_user_2(x, ptr, __ret); break; \
+ case 4: __put_user_4(x, ptr, __ret); break; \
+ case 8: __put_user_8(x, ptr, __ret); break; \
+ default: __ret = __put_user_bad(); break; \
} \
- __pu_err; \
+ __ret; \
})
/*
@@ -353,7 +401,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
return n;
}
-#ifdef CONFIG_DEBUG_COPY_FROM_USER
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+/*
+ * There are still unprovable places in the generic code as of 2.6.34, so this
+ * option is not really compatible with -Werror, which is more useful in
+ * general.
+ */
extern void copy_from_user_overflow(void)
__compiletime_warning("copy_from_user() size is not provably correct");
@@ -378,7 +431,7 @@ static inline unsigned long __must_check copy_from_user(void *to,
/**
* __copy_in_user() - copy data within user space, with less checking.
* @to: Destination address, in user space.
- * @from: Source address, in kernel space.
+ * @from: Source address, in user space.
* @n: Number of bytes to copy.
*
* Context: User context only. This function may sleep.
@@ -395,7 +448,7 @@ extern unsigned long __copy_in_user_inatomic(
static inline unsigned long __must_check
__copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- might_sleep();
+ might_fault();
return __copy_in_user_inatomic(to, from, n);
}
@@ -520,37 +573,6 @@ static inline unsigned long __must_check flush_user(
}
/**
- * inv_user: - Invalidate a block of memory in user space from cache.
- * @mem: Destination address, in user space.
- * @len: Number of bytes to invalidate.
- *
- * Returns number of bytes that could not be invalidated.
- * On success, this will be zero.
- *
- * Note that on Tile64, the "inv" operation is in fact a
- * "flush and invalidate", so cache write-backs will occur prior
- * to the cache being marked invalid.
- */
-extern unsigned long inv_user_asm(void __user *mem, unsigned long len);
-static inline unsigned long __must_check __inv_user(
- void __user *mem, unsigned long len)
-{
- int retval;
-
- might_fault();
- retval = inv_user_asm(mem, len);
- mb_incoherent();
- return retval;
-}
-static inline unsigned long __must_check inv_user(
- void __user *mem, unsigned long len)
-{
- if (access_ok(VERIFY_WRITE, mem, len))
- return __inv_user(mem, len);
- return len;
-}
-
-/**
* finv_user: - Flush-inval a block of memory in user space from cache.
* @mem: Destination address, in user space.
* @len: Number of bytes to invalidate.
diff --git a/arch/tile/include/asm/unaligned.h b/arch/tile/include/asm/unaligned.h
index 137e2de5b10..5a58a0d1144 100644
--- a/arch/tile/include/asm/unaligned.h
+++ b/arch/tile/include/asm/unaligned.h
@@ -15,10 +15,29 @@
#ifndef _ASM_TILE_UNALIGNED_H
#define _ASM_TILE_UNALIGNED_H
-#include <linux/unaligned/le_struct.h>
-#include <linux/unaligned/be_byteshift.h>
-#include <linux/unaligned/generic.h>
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
+/*
+ * We could implement faster get_unaligned_[be/le]64 using the ldna
+ * instruction on tilegx; however, we need to either copy all of the
+ * other generic functions to here (which is pretty ugly) or else
+ * modify both the generic code and other arch code to allow arch
+ * specific unaligned data access functions. Given these functions
+ * are not often called, we'll stick with the generic version.
+ */
+#include <asm-generic/unaligned.h>
+
+/*
+ * Is the kernel doing fixups of unaligned accesses? If <0, no kernel
+ * intervention occurs and SIGBUS is delivered with no data address
+ * info. If 0, the kernel single-steps the instruction to discover
+ * the data address to provide with the SIGBUS. If 1, the kernel does
+ * a fixup.
+ */
+extern int unaligned_fixup;
+
+/* Is the kernel printing on each unaligned fixup? */
+extern int unaligned_printk;
+
+/* Number of unaligned fixups performed */
+extern unsigned int unaligned_fixup_count;
#endif /* _ASM_TILE_UNALIGNED_H */
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index f70bf1c541f..940831fe9e9 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -11,37 +11,10 @@
* NON INFRINGEMENT. See the GNU General Public License for
* more details.
*/
-
-#if !defined(_ASM_TILE_UNISTD_H) || defined(__SYSCALL)
-#define _ASM_TILE_UNISTD_H
-
-#if !defined(__LP64__) || defined(__SYSCALL_COMPAT)
-/* Use the flavor of this syscall that matches the 32-bit API better. */
-#define __ARCH_WANT_SYNC_FILE_RANGE2
-#endif
-
-/* Use the standard ABI for syscalls. */
-#include <asm-generic/unistd.h>
-
-/* Additional Tilera-specific syscalls. */
-#define __NR_flush_cache (__NR_arch_specific_syscall + 1)
-__SYSCALL(__NR_flush_cache, sys_flush_cache)
-
-#ifndef __tilegx__
-/* "Fast" syscalls provide atomic support for 32-bit chips. */
-#define __NR_FAST_cmpxchg -1
-#define __NR_FAST_atomic_update -2
-#define __NR_FAST_cmpxchg64 -3
-#define __NR_cmpxchg_badaddr (__NR_arch_specific_syscall + 0)
-__SYSCALL(__NR_cmpxchg_badaddr, sys_cmpxchg_badaddr)
-#endif
-
-#ifdef __KERNEL__
/* In compat mode, we use sys_llseek() for compat_sys_llseek(). */
#ifdef CONFIG_COMPAT
#define __ARCH_WANT_SYS_LLSEEK
#endif
#define __ARCH_WANT_SYS_NEWFSTATAT
-#endif
-
-#endif /* _ASM_TILE_UNISTD_H */
+#define __ARCH_WANT_SYS_CLONE
+#include <uapi/asm/unistd.h>
diff --git a/arch/tile/include/asm/vdso.h b/arch/tile/include/asm/vdso.h
new file mode 100644
index 00000000000..9f6a78d665f
--- /dev/null
+++ b/arch/tile/include/asm/vdso.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef __TILE_VDSO_H__
+#define __TILE_VDSO_H__
+
+#include <linux/types.h>
+
+/*
+ * Note about the vdso_data structure:
+ *
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
+ * structure is supposed to be known only to the function in the vdso
+ * itself and may change without notice.
+ */
+
+struct vdso_data {
+ __u64 tz_update_count; /* Timezone atomicity ctr */
+ __u64 tb_update_count; /* Timebase atomicity ctr */
+ __u64 xtime_tod_stamp; /* TOD clock for xtime */
+ __u64 xtime_clock_sec; /* Kernel time second */
+ __u64 xtime_clock_nsec; /* Kernel time nanosecond */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock second */
+ __u64 wtom_clock_nsec; /* Wall to monotonic clock nanosecond */
+ __u32 mult; /* Cycle to nanosecond multiplier */
+ __u32 shift; /* Cycle to nanosecond divisor (power of two) */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich */
+ __u32 tz_dsttime; /* Type of dst correction */
+};
+
+extern struct vdso_data *vdso_data;
+
+/* __vdso_rt_sigreturn is defined with the addresses in the vdso page. */
+extern void __vdso_rt_sigreturn(void);
+
+extern int setup_vdso_pages(void);
+
+#endif /* __TILE_VDSO_H__ */