aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/bitops.h
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include/asm/bitops.h')
-rw-r--r--arch/x86/include/asm/bitops.h172
1 files changed, 107 insertions, 65 deletions
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 903683b07e4..afcd35d331d 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -14,6 +14,18 @@
#include <linux/compiler.h>
#include <asm/alternative.h>
+#include <asm/rmwcc.h>
+#include <asm/barrier.h>
+
+#if BITS_PER_LONG == 32
+# define _BITOPS_LONG_SHIFT 5
+#elif BITS_PER_LONG == 64
+# define _BITOPS_LONG_SHIFT 6
+#else
+# error "Unexpected BITS_PER_LONG"
+#endif
+
+#define BIT_64(n) (U64_C(1) << (n))
/*
* These have to be done with inline assembly: that way the bit-setting
@@ -57,7 +69,7 @@
* restricted to acting on a single-word quantity.
*/
static __always_inline void
-set_bit(unsigned int nr, volatile unsigned long *addr)
+set_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -79,7 +91,7 @@ set_bit(unsigned int nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __set_bit(int nr, volatile unsigned long *addr)
+static inline void __set_bit(long nr, volatile unsigned long *addr)
{
asm volatile("bts %1,%0" : ADDR : "Ir" (nr) : "memory");
}
@@ -91,11 +103,11 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
*
* clear_bit() is atomic and may not be reordered. However, it does
* not contain a memory barrier, so if it is used for locking purposes,
- * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic()
* in order to ensure changes are visible on other processors.
*/
static __always_inline void
-clear_bit(int nr, volatile unsigned long *addr)
+clear_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -116,13 +128,13 @@ clear_bit(int nr, volatile unsigned long *addr)
* clear_bit() is atomic and implies release semantics before the memory
* operation. It can be used for an unlock.
*/
-static inline void clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
clear_bit(nr, addr);
}
-static inline void __clear_bit(int nr, volatile unsigned long *addr)
+static inline void __clear_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btr %1,%0" : ADDR : "Ir" (nr));
}
@@ -139,15 +151,12 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr)
* No memory barrier is required here, because x86 cannot reorder stores past
* older loads. Same principle as spin_unlock.
*/
-static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
+static inline void __clear_bit_unlock(long nr, volatile unsigned long *addr)
{
barrier();
__clear_bit(nr, addr);
}
-#define smp_mb__before_clear_bit() barrier()
-#define smp_mb__after_clear_bit() barrier()
-
/**
* __change_bit - Toggle a bit in memory
* @nr: the bit to change
@@ -157,7 +166,7 @@ static inline void __clear_bit_unlock(unsigned nr, volatile unsigned long *addr)
* If it's called on the same region of memory simultaneously, the effect
* may be that only one operation succeeds.
*/
-static inline void __change_bit(int nr, volatile unsigned long *addr)
+static inline void __change_bit(long nr, volatile unsigned long *addr)
{
asm volatile("btc %1,%0" : ADDR : "Ir" (nr));
}
@@ -171,7 +180,7 @@ static inline void __change_bit(int nr, volatile unsigned long *addr)
* Note that @nr may be almost arbitrarily large; this function is not
* restricted to acting on a single-word quantity.
*/
-static inline void change_bit(int nr, volatile unsigned long *addr)
+static inline void change_bit(long nr, volatile unsigned long *addr)
{
if (IS_IMMEDIATE(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0"
@@ -192,14 +201,9 @@ static inline void change_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_set_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
- "sbb %0,%0" : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c");
}
/**
@@ -210,7 +214,7 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
* This is the same as test_and_set_bit on x86.
*/
static __always_inline int
-test_and_set_bit_lock(int nr, volatile unsigned long *addr)
+test_and_set_bit_lock(long nr, volatile unsigned long *addr)
{
return test_and_set_bit(nr, addr);
}
@@ -224,7 +228,7 @@ test_and_set_bit_lock(int nr, volatile unsigned long *addr)
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
*/
-static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_set_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -243,15 +247,9 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "btr %2,%1\n\t"
- "sbb %0,%0"
- : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c");
}
/**
@@ -262,8 +260,15 @@ static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
* This operation is non-atomic and can be reordered.
* If two examples of this operation race, one can appear to succeed
* but actually fail. You must protect multiple accesses with a lock.
+ *
+ * Note: the operation is performed atomically with respect to
+ * the local CPU, but not other CPUs. Portable code should not
+ * rely on this behaviour.
+ * KVM relies on this behaviour on x86 for modifying memory that is also
+ * accessed from a hypervisor on the same CPU if running in a VM: don't change
+ * this without also updating arch/x86/kernel/kvm.c
*/
-static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_clear_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -275,7 +280,7 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
}
/* WARNING: non atomic and it can be reordered! */
-static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int __test_and_change_bit(long nr, volatile unsigned long *addr)
{
int oldbit;
@@ -295,24 +300,18 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
+static inline int test_and_change_bit(long nr, volatile unsigned long *addr)
{
- int oldbit;
-
- asm volatile(LOCK_PREFIX "btc %2,%1\n\t"
- "sbb %0,%0"
- : "=r" (oldbit), ADDR : "Ir" (nr) : "memory");
-
- return oldbit;
+ GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c");
}
-static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
+static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr)
{
- return ((1UL << (nr % BITS_PER_LONG)) &
- (addr[nr / BITS_PER_LONG])) != 0;
+ return ((1UL << (nr & (BITS_PER_LONG-1))) &
+ (addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
}
-static inline int variable_test_bit(int nr, volatile const unsigned long *addr)
+static inline int variable_test_bit(long nr, volatile const unsigned long *addr)
{
int oldbit;
@@ -346,7 +345,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
*/
static inline unsigned long __ffs(unsigned long word)
{
- asm("bsf %1,%0"
+ asm("rep; bsf %1,%0"
: "=r" (word)
: "rm" (word));
return word;
@@ -360,7 +359,7 @@ static inline unsigned long __ffs(unsigned long word)
*/
static inline unsigned long ffz(unsigned long word)
{
- asm("bsf %1,%0"
+ asm("rep; bsf %1,%0"
: "=r" (word)
: "r" (~word));
return word;
@@ -380,6 +379,8 @@ static inline unsigned long __fls(unsigned long word)
return word;
}
+#undef ADDR
+
#ifdef __KERNEL__
/**
* ffs - find first set bit in word
@@ -395,10 +396,24 @@ static inline unsigned long __fls(unsigned long word)
static inline int ffs(int x)
{
int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+ /*
+ * AMD64 says BSFL won't clobber the dest reg if x==0; Intel64 says the
+ * dest reg is undefined if x==0, but their CPU architect says its
+ * value is written to set it to the same as before, except that the
+ * top 32 bits will be cleared.
+ *
+ * We cannot do this on 32 bits because at the very least some
+ * 486 CPUs did not behave this way.
+ */
+ asm("bsfl %1,%0"
+ : "=r" (r)
+ : "rm" (x), "0" (-1));
+#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
- : "=r" (r) : "rm" (x), "r" (-1));
+ : "=&r" (r) : "rm" (x), "r" (-1));
#else
asm("bsfl %1,%0\n\t"
"jnz 1f\n\t"
@@ -422,7 +437,21 @@ static inline int ffs(int x)
static inline int fls(int x)
{
int r;
-#ifdef CONFIG_X86_CMOV
+
+#ifdef CONFIG_X86_64
+ /*
+ * AMD64 says BSRL won't clobber the dest reg if x==0; Intel64 says the
+ * dest reg is undefined if x==0, but their CPU architect says its
+ * value is written to set it to the same as before, except that the
+ * top 32 bits will be cleared.
+ *
+ * We cannot do this on 32 bits because at the very least some
+ * 486 CPUs did not behave this way.
+ */
+ asm("bsrl %1,%0"
+ : "=r" (r)
+ : "rm" (x), "0" (-1));
+#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
: "=&r" (r) : "rm" (x), "rm" (-1));
@@ -434,11 +463,35 @@ static inline int fls(int x)
#endif
return r + 1;
}
-#endif /* __KERNEL__ */
-
-#undef ADDR
-#ifdef __KERNEL__
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way as the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#ifdef CONFIG_X86_64
+static __always_inline int fls64(__u64 x)
+{
+ int bitpos = -1;
+ /*
+ * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
+ * dest reg is undefined if x==0, but their CPU architect says its
+ * value is written to set it to the same as before.
+ */
+ asm("bsrq %1,%q0"
+ : "+r" (bitpos)
+ : "rm" (x));
+ return bitpos + 1;
+}
+#else
+#include <asm-generic/bitops/fls64.h>
+#endif
#include <asm-generic/bitops/find.h>
@@ -450,20 +503,9 @@ static inline int fls(int x)
#include <asm-generic/bitops/const_hweight.h>
-#endif /* __KERNEL__ */
-
-#include <asm-generic/bitops/fls64.h>
-
-#ifdef __KERNEL__
-
-#include <asm-generic/bitops/ext2-non-atomic.h>
-
-#define ext2_set_bit_atomic(lock, nr, addr) \
- test_and_set_bit((nr), (unsigned long *)(addr))
-#define ext2_clear_bit_atomic(lock, nr, addr) \
- test_and_clear_bit((nr), (unsigned long *)(addr))
+#include <asm-generic/bitops/le.h>
-#include <asm-generic/bitops/minix.h>
+#include <asm-generic/bitops/ext2-atomic-setbit.h>
#endif /* __KERNEL__ */
#endif /* _ASM_X86_BITOPS_H */