summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew W. Nosenko <andrew.w.nosenko@gmail.com>2012-04-27 18:39:13 +0300
committerMartin Sustrik <sustrik@250bpm.com>2012-04-29 07:30:48 +0200
commit13abf5c351cd989aafa3dda261282732e8c38a83 (patch)
tree63eda5979b55edfdc5977946333da6c732ae08e1
parent8aafb03dee4520ea62cd0cc0c78a9b958ec5ae18 (diff)
atomic: prefer inline asm over GCC builtins for armv7a
As per Patrick Trantham's comment http://groups.crossroads.io/r/topic/510xaadGgWVcXFdATnhAuN the GCC __sync_*() builtins are more generic but slower than specialized hand-written assembly for the armv7a case. Therefore, the code is reordered to prefer inline assembly for armv7a over the GCC builtins. * src/atomic_counter.hpp: (atomic_counter_t::add): (atomic_counter_t::sub): * src/atomic_ptr.hpp: (atomic_ptr_t::xchg): (atomic_ptr_t::cas): Prefer armv7a-specific inline asm over GCC __sync_*() builtins.
-rw-r--r--src/atomic_counter.hpp50
-rw-r--r--src/atomic_ptr.hpp58
2 files changed, 54 insertions, 54 deletions
diff --git a/src/atomic_counter.hpp b/src/atomic_counter.hpp
index 32395d9..156eebf 100644
--- a/src/atomic_counter.hpp
+++ b/src/atomic_counter.hpp
@@ -69,17 +69,7 @@ namespace xs
{
integer_t old_value;
-#if defined(XS_ATOMIC_GCC_SYNC)
- old_value = __sync_fetch_and_add (&value, increment_);
-
-#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
- __asm__ volatile (
- "lock; xadd %0, %1 \n\t"
- : "=r" (old_value), "=m" (value)
- : "0" (increment_), "m" (value)
- : "cc", "memory");
-
-#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
+#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
integer_t flag, tmp;
__asm__ volatile (
" dmb sy\n\t"
@@ -93,6 +83,16 @@ namespace xs
: "Ir"(increment_), "r"(&value)
: "cc");
+#elif defined(XS_ATOMIC_GCC_SYNC)
+ old_value = __sync_fetch_and_add (&value, increment_);
+
+#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+ __asm__ volatile (
+ "lock; xadd %0, %1 \n\t"
+ : "=r" (old_value), "=m" (value)
+ : "0" (increment_), "m" (value)
+ : "cc", "memory");
+
#elif defined(XS_ATOMIC_SOLARIS)
integer_t new_value = atomic_add_32_nv (&value, increment_);
old_value = new_value - increment_;
@@ -112,20 +112,7 @@ namespace xs
// Atomic subtraction. Returns false if the counter drops to zero.
inline bool sub (integer_t decrement)
{
-#if defined(XS_ATOMIC_GCC_SYNC)
- integer_t new_value = __sync_sub_and_fetch (&value, decrement);
- return (new_value != 0);
-
-#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
- integer_t oldval = -decrement;
- volatile integer_t *val = &value;
- __asm__ volatile ("lock; xaddl %0,%1"
- : "=r" (oldval), "=m" (*val)
- : "0" (oldval), "m" (*val)
- : "cc", "memory");
- return oldval != decrement;
-
-#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
+#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
integer_t old_value, flag, tmp;
__asm__ volatile (
" dmb sy\n\t"
@@ -140,6 +127,19 @@ namespace xs
: "cc");
return old_value - decrement != 0;
+#elif defined(XS_ATOMIC_GCC_SYNC)
+ integer_t new_value = __sync_sub_and_fetch (&value, decrement);
+ return (new_value != 0);
+
+#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+ integer_t oldval = -decrement;
+ volatile integer_t *val = &value;
+ __asm__ volatile ("lock; xaddl %0,%1"
+ : "=r" (oldval), "=m" (*val)
+ : "0" (oldval), "m" (*val)
+ : "cc", "memory");
+ return oldval != decrement;
+
#elif defined(XS_ATOMIC_SOLARIS)
int32_t delta = - ((int32_t) decrement);
integer_t nv = atomic_add_32_nv (&value, delta);
diff --git a/src/atomic_ptr.hpp b/src/atomic_ptr.hpp
index 08418d3..adf1c87 100644
--- a/src/atomic_ptr.hpp
+++ b/src/atomic_ptr.hpp
@@ -70,7 +70,22 @@ namespace xs
// to the 'val' value. Old value is returned.
inline T *xchg (T *val_)
{
-#if defined(XS_ATOMIC_GCC_SYNC)
+#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
+ T* old;
+ unsigned int flag;
+ __asm__ volatile (
+ " dmb sy\n\t"
+ "1: ldrex %1, [%3]\n\t"
+ " strex %0, %4, [%3]\n\t"
+ " teq %0, #0\n\t"
+ " bne 1b\n\t"
+ " dmb sy\n\t"
+ : "=&r"(flag), "=&r"(old), "+Qo"(ptr)
+ : "r"(&ptr), "r"(val_)
+ : "cc");
+ return old;
+
+#elif defined(XS_ATOMIC_GCC_SYNC)
{
T* ov;
do
@@ -88,21 +103,6 @@ namespace xs
: "m" (ptr), "0" (val_));
return old;
-#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
- T* old;
- unsigned int flag;
- __asm__ volatile (
- " dmb sy\n\t"
- "1: ldrex %1, [%3]\n\t"
- " strex %0, %4, [%3]\n\t"
- " teq %0, #0\n\t"
- " bne 1b\n\t"
- " dmb sy\n\t"
- : "=&r"(flag), "=&r"(old), "+Qo"(ptr)
- : "r"(&ptr), "r"(val_)
- : "cc");
- return old;
-
#elif defined(XS_ATOMIC_SOLARIS)
return (T*) atomic_swap_ptr (&ptr, val_);
@@ -124,19 +124,7 @@ namespace xs
// is returned.
inline T *cas (T *cmp_, T *val_)
{
-#if defined(XS_ATOMIC_GCC_SYNC)
- return (T*) __sync_val_compare_and_swap (&ptr, cmp_, val_);
-
-#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
- T *old;
- __asm__ volatile (
- "lock; cmpxchg %2, %3"
- : "=a" (old), "=m" (ptr)
- : "r" (val_), "m" (ptr), "0" (cmp_)
- : "cc");
- return old;
-
-#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
+#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__))
T *old;
unsigned int flag;
__asm__ volatile (
@@ -153,6 +141,18 @@ namespace xs
: "cc");
return old;
+#elif defined(XS_ATOMIC_GCC_SYNC)
+ return (T*) __sync_val_compare_and_swap (&ptr, cmp_, val_);
+
+#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
+ T *old;
+ __asm__ volatile (
+ "lock; cmpxchg %2, %3"
+ : "=a" (old), "=m" (ptr)
+ : "r" (val_), "m" (ptr), "0" (cmp_)
+ : "cc");
+ return old;
+
#elif defined(XS_ATOMIC_SOLARIS)
return (T*) atomic_cas_ptr (&ptr, cmp_, val_);