From 13abf5c351cd989aafa3dda261282732e8c38a83 Mon Sep 17 00:00:00 2001 From: "Andrew W. Nosenko" Date: Fri, 27 Apr 2012 18:39:13 +0300 Subject: atomic: prefer inline asm over GCC builtins for armv7a As per Patrick Trantham's comment http://groups.crossroads.io/r/topic/510xaadGgWVcXFdATnhAuN the GCC __sync_*() builtins are more generic but slower than specialized hand-written assembly for the armv7a case. Therefore, the code is reordered to prefer inline assembly for armv7a over GCC builtins. * src/atomic_counter.hpp: (atomic_counter_t::add): (atomic_counter_t::sub): * src/atomic_ptr.hpp: (atomic_ptr_t::xchg): (atomic_ptr_t::cas): Prefer armv7a specific inline asm over GCC __sync_*() builtins. --- src/atomic_counter.hpp | 50 +++++++++++++++++++++---------------------- src/atomic_ptr.hpp | 58 +++++++++++++++++++++++++------------------------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/src/atomic_counter.hpp b/src/atomic_counter.hpp index 32395d9..156eebf 100644 --- a/src/atomic_counter.hpp +++ b/src/atomic_counter.hpp @@ -69,17 +69,7 @@ namespace xs { integer_t old_value; -#if defined(XS_ATOMIC_GCC_SYNC) - old_value = __sync_fetch_and_add (&value, increment_); - -#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) - __asm__ volatile ( - "lock; xadd %0, %1 \n\t" - : "=r" (old_value), "=m" (value) - : "0" (increment_), "m" (value) - : "cc", "memory"); - -#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) +#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) integer_t flag, tmp; __asm__ volatile ( " dmb sy\n\t" @@ -93,6 +83,16 @@ namespace xs : "Ir"(increment_), "r"(&value) : "cc"); +#elif defined(XS_ATOMIC_GCC_SYNC) + old_value = __sync_fetch_and_add (&value, increment_); + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) + __asm__ volatile ( + "lock; xadd %0, %1 \n\t" + : "=r" (old_value), "=m" (value) + : "0" (increment_), "m" (value) + : "cc", "memory"); + #elif defined(XS_ATOMIC_SOLARIS) integer_t 
new_value = atomic_add_32_nv (&value, increment_); old_value = new_value - increment_; @@ -112,20 +112,7 @@ namespace xs // Atomic subtraction. Returns false if the counter drops to zero. inline bool sub (integer_t decrement) { -#if defined(XS_ATOMIC_GCC_SYNC) - integer_t new_value = __sync_sub_and_fetch (&value, decrement); - return (new_value != 0); - -#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) - integer_t oldval = -decrement; - volatile integer_t *val = &value; - __asm__ volatile ("lock; xaddl %0,%1" - : "=r" (oldval), "=m" (*val) - : "0" (oldval), "m" (*val) - : "cc", "memory"); - return oldval != decrement; - -#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) +#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) integer_t old_value, flag, tmp; __asm__ volatile ( " dmb sy\n\t" @@ -140,6 +127,19 @@ namespace xs : "cc"); return old_value - decrement != 0; +#elif defined(XS_ATOMIC_GCC_SYNC) + integer_t new_value = __sync_sub_and_fetch (&value, decrement); + return (new_value != 0); + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) + integer_t oldval = -decrement; + volatile integer_t *val = &value; + __asm__ volatile ("lock; xaddl %0,%1" + : "=r" (oldval), "=m" (*val) + : "0" (oldval), "m" (*val) + : "cc", "memory"); + return oldval != decrement; + #elif defined(XS_ATOMIC_SOLARIS) int32_t delta = - ((int32_t) decrement); integer_t nv = atomic_add_32_nv (&value, delta); diff --git a/src/atomic_ptr.hpp b/src/atomic_ptr.hpp index 08418d3..adf1c87 100644 --- a/src/atomic_ptr.hpp +++ b/src/atomic_ptr.hpp @@ -70,7 +70,22 @@ namespace xs // to the 'val' value. Old value is returned. 
inline T *xchg (T *val_) { -#if defined(XS_ATOMIC_GCC_SYNC) +#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) + T* old; + unsigned int flag; + __asm__ volatile ( + " dmb sy\n\t" + "1: ldrex %1, [%3]\n\t" + " strex %0, %4, [%3]\n\t" + " teq %0, #0\n\t" + " bne 1b\n\t" + " dmb sy\n\t" + : "=&r"(flag), "=&r"(old), "+Qo"(ptr) + : "r"(&ptr), "r"(val_) + : "cc"); + return old; + +#elif defined(XS_ATOMIC_GCC_SYNC) { T* ov; do @@ -88,21 +103,6 @@ namespace xs : "m" (ptr), "0" (val_)); return old; -#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) - T* old; - unsigned int flag; - __asm__ volatile ( - " dmb sy\n\t" - "1: ldrex %1, [%3]\n\t" - " strex %0, %4, [%3]\n\t" - " teq %0, #0\n\t" - " bne 1b\n\t" - " dmb sy\n\t" - : "=&r"(flag), "=&r"(old), "+Qo"(ptr) - : "r"(&ptr), "r"(val_) - : "cc"); - return old; - #elif defined(XS_ATOMIC_SOLARIS) return (T*) atomic_swap_ptr (&ptr, val_); @@ -124,19 +124,7 @@ namespace xs // is returned. inline T *cas (T *cmp_, T *val_) { -#if defined(XS_ATOMIC_GCC_SYNC) - return (T*) __sync_val_compare_and_swap (&ptr, cmp_, val_); - -#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) - T *old; - __asm__ volatile ( - "lock; cmpxchg %2, %3" - : "=a" (old), "=m" (ptr) - : "r" (val_), "m" (ptr), "0" (cmp_) - : "cc"); - return old; - -#elif (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) +#if (defined(__GNUC__) && defined(__ARM_ARCH_7A__)) T *old; unsigned int flag; __asm__ volatile ( @@ -153,6 +141,18 @@ namespace xs : "cc"); return old; +#elif defined(XS_ATOMIC_GCC_SYNC) + return (T*) __sync_val_compare_and_swap (&ptr, cmp_, val_); + +#elif (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) + T *old; + __asm__ volatile ( + "lock; cmpxchg %2, %3" + : "=a" (old), "=m" (ptr) + : "r" (val_), "m" (ptr), "0" (cmp_) + : "cc"); + return old; + #elif defined(XS_ATOMIC_SOLARIS) return (T*) atomic_cas_ptr (&ptr, cmp_, val_); -- cgit v1.2.3