From: Anton Blanchard Date: Tue, 6 Sep 2005 03:05:58 +0000 (+1000) Subject: [PATCH] ppc64: speedup cmpxchg X-Git-Tag: v2.6.14-rc1~747^2~13 X-Git-Url: http://pilppa.com/gitweb/?a=commitdiff_plain;h=b2c0ab17ba751abe13a28508b1ac7e9ca074cd87;p=linux-2.6-omap-h63xx.git [PATCH] ppc64: speedup cmpxchg cmpxchg has the following code: __typeof__(*(ptr)) _o_ = (o); __typeof__(*(ptr)) _n_ = (n); Unfortunately it makes gcc 4.0 store and load the variables to the stack. Eg in atomic_dec_and_test we get: stw r10,112(r1) stw r9,116(r1) lwz r9,112(r1) lwz r0,116(r1) x86 is just casting the values so do that instead. Also change __xchg* and __cmpxchg* to take unsigned values, removing a few sign extensions. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index b9e1835351e..c0396428cc3 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -158,7 +158,7 @@ static inline int __is_processor(unsigned long pv) * is more like most of the other architectures. */ static __inline__ unsigned long -__xchg_u32(volatile int *m, unsigned long val) +__xchg_u32(volatile unsigned int *m, unsigned long val) { unsigned long dummy; @@ -200,7 +200,7 @@ __xchg_u64(volatile long *m, unsigned long val) extern void __xchg_called_with_bad_pointer(void); static __inline__ unsigned long -__xchg(volatile void *ptr, unsigned long x, int size) +__xchg(volatile void *ptr, unsigned long x, unsigned int size) { switch (size) { case 4: @@ -223,7 +223,7 @@ __xchg(volatile void *ptr, unsigned long x, int size) #define __HAVE_ARCH_CMPXCHG 1 static __inline__ unsigned long -__cmpxchg_u32(volatile int *p, int old, int new) +__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) { unsigned int prev; @@ -271,7 +271,8 @@ __cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) extern void __cmpxchg_called_with_bad_pointer(void); static __inline__ unsigned long -__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) { switch (size) { case 4: @@ -283,13 +284,9 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) return old; } -#define cmpxchg(ptr,o,n) \ - ({ \ - __typeof__(*(ptr)) _o_ = (o); \ - __typeof__(*(ptr)) _n_ = (n); \ - (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ - (unsigned long)_n_, sizeof(*(ptr))); \ - }) +#define cmpxchg(ptr,o,n)\ + ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\ + (unsigned long)(n),sizeof(*(ptr)))) /* * We handle most unaligned accesses in hardware. On the other hand