With -fvisibility=hidden, gcc gets too smart and inlines atomic_set into
tc_lock_lock. The optimizer can't tell that the asm in atomic_set is
changing anything, because its input is a fixed pointer, not the
volatile value it points to. So the x86_64 lock loop looks like this:
34: 48 c7 c0 01 00 00 00 mov $0x1,%rax
3b: 48 89 d9 mov %rbx,%rcx
3e: f0 48 87 01 lock xchg %rax,(%rcx)
42: 48 89 c2 mov %rax,%rdx
45: 0f 1f 00 nopl (%rax)
48: 48 85 d2 test %rdx,%rdx
4b: 75 fb jne 48 <tc_lock_lock+0x28>
That is, on failure 4b jumps back to 48 forever. It's also not really
correct to be using a 64-bit xchg, since the memory value is just int.
The 32-bit x86 version gets a similar loop with the xchg lifted out.
This patch greatly simplifies the asm, with its input "+m"(*val) ensuring
that gcc knows we're using the volatile value, and a broad "memory"
constraint so the lock can protect other data too. That loop is now:
11: b9 01 00 00 00 mov $0x1,%ecx
16: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
1d: 00 00 00
20: 89 ca mov %ecx,%edx
22: 87 13 xchg %edx,(%rbx)
24: 85 d2 test %edx,%edx
26: 75 f8 jne 20 <tc_lock_lock+0x20>
Signed-off-by: Josh Stone <jistone@xxxxxxxxxx>
---
dyninstAPI_RT/src/RTthread-x86-64.c | 67 +++++--------------------------------
dyninstAPI_RT/src/RTthread-x86.c | 31 ++++++-----------
2 files changed, 19 insertions(+), 79 deletions(-)
diff --git a/dyninstAPI_RT/src/RTthread-x86-64.c b/dyninstAPI_RT/src/RTthread-x86-64.c
index ce1be36..c5ab2cb 100644
--- a/dyninstAPI_RT/src/RTthread-x86-64.c
+++ b/dyninstAPI_RT/src/RTthread-x86-64.c
@@ -30,60 +30,15 @@
#include "dyninstAPI_RT/src/RTthread.h"
-long atomic_set(volatile int *val)
+static int atomic_set(volatile int *val)
{
- long result = 0;
-#if defined(MUTATEE_32)
- __asm("movl $1,%%eax\n"
- "movl %1,%%ecx\n"
- "lock\n"
- "xchgl %%eax, (%%ecx)\n"
- "movl %%eax, %0\n"
- : "=r" (result)
- : "r" (val)
- : "%eax",
- "%ecx");
-#else
- __asm("mov $1,%%rax\n"
- "mov %1,%%rcx\n"
- "lock\n"
- "xchg %%rax, (%%rcx)\n"
- "mov %%rax, %0\n"
- : "=r" (result)
- : "r" (val)
- : "%rax",
- "%rcx");
-#endif
+ int result = 1;
+ __asm__ __volatile__(
+ "xchgl %0, %1\n"
+ : "+r" (result), "+m" (*val)
+ :: "memory");
return !result;
}
-/*
-#if 1
- __asm(
- "movl $0,%%eax\n"
- "movl $1,%%ebx\n"
- "movl %1,%%ecx\n"
- "lock\n"
- "cmpxchgl %%ebx,(%%ecx)\n"
- "setz %%al\n"
- "movl %%eax,%0\n"
- : "=r" (result)
- : "r" (val)
- : "%eax", "%ebx", "%ecx");
-#else
- __asm(
- "mov $0,%%rax\n"
- "mov $1,%%rbx\n"
- "mov %1,%%rcx\n"
- "lock\n"
- "cmpxchg %%rbx,(%%rcx)\n"
- "setz %%al\n"
- "mov %%rax,%0\n"
- : "=r" (result)
- : "r" (val)
- : "%rax", "%rbx", "%rcx");
-#endif
- return result;
-*/
int tc_lock_lock(tc_lock_t *tc)
{
@@ -93,13 +48,9 @@ int tc_lock_lock(tc_lock_t *tc)
if (me == tc->tid)
return DYNINST_DEAD_LOCK;
- while (1) {
- int wasNotLocked = atomic_set(&tc->mutex);
- if( wasNotLocked ) {
- tc->tid = me;
- break;
- }
- }
+ while (!atomic_set(&tc->mutex));
+
+ tc->tid = me;
return 0;
}
diff --git a/dyninstAPI_RT/src/RTthread-x86.c b/dyninstAPI_RT/src/RTthread-x86.c
index a4e8c9b..21e31ba 100644
--- a/dyninstAPI_RT/src/RTthread-x86.c
+++ b/dyninstAPI_RT/src/RTthread-x86.c
@@ -57,21 +57,14 @@ int atomic_set(volatile int *val)
return result;
}
#else
-int atomic_set(volatile int *val)
+static int atomic_set(volatile int *val)
{
- int result;
- __asm(
- "movl $0,%%eax\n"
- "movl $1,%%edx\n"
- "movl %1,%%ecx\n"
- "lock\n"
- "cmpxchgl %%edx,(%%ecx)\n"
- "setz %%al\n"
- "movl %%eax,%0\n"
- : "=r" (result)
- : "r" (val)
- : "%eax", "%edx", "%ecx");
- return result;
+ int result = 1;
+ __asm__ __volatile__(
+ "xchgl %0, %1\n"
+ : "+r" (result), "+m" (*val)
+ :: "memory");
+ return !result;
}
#endif
@@ -83,13 +76,9 @@ int tc_lock_lock(tc_lock_t *tc)
if (me == tc->tid)
return DYNINST_DEAD_LOCK;
- while (1) {
- if (tc->mutex == 0 && atomic_set(&tc->mutex))
- {
- tc->tid = me;
- break;
- }
- }
+ while (!atomic_set(&tc->mutex));
+
+ tc->tid = me;
return 0;
}
--
1.8.3.1
|