[DynInst_API:] [PATCH] Fix infinite loops in x86/x86_64 tc_lock_lock


Date: Wed, 23 Oct 2013 23:16:00 -0700
From: Josh Stone <jistone@xxxxxxxxxx>
Subject: [DynInst_API:] [PATCH] Fix infinite loops in x86/x86_64 tc_lock_lock
With -fvisibility=hidden, gcc gets too smart and inlines atomic_set into
tc_lock_lock.  The optimizer can't tell that the asm in atomic_set is
changing anything, because its input is a fixed pointer, not the
volatile value it points to.  So the x86_64 lock loop looks like this:

  34:   48 c7 c0 01 00 00 00    mov    $0x1,%rax
  3b:   48 89 d9                mov    %rbx,%rcx
  3e:   f0 48 87 01             lock xchg %rax,(%rcx)
  42:   48 89 c2                mov    %rax,%rdx
  45:   0f 1f 00                nopl   (%rax)
  48:   48 85 d2                test   %rdx,%rdx
  4b:   75 fb                   jne    48 <tc_lock_lock+0x28>

That is, on failure 4b jumps back to 48 forever.  It's also not really
correct to be using a 64-bit xchg, since the memory value is just an int.
The 32-bit x86 version gets a similar loop with the xchg lifted out.

This patch greatly simplifies the asm, with its input "+m"(*val) ensuring
that gcc knows we're using the volatile value, and a broad "memory"
constraint so the lock can protect other data too.  That loop is now:

  11:   b9 01 00 00 00          mov    $0x1,%ecx
  16:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  1d:   00 00 00
  20:   89 ca                   mov    %ecx,%edx
  22:   87 13                   xchg   %edx,(%rbx)
  24:   85 d2                   test   %edx,%edx
  26:   75 f8                   jne    20 <tc_lock_lock+0x20>

Signed-off-by: Josh Stone <jistone@xxxxxxxxxx>
---
 dyninstAPI_RT/src/RTthread-x86-64.c | 67 +++++--------------------------------
 dyninstAPI_RT/src/RTthread-x86.c    | 31 ++++++-----------
 2 files changed, 19 insertions(+), 79 deletions(-)

diff --git a/dyninstAPI_RT/src/RTthread-x86-64.c b/dyninstAPI_RT/src/RTthread-x86-64.c
index ce1be36..c5ab2cb 100644
--- a/dyninstAPI_RT/src/RTthread-x86-64.c
+++ b/dyninstAPI_RT/src/RTthread-x86-64.c
@@ -30,60 +30,15 @@
 
 #include "dyninstAPI_RT/src/RTthread.h"
 
-long atomic_set(volatile int *val)
+static int atomic_set(volatile int *val)
 {
-   long result = 0;
-#if defined(MUTATEE_32)
-   __asm("movl $1,%%eax\n"
-         "movl %1,%%ecx\n"
-         "lock\n"
-         "xchgl %%eax, (%%ecx)\n"
-         "movl %%eax, %0\n"
-         : "=r" (result)
-         : "r" (val)
-         : "%eax",
-           "%ecx");
-#else
-   __asm("mov $1,%%rax\n"
-         "mov %1,%%rcx\n"
-         "lock\n"
-         "xchg %%rax, (%%rcx)\n"
-         "mov %%rax, %0\n"
-         : "=r" (result)
-         : "r" (val)
-         : "%rax",
-           "%rcx");
-#endif
+   int result = 1;
+   __asm__ __volatile__(
+           "xchgl %0, %1\n"
+           : "+r" (result), "+m" (*val)
+           :: "memory");
    return !result;
 }
-/*
-#if 1
-   __asm(
-         "movl $0,%%eax\n"
-         "movl $1,%%ebx\n"
-         "movl %1,%%ecx\n"
-         "lock\n"
-         "cmpxchgl %%ebx,(%%ecx)\n"
-         "setz %%al\n"
-         "movl %%eax,%0\n"
-         : "=r" (result)
-         : "r" (val)
-         : "%eax", "%ebx", "%ecx");
-#else
-      __asm(
-            "mov $0,%%rax\n"
-            "mov $1,%%rbx\n"
-            "mov %1,%%rcx\n"
-            "lock\n"
-            "cmpxchg %%rbx,(%%rcx)\n"
-            "setz %%al\n"
-            "mov %%rax,%0\n"
-            : "=r" (result)
-            : "r" (val)
-            : "%rax", "%rbx", "%rcx");
-#endif
-      return result;
-*/
 
 int tc_lock_lock(tc_lock_t *tc)
 {
@@ -93,13 +48,9 @@ int tc_lock_lock(tc_lock_t *tc)
    if (me == tc->tid)
       return DYNINST_DEAD_LOCK;
 
-   while (1) {
-      int wasNotLocked = atomic_set(&tc->mutex);
-      if( wasNotLocked ) {
-          tc->tid = me;
-          break;
-      }
-   }
+   while (!atomic_set(&tc->mutex));
+
+   tc->tid = me;
    return 0;
 }
 
diff --git a/dyninstAPI_RT/src/RTthread-x86.c b/dyninstAPI_RT/src/RTthread-x86.c
index a4e8c9b..21e31ba 100644
--- a/dyninstAPI_RT/src/RTthread-x86.c
+++ b/dyninstAPI_RT/src/RTthread-x86.c
@@ -57,21 +57,14 @@ int atomic_set(volatile int *val)
    return result;
 }
 #else
-int atomic_set(volatile int *val)
+static int atomic_set(volatile int *val)
 {
-   int result;
-   __asm(
-      "movl $0,%%eax\n"
-      "movl $1,%%edx\n"
-      "movl %1,%%ecx\n"
-      "lock\n"
-      "cmpxchgl %%edx,(%%ecx)\n"
-      "setz %%al\n"
-      "movl %%eax,%0\n"
-      : "=r" (result)
-      : "r" (val)
-      : "%eax", "%edx", "%ecx");
-   return result;
+   int result = 1;
+   __asm__ __volatile__(
+           "xchgl %0, %1\n"
+           : "+r" (result), "+m" (*val)
+           :: "memory");
+   return !result;
 }
 #endif
 
@@ -83,13 +76,9 @@ int tc_lock_lock(tc_lock_t *tc)
    if (me == tc->tid)
       return DYNINST_DEAD_LOCK;
 
-   while (1) {
-      if (tc->mutex == 0 && atomic_set(&tc->mutex))
-      {
-         tc->tid = me;
-         break;
-      }
-   }
+   while (!atomic_set(&tc->mutex));
+
+   tc->tid = me;
    return 0;
 }
 
-- 
1.8.3.1

[← Prev in Thread] Current Thread [Next in Thread→]
  • [DynInst_API:] [PATCH] Fix infinite loops in x86/x86_64 tc_lock_lock, Josh Stone <=