Re: [Gems-users] Ruby Segmentation Fault


Date: Tue, 17 Feb 2009 19:10:41 +0200
From: Konstantinos Nikas <knikas@xxxxxxxxxxxxxxxxx>
Subject: Re: [Gems-users] Ruby Segmentation Fault
Hi,

I think I have now found out what is happening (although I have no clue whether it is "normal"). Thread 0 starts a transaction and I see the following:

41510444 1 [1,0] ISOLATE XACT STORE [0x1f247ec0, line 0x1f247ec0] XACT LEVEL: 1 PC = [0x13228, line 0x13200]
41510444 1 [1,0] LOGGING STORE: [0xff0fbec0, line 0xff0fbec0] 1 PC = [0x13228, line 0x13200]
41510444 1 [1,0] ADD UNDO LOG ENTRY: [0xff0fbec0, line 0xff0fbec0] [0x1f247ec0, line 0x1f247ec0] LogAddress: [0x2d9174, line 0x2d9140] 1

The transaction moves on and at some point it needs to abort. The software handler kicks in to unroll the log and undoes the log entries, which include the 64 bytes that start at 0xFF0FBEC0.

However, for some reason, during this invocation of the software handler %fp + 0x44 = 0xFF0FBECC, which is the location used to store the value needed to access the right threadTransContext structure. This address lies inside the 64-byte line being undone (0xFF0FBEC0 + 0xC). When the line is restored in tm_unroll_log_entry, this value is lost and the software handler saves the new xact_level in the wrong location.

In the previous invocations of the software handler, %fp + 0x44 = 0xFF0FBDFC. This means that the handler stores the new values of xact_level and xact_log_size in the right location, since that memory line is not undone, and the transaction can be correctly restarted.

Obviously, there shouldn't be any conflicts between the addresses used by the software handler and those included inside a transaction. I have followed all the instructions for preparing the workloads and hopefully I haven't missed anything.

Any workarounds?

Kind regards,

Kostis


Hi,

Back again with some more info. Here are parts of the assembler dump for tm_log_unroll:

0x0001aedc <tm_log_unroll+0>:    save  %sp, -136, %sp
0x0001aee0 <tm_log_unroll+4>:    mov  %i0, %g5
0x0001aee4 <tm_log_unroll+8>:    st  %g5, [ %fp + 0x44 ]
0x0001aee8 <tm_log_unroll+12>:    b  0x1aef0 <tm_log_unroll+20>
0x0001aeec <tm_log_unroll+16>:    nop
0x0001aef0 <tm_log_unroll+20>:    sethi  %hi(0x30400), %g5
0x0001aef4 <tm_log_unroll+24>: add %g5, 0xa8, %o2 ! 0x304a8 <threadTransContext>
0x0001aef8 <tm_log_unroll+28>:    ld  [ %o2 ], %o3
0x0001aefc <tm_log_unroll+32>:    ld  [ %fp + 0x44 ], %o4
0x0001af00 <tm_log_unroll+36>:    sll  %o4, 2, %o5
0x0001af04 <tm_log_unroll+40>:    ld  [ %o3 + %o5 ], %o4
0x0001af08 <tm_log_unroll+44>:    sethi  %hi(0x1000), %o5
0x0001af0c <tm_log_unroll+48>:    add  %o5, 0x20, %o7    ! 0x1020
0x0001af10 <tm_log_unroll+52>:    add  %o4, %o7, %l0
0x0001af14 <tm_log_unroll+56>:    st  %l0, [ %sp + 0x7c ]
0x0001af18 <tm_log_unroll+60>:    sethi  %hi(0x30400), %l1
0x0001af1c <tm_log_unroll+64>: add %l1, 0xa8, %l2 ! 0x304a8 <threadTransContext>
0x0001af20 <tm_log_unroll+68>:    ld  [ %l2 ], %l3
0x0001af24 <tm_log_unroll+72>:    ld  [ %fp + 0x44 ], %l4
0x0001af28 <tm_log_unroll+76>:    sll  %l4, 2, %l5
0x0001af2c <tm_log_unroll+80>:    ld  [ %l3 + %l5 ], %l4
0x0001af30 <tm_log_unroll+84>:    sethi  %hi(0x1000), %l5
0x0001af34 <tm_log_unroll+88>:    add  %l5, 0x10, %l6    ! 0x1010
0x0001af38 <tm_log_unroll+92>:    ld  [ %l4 + %l6 ], %l5
0x0001af3c <tm_log_unroll+96>:    st  %l5, [ %sp + 0x78 ]
0x0001af40 <tm_log_unroll+100>:    ld  [ %sp + 0x78 ], %l6
0x0001af44 <tm_log_unroll+104>:    srl  %l6, 2, %l7
0x0001af48 <tm_log_unroll+108>:    sll  %l7, 2, %g5
0x0001af4c <tm_log_unroll+112>:    ld  [ %sp + 0x7c ], %o2
0x0001af50 <tm_log_unroll+116>:    add  %o2, %g5, %o3
0x0001af54 <tm_log_unroll+120>:    st  %o3, [ %sp + 0x74 ]
0x0001af58 <tm_log_unroll+124>:    sethi  %hi(0x30400), %o4
0x0001af5c <tm_log_unroll+128>: add %o4, 0xa8, %o5 ! 0x304a8 <threadTransContext>
0x0001af60 <tm_log_unroll+132>:    ld  [ %o5 ], %o7
0x0001af64 <tm_log_unroll+136>:    ld  [ %fp + 0x44 ], %l0
0x0001af68 <tm_log_unroll+140>:    sll  %l0, 2, %l1
0x0001af6c <tm_log_unroll+144>:    ld  [ %o7 + %l1 ], %l0
0x0001af70 <tm_log_unroll+148>:    sethi  %hi(0x1000), %l1
0x0001af74 <tm_log_unroll+152>:    add  %l1, 0x14, %l2    ! 0x1014
0x0001af78 <tm_log_unroll+156>:    ld  [ %l0 + %l2 ], %l1
0x0001af7c <tm_log_unroll+160>:    st  %l1, [ %sp + 0x70 ]
0x0001af80 <tm_log_unroll+164>:    sethi  %hi(0x30400), %l2
0x0001af84 <tm_log_unroll+168>: add %l2, 0xa8, %l3 ! 0x304a8 <threadTransContext>
0x0001af88 <tm_log_unroll+172>:    ld  [ %l3 ], %l4
0x0001af8c <tm_log_unroll+176>:    ld  [ %fp + 0x44 ], %l5
0x0001af90 <tm_log_unroll+180>:    sll  %l5, 2, %l6
0x0001af94 <tm_log_unroll+184>:    ld  [ %l4 + %l6 ], %l5
0x0001af98 <tm_log_unroll+188>:    sethi  %hi(0x1000), %l6
0x0001af9c <tm_log_unroll+192>:    add  %l6, 8, %l7    ! 0x1008
0x0001afa0 <tm_log_unroll+196>:    ld  [ %l5 + %l7 ], %l6
0x0001afa4 <tm_log_unroll+200>:    st  %l6, [ %sp + 0x6c ]
0x0001afa8 <tm_log_unroll+204>:    sethi  %hi(0x30400), %l7
0x0001afac <tm_log_unroll+208>: add %l7, 0xa8, %g5 ! 0x304a8 <threadTransContext>
0x0001afb0 <tm_log_unroll+212>:    ld  [ %g5 ], %o2
0x0001afb4 <tm_log_unroll+216>:    ld  [ %fp + 0x44 ], %o3
0x0001afb8 <tm_log_unroll+220>:    sll  %o3, 2, %o4
0x0001afbc <tm_log_unroll+224>:    ld  [ %o2 + %o4 ], %o3
0x0001afc0 <tm_log_unroll+228>:    sethi  %hi(0x1000), %o4
0x0001afc4 <tm_log_unroll+232>:    ld  [ %o3 + %o4 ], %o5
0x0001afc8 <tm_log_unroll+236>:    st  %o5, [ %sp + 0x80 ]
0x0001afcc <tm_log_unroll+240>:    clr  [ %sp + 0x68 ]
......................................
 **** code for while loop (omitted)  *****
.......................................
0x0001b1bc <tm_log_unroll+736>:    nop
0x0001b1c0 <tm_log_unroll+740>:    b  0x1b1c8 <tm_log_unroll+748>
0x0001b1c4 <tm_log_unroll+744>:    nop
0x0001b1c8 <tm_log_unroll+748>:    ld  [ %sp + 0x70 ], %g5
0x0001b1cc <tm_log_unroll+752>:    cmp  %g5, 0
0x0001b1d0 <tm_log_unroll+756>: bgu,pn %icc, 0x1b1e0 <tm_log_unroll+772>
0x0001b1d4 <tm_log_unroll+760>:    nop
0x0001b1d8 <tm_log_unroll+764>:    b  0x1b200 <tm_log_unroll+804>
0x0001b1dc <tm_log_unroll+768>:    nop
0x0001b1e0 <tm_log_unroll+772>:    ld  [ %sp + 0x70 ], %g5
0x0001b1e4 <tm_log_unroll+776>:    ld  [ %fp + 0x44 ], %o2
0x0001b1e8 <tm_log_unroll+780>:    mov  %g5, %o0
0x0001b1ec <tm_log_unroll+784>:    mov  %o2, %o1
0x0001b1f0 <tm_log_unroll+788>:    call  0x1a960 <randomized_backoff>
0x0001b1f4 <tm_log_unroll+792>:    nop
0x0001b1f8 <tm_log_unroll+796>:    b  0x1b200 <tm_log_unroll+804>
0x0001b1fc <tm_log_unroll+800>:    nop
0x0001b200 <tm_log_unroll+804>:    sethi  %hi(0x8ae000), %g0
0x0001b204 <tm_log_unroll+808>:    sethi  %hi(0x30400), %g5
0x0001b208 <tm_log_unroll+812>: add %g5, 0xa8, %o2 ! 0x304a8 <threadTransContext>
0x0001b20c <tm_log_unroll+816>:    ld  [ %o2 ], %o3
0x0001b210 <tm_log_unroll+820>:    ld  [ %fp + 0x44 ], %o4
0x0001b214 <tm_log_unroll+824>:    sll  %o4, 2, %o5
0x0001b218 <tm_log_unroll+828>:    ld  [ %o3 + %o5 ], %o4
0x0001b21c <tm_log_unroll+832>:    ld  [ %sp + 0x68 ], %o5
0x0001b220 <tm_log_unroll+836>:    sethi  %hi(0x1000), %o7
0x0001b224 <tm_log_unroll+840>:    add  %o7, 4, %l0    ! 0x1004
0x0001b228 <tm_log_unroll+844>:    st  %o5, [ %o4 + %l0 ]
0x0001b22c <tm_log_unroll+848>:    sethi  %hi(0x30400), %l1
0x0001b230 <tm_log_unroll+852>: add %l1, 0xa8, %l2 ! 0x304a8 <threadTransContext>
0x0001b234 <tm_log_unroll+856>:    ld  [ %l2 ], %l3
0x0001b238 <tm_log_unroll+860>:    ld  [ %fp + 0x44 ], %l4
0x0001b23c <tm_log_unroll+864>:    sll  %l4, 2, %l5
0x0001b240 <tm_log_unroll+868>:    ld  [ %l3 + %l5 ], %l4
0x0001b244 <tm_log_unroll+872>:    ld  [ %sp + 0x6c ], %l5
0x0001b248 <tm_log_unroll+876>:    sethi  %hi(0x1000), %l6
0x0001b24c <tm_log_unroll+880>:    add  %l6, 8, %l7    ! 0x1008
0x0001b250 <tm_log_unroll+884>:    st  %l5, [ %l4 + %l7 ]
0x0001b254 <tm_log_unroll+888>:    sethi  %hi(0x30400), %g5
0x0001b258 <tm_log_unroll+892>: add %g5, 0xa8, %o2 ! 0x304a8 <threadTransContext>
0x0001b25c <tm_log_unroll+896>:    ld  [ %o2 ], %o3
0x0001b260 <tm_log_unroll+900>:    ld  [ %fp + 0x44 ], %o4
0x0001b264 <tm_log_unroll+904>:    sll  %o4, 2, %o5
0x0001b268 <tm_log_unroll+908>:    ld  [ %o3 + %o5 ], %o4
0x0001b26c <tm_log_unroll+912>:    ld  [ %sp + 0x78 ], %o5
0x0001b270 <tm_log_unroll+916>:    sethi  %hi(0x1000), %o7
0x0001b274 <tm_log_unroll+920>:    add  %o7, 0x10, %l0    ! 0x1010
0x0001b278 <tm_log_unroll+924>:    st  %o5, [ %o4 + %l0 ]
0x0001b27c <tm_log_unroll+928>:    sethi  %hi(0x8ae000), %g0
0x0001b280 <tm_log_unroll+932>:    b  0x1b288 <tm_log_unroll+940>
0x0001b284 <tm_log_unroll+936>:    nop
0x0001b288 <tm_log_unroll+940>:    ret
0x0001b28c <tm_log_unroll+944>:    restore
End of assembler dump.

As you can see, the handler uses mem[%fp + 0x44] to access the threadTransContext, and at the start it stores register %g5 (which at that moment is 0) at this location. Now, the first two times the handler is called everything works fine and the transaction is restarted correctly. However, the third time it is called something goes wrong. So I added a breakpoint to monitor reads and writes to that address (as I thought that there was a memory race and a stray write from some other CPU was destroying the correct values). However, the write comes from the CPU that runs the software handler, and it happens inside tm_unroll_log_entry (which is called inside the while loop). The dump of tm_unroll_log_entry is the following:

Dump of assembler code for function tm_unroll_log_entry:
0x0001ab6c <tm_unroll_log_entry+0>:     ld  [ %o0 + 0x40 ], %o5
0x0001ab70 <tm_unroll_log_entry+4>:     mov  %o0, %o4
0x0001ab74 <tm_unroll_log_entry+8>:     and  %o5, -64, %o5
0x0001ab78 <tm_unroll_log_entry+12>:    sub  %o5, %o0, %o3
0x0001ab7c <tm_unroll_log_entry+16>:    sra  %o3, 0x1f, %o1
0x0001ab80 <tm_unroll_log_entry+20>:    xor  %o3, %o1, %o2
0x0001ab84 <tm_unroll_log_entry+24>:    sub  %o2, %o1, %o0
0x0001ab88 <tm_unroll_log_entry+28>:    cmp  %o0, 0x40
0x0001ab8c <tm_unroll_log_entry+32>: bl,a,pn %icc, 0x1ac1c <tm_unroll_log_entry+176>
0x0001ab90 <tm_unroll_log_entry+36>:    ld  [ %o4 ], %g5
0x0001ab94 <tm_unroll_log_entry+40>:    ld  [ %o4 ], %o2
0x0001ab98 <tm_unroll_log_entry+44>:    ld  [ %o4 + 4 ], %o1
0x0001ab9c <tm_unroll_log_entry+48>:    ld  [ %o4 + 8 ], %o0
0x0001aba0 <tm_unroll_log_entry+52>:    st  %o2, [ %o5 ]
0x0001aba4 <tm_unroll_log_entry+56>:    st  %o1, [ %o5 + 4 ]
0x0001aba8 <tm_unroll_log_entry+60>:    st  %o0, [ %o5 + 8 ]
0x0001abac <tm_unroll_log_entry+64>:    ld  [ %o4 + 0xc ], %g5
0x0001abb0 <tm_unroll_log_entry+68>:    ld  [ %o4 + 0x10 ], %g1
0x0001abb4 <tm_unroll_log_entry+72>:    ld  [ %o4 + 0x14 ], %o3
0x0001abb8 <tm_unroll_log_entry+76>:    ld  [ %o4 + 0x18 ], %o2
0x0001abbc <tm_unroll_log_entry+80>:    ld  [ %o4 + 0x1c ], %o1
0x0001abc0 <tm_unroll_log_entry+84>:    ld  [ %o4 + 0x20 ], %o0
0x0001abc4 <tm_unroll_log_entry+88>:    st  %g5, [ %o5 + 0xc ]
0x0001abc8 <tm_unroll_log_entry+92>:    st  %g1, [ %o5 + 0x10 ]
0x0001abcc <tm_unroll_log_entry+96>:    st  %o3, [ %o5 + 0x14 ]
0x0001abd0 <tm_unroll_log_entry+100>:   st  %o2, [ %o5 + 0x18 ]
0x0001abd4 <tm_unroll_log_entry+104>:   st  %o1, [ %o5 + 0x1c ]
0x0001abd8 <tm_unroll_log_entry+108>:   st  %o0, [ %o5 + 0x20 ]
0x0001abdc <tm_unroll_log_entry+112>:   ld  [ %o4 + 0x24 ], %g5
0x0001abe0 <tm_unroll_log_entry+116>:   ld  [ %o4 + 0x28 ], %g1
0x0001abe4 <tm_unroll_log_entry+120>:   ld  [ %o4 + 0x2c ], %o3
0x0001abe8 <tm_unroll_log_entry+124>:   ld  [ %o4 + 0x30 ], %o2
0x0001abec <tm_unroll_log_entry+128>:   ld  [ %o4 + 0x34 ], %o1
0x0001abf0 <tm_unroll_log_entry+132>:   ld  [ %o4 + 0x38 ], %o0
0x0001abf4 <tm_unroll_log_entry+136>:   ld  [ %o4 + 0x3c ], %o4
0x0001abf8 <tm_unroll_log_entry+140>:   st  %g5, [ %o5 + 0x24 ]
0x0001abfc <tm_unroll_log_entry+144>:   st  %g1, [ %o5 + 0x28 ]
0x0001ac00 <tm_unroll_log_entry+148>:   st  %o3, [ %o5 + 0x2c ]
0x0001ac04 <tm_unroll_log_entry+152>:   st  %o2, [ %o5 + 0x30 ]
0x0001ac08 <tm_unroll_log_entry+156>:   st  %o1, [ %o5 + 0x34 ]
0x0001ac0c <tm_unroll_log_entry+160>:   st  %o0, [ %o5 + 0x38 ]
0x0001ac10 <tm_unroll_log_entry+164>:   st  %o4, [ %o5 + 0x3c ]
0x0001ac14 <tm_unroll_log_entry+168>:   retl
0x0001ac18 <tm_unroll_log_entry+172>:   nop
0x0001ac1c <tm_unroll_log_entry+176>:   st  %g5, [ %o5 ]
0x0001ac20 <tm_unroll_log_entry+180>:   ld  [ %o4 + 4 ], %g1
0x0001ac24 <tm_unroll_log_entry+184>:   st  %g1, [ %o5 + 4 ]
0x0001ac28 <tm_unroll_log_entry+188>:   ld  [ %o4 + 8 ], %o3
0x0001ac2c <tm_unroll_log_entry+192>:   st  %o3, [ %o5 + 8 ]
0x0001ac30 <tm_unroll_log_entry+196>:   ld  [ %o4 + 0xc ], %o2
0x0001ac34 <tm_unroll_log_entry+200>:   st  %o2, [ %o5 + 0xc ]
0x0001ac38 <tm_unroll_log_entry+204>:   ld  [ %o4 + 0x10 ], %o1
0x0001ac3c <tm_unroll_log_entry+208>:   st  %o1, [ %o5 + 0x10 ]
0x0001ac40 <tm_unroll_log_entry+212>:   ld  [ %o4 + 0x14 ], %o0
0x0001ac44 <tm_unroll_log_entry+216>:   st  %o0, [ %o5 + 0x14 ]
0x0001ac48 <tm_unroll_log_entry+220>:   ld  [ %o4 + 0x18 ], %g5
0x0001ac4c <tm_unroll_log_entry+224>:   st  %g5, [ %o5 + 0x18 ]
0x0001ac50 <tm_unroll_log_entry+228>:   ld  [ %o4 + 0x1c ], %g1
0x0001ac54 <tm_unroll_log_entry+232>:   st  %g1, [ %o5 + 0x1c ]
0x0001ac58 <tm_unroll_log_entry+236>:   ld  [ %o4 + 0x20 ], %o3
0x0001ac5c <tm_unroll_log_entry+240>:   st  %o3, [ %o5 + 0x20 ]
0x0001ac60 <tm_unroll_log_entry+244>:   ld  [ %o4 + 0x24 ], %o2
0x0001ac64 <tm_unroll_log_entry+248>:   st  %o2, [ %o5 + 0x24 ]
0x0001ac68 <tm_unroll_log_entry+252>:   ld  [ %o4 + 0x28 ], %o1
0x0001ac6c <tm_unroll_log_entry+256>:   st  %o1, [ %o5 + 0x28 ]
0x0001ac70 <tm_unroll_log_entry+260>:   ld  [ %o4 + 0x2c ], %o0
0x0001ac74 <tm_unroll_log_entry+264>:   st  %o0, [ %o5 + 0x2c ]
0x0001ac78 <tm_unroll_log_entry+268>:   ld  [ %o4 + 0x30 ], %g5
0x0001ac7c <tm_unroll_log_entry+272>:   st  %g5, [ %o5 + 0x30 ]
0x0001ac80 <tm_unroll_log_entry+276>:   ld  [ %o4 + 0x34 ], %g1
0x0001ac84 <tm_unroll_log_entry+280>:   st  %g1, [ %o5 + 0x34 ]
0x0001ac88 <tm_unroll_log_entry+284>:   ld  [ %o4 + 0x38 ], %o3
0x0001ac8c <tm_unroll_log_entry+288>:   st  %o3, [ %o5 + 0x38 ]
0x0001ac90 <tm_unroll_log_entry+292>:   ld  [ %o4 + 0x3c ], %o4
0x0001ac94 <tm_unroll_log_entry+296>:   retl
0x0001ac98 <tm_unroll_log_entry+300>:   st  %o4, [ %o5 + 0x3c ]
End of assembler dump.

The offending instruction is at 0x1abc4: st %g5, [ %o5 + 0xc ], which writes the value 4 to that memory location. So when the handler returns to tm_log_unroll and tries to read [%fp + 0x44] in order to save the new xact_level and xact_log_size values, it reads the wrong value!

Any suggestions?????


Kind regards,

Kostis

I agree that a race should not occur. Could you try moving the randomized_backoff call to the end of the function, so that the threadTransContext structures get updated right after the while loop? You might also want to check whether the simulator is passing the right threadID to the software handlers, and whether there are any stray simulator log writes from processor 2 to the log of the first thread.

Jayaram
_______________________________________________
Gems-users mailing list
Gems-users@xxxxxxxxxxx
https://lists.cs.wisc.edu/mailman/listinfo/gems-users
Use Google to search the GEMS Users mailing list by adding "site:https://lists.cs.wisc.edu/archive/gems-users/" to your search.

_______________________________________________
Gems-users mailing list
Gems-users@xxxxxxxxxxx
https://lists.cs.wisc.edu/mailman/listinfo/gems-users
Use Google to search the GEMS Users mailing list by adding "site:https://lists.cs.wisc.edu/archive/gems-users/" to your search.
[← Prev in Thread] Current Thread [Next in Thread→]