The workaround mentioned seems to do the trick. I appreciate the quick response.
From: Doga Alpman <dalpman@xxxxxxxx> When I run the following code I get segmentation fault. Using python 2.7 and htcondor '$CondorVersion: 8.6.1 Mar 01 2017 BuildID: 398585 $' I get segmentation fault after all jobs complete and I call print locals() import htcondor import classad import os from collections import Counter import time known_statuses = [ 'Unexpanded', 'Idle', 'Running', 'Removed', 'Completed', 'Held', 'Error', '<', '>' ] class HTCondorTask(object): def gen_proc_ads(self): proc_ads = [] logdir = os.path.join(os.environ['HOME'], 'logs') for i in xrange(100): proc_ads.append(({'Args': '{}/tmp/job.py'.format(os.environ['HOME']), 'UserLog': os.path.join(logdir,'test.log'), 'Iwd': logdir, 'Err': 'test-err.txt', 'Out': 'test-out.txt'}, 1)) return proc_ads def program_envs(self): env = ['PYTHONHOME=/opt/miniconda/envs/SANS'] return " ".join(env) def run(self): job_ad = { 'Cmd': '/opt/miniconda/envs/SANS/bin/python', 'JobUniverse': 5, 'RunAsOwner': classad.ExprTree('true'), 'Environment': self.program_envs(), 'ShouldTransferFiles': "NO", 'FileSystemDomain': "eglp.com", 'TransferIn': classad.ExprTree('false'), 'TransferINputSizeMB': 0, 'OnExitHold': classad.ExprTree("(ExitBySignal == True) || (ExitCode != 0)"), } schedd = htcondor.Schedd() proc_ads = self.gen_proc_ads() print "Submitting {} jobs to condor".format(len(proc_ads)) self.cluster_id = schedd.submitMany(job_ad, proc_ads) self.track_jobs() print locals() def track_jobs(self): schedd = htcondor.Schedd() cnt = Counter() while True: time.sleep(10) for job in schedd.xquery(requirements="ClusterId == %d" % self.cluster_id, projection=["ProcId", "JobStatus"]): job_status = known_statuses[job['JobStatus']] cnt[job_status] += 1 if list(cnt) == ['Held']: raise Exception() print "Job Status:{}".format(cnt) if len(cnt) == 0: print "Done" break cnt.clear() if __name__ == '__main__': task = HTCondorTask() task.run() From: Doga Alpman <dalpman@xxxxxxxx> Here is backtrace, I am trying to boil it down to a small 
script that I can share.
*** Error in `python': double free or corruption (!prev): 0x000000000231fe00 *** ======= Backtrace: ========= /lib64/libc.so.6(+0x7c503)[0x7f2a54c93503] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x8f177)[0x7f2a5598c177] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x783d2)[0x7f2a559753d2] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265db)[0x7f2a55a235db] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(+0x1265eb)[0x7f2a55a235eb] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyDict_DelItem+0xf7)[0x7f2a5598b347] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyDict_DelItemString+0x31)[0x7f2a5598b9d1] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x724)[0x7f2a559f2384] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x8596)[0x7f2a559fa1f6] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323] 
/nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalFrameEx+0x86c3)[0x7f2a559fa323] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCodeEx+0x89e)[0x7f2a559fb1ce] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyEval_EvalCode+0x32)[0x7f2a559fb2e2] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyRun_FileExFlags+0xb0)[0x7f2a55a1b960] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(PyRun_SimpleFileExFlags+0xef)[0x7f2a55a1bb3f] /nfs/opt/miniconda/envs/SANS/bin/../lib/libpython2.7.so.1.0(Py_Main+0xca4)[0x7f2a55a31484] /lib64/libc.so.6(__libc_start_main+0xf5)[0x7f2a54c38b35] python[0x400649] ======= Memory map: ======== 00400000-00401000 r-xp 00000000 00:24 63050125 /nfs/opt/miniconda/envs/SANS/bin/python2.7 00600000-00601000 rw-p 00000000 00:24 63050125 /nfs/opt/miniconda/envs/SANS/bin/python2.7 00e73000-0244d000 rw-p 00000000 00:00 0 [heap] 7f2a24000000-7f2a24021000 rw-p 00000000 00:00 0
7f2a24021000-7f2a28000000 ---p 00000000 00:00 0
7f2a2a2ea000-7f2a2a310000 r-xp 00000000 00:24 62914713 /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0 7f2a2a310000-7f2a2a50f000 ---p 00026000 00:24 62914713 /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0 7f2a2a50f000-7f2a2a512000 rw-p 00025000 00:24 62914713 /nfs/opt/miniconda/envs/SANS/lib/libexpat.so.1.6.0 7f2a2a512000-7f2a2a576000 r-xp 00000000 ca:01 33652297 /usr/lib64/condor/libvomsapi.so.1.0.0 7f2a2a576000-7f2a2a775000 ---p 00064000 ca:01 33652297 /usr/lib64/condor/libvomsapi.so.1.0.0 7f2a2a775000-7f2a2a776000 r--p 00063000 ca:01 33652297 /usr/lib64/condor/libvomsapi.so.1.0.0 7f2a2a776000-7f2a2a778000 rw-p 00064000 ca:01 33652297 /usr/lib64/condor/libvomsapi.so.1.0.0 7f2a2a778000-7f2a2a779000 rw-p 00000000 00:00 0
7f2a2a779000-7f2a2a786000 r-xp 00000000 ca:01 33652215 /usr/lib64/condor/libglobus_gss_assist.so.3.7.15 7f2a2a786000-7f2a2a985000 ---p 0000d000 ca:01 33652215 /usr/lib64/condor/libglobus_gss_assist.so.3.7.15 7f2a2a985000-7f2a2a986000 r--p 0000c000 ca:01 33652215 /usr/lib64/condor/libglobus_gss_assist.so.3.7.15 7f2a2a986000-7f2a2a987000 rw-p 0000d000 ca:01 33652215 /usr/lib64/condor/libglobus_gss_assist.so.3.7.15 7f2a2a987000-7f2a2a9a2000 r-xp 00000000 ca:01 33652221 /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22 7f2a2a9a2000-7f2a2aba2000 ---p 0001b000 ca:01 33652221 /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22 7f2a2aba2000-7f2a2aba3000 r--p 0001b000 ca:01 33652221 /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22 7f2a2aba3000-7f2a2aba4000 rw-p 0001c000 ca:01 33652221 /usr/lib64/condor/libglobus_gssapi_gsi.so.4.7.22 7f2a2aba4000-7f2a2abb4000 r-xp 00000000 ca:01 33652209 /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7 7f2a2abb4000-7f2a2adb3000 ---p 00010000 ca:01 33652209 /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7 7f2a2adb3000-7f2a2adb4000 r--p 0000f000 ca:01 33652209 /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7 7f2a2adb4000-7f2a2adb5000 rw-p 00010000 ca:01 33652209 /usr/lib64/condor/libglobus_gsi_proxy_core.so.0.7.7 7f2a2adb5000-7f2a2adc5000 r-xp 00000000 ca:01 33652206 /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9 7f2a2adc5000-7f2a2afc4000 ---p 00010000 ca:01 33652206 /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9 7f2a2afc4000-7f2a2afc5000 r--p 0000f000 ca:01 33652206 /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9 7f2a2afc5000-7f2a2afc6000 rw-p 00010000 ca:01 33652206 /usr/lib64/condor/libglobus_gsi_credential.so.1.6.9 7f2a2afc6000-7f2a2afd0000 r-xp 00000000 ca:01 33652200 /usr/lib64/condor/libglobus_gsi_callback.so.0.5.8Aborted From: Doga Alpman <dalpman@xxxxxxxx> Hi, We are using submitMany function on htcondor.Schedd object and using xquery to track jobs and we are seeing segmentation faults in python. 
Has anybody else run into this issue? It looks like a reference-count
issue. Regards,