Mailing List Archives
Authenticated access
|
|
|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[HTCondor-users] Cgroups & pid namespaces
- Date: Thu, 1 Aug 2024 12:16:10 +0200
- From: Petr Vokac <petr.vokac@xxxxxxx>
- Subject: [HTCondor-users] Cgroups & pid namespaces
Hi,
With USE_PID_NAMESPACES enabled, jobs did not start and failed with:
215077 write(9</var/log/condor/StarterLog.slot1_1>, "08/01/24 11:54:10 (fd:9) (pid:1) (D_ALWAYS:2) Creating cgroup system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx for pid 215077\n", 154) = 154
215077 close(9</var/log/condor/StarterLog.slot1_1>) = 0
215077 rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE KILL SEGV STOP PROF RTMIN RT_1], NULL, 8) = 0
215077 setresuid(-1, 0, -1) = 0
215077 setresgid(-1, 0, -1) = 0
215077 rt_sigprocmask(SIG_BLOCK, ~[ILL TRAP ABRT BUS FPE SEGV RTMIN RT_1], ~[ILL TRAP ABRT BUS FPE KILL SEGV STOP PROF RTMIN RT_1], 8) = 0
215077 setresuid(-1, 0, -1) = 0
215077 setresgid(-1, 992, -1) = 0
215077 setresuid(-1, 995, -1) = 0
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/var/log/condor/StarterLog.slot1_1", O_WRONLY|O_APPEND) = 9</var/log/condor/StarterLog.slot1_1>
215077 fcntl(9</var/log/condor/StarterLog.slot1_1>, F_GETFL) = 0x8401 (flags O_WRONLY|O_APPEND|O_LARGEFILE)
215077 lseek(9</var/log/condor/StarterLog.slot1_1>, 0, SEEK_END) = 7937208
215077 getpid() = 1
215077 write(9</var/log/condor/StarterLog.slot1_1>, "08/01/24 11:54:10 (fd:9) (pid:1) (D_PRIV) PRIV_CONDOR --> PRIV_ROOT at /var/lib/condor/execute/slot1/dir_4179159/userdir/build-8ilf4W/BUILD/condor-23.8.1/src/condor_includes/condor_uid.h:169\n", 191) = 191
215077 close(9</var/log/condor/StarterLog.slot1_1>) = 0
215077 setresuid(-1, 0, -1) = 0
215077 setresgid(-1, 0, -1) = 0
215077 rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE KILL SEGV STOP PROF RTMIN RT_1], NULL, 8) = 0
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx/cgroup.kill", O_RDONLY) = -1 ENOENT (No such file or directory)
215077 newfstatat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx", 0x7ffd02c4a000, 0) = -1 ENOENT (No such file or directory)
215077 newfstatat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx", 0x7ffd02c4a1e0, 0) = -1 ENOENT (No such file or directory)
215077 mkdir("/sys/fs/cgroup/system.slice", 0755) = -1 EEXIST (File exists)
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/cgroup.subtree_control", O_WRONLY) = 9</sys/fs/cgroup/system.slice/cgroup.subtree_control>
215077 write(9</sys/fs/cgroup/system.slice/cgroup.subtree_control>, "+cpu +io +memory +pids", 22) = 22
215077 close(9</sys/fs/cgroup/system.slice/cgroup.subtree_control>) = 0
215077 mkdir("/sys/fs/cgroup/system.slice/htcondor", 0755) = -1 EEXIST (File exists)
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/htcondor/cgroup.subtree_control", O_WRONLY) = 9</sys/fs/cgroup/system.slice/htcondor/cgroup.subtree_control>
215077 write(9</sys/fs/cgroup/system.slice/htcondor/cgroup.subtree_control>, "+cpu +io +memory +pids", 22) = 22
215077 close(9</sys/fs/cgroup/system.slice/htcondor/cgroup.subtree_control>) = 0
215077 mkdir("/sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx", 0755) = 0
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx/cgroup.procs", O_WRONLY) = 9</sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx/cgroup.procs>
215077 write(9</sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx/cgroup.procs>, "215077", 6) = -1 ESRCH (No such process)
215077 rt_sigprocmask(SIG_BLOCK, ~[ILL TRAP ABRT BUS FPE SEGV RTMIN RT_1], ~[ILL TRAP ABRT BUS FPE KILL SEGV STOP PROF RTMIN RT_1], 8) = 0
215077 setresuid(-1, 0, -1) = 0
215077 setresgid(-1, 992, -1) = 0
215077 setresuid(-1, 995, -1) = 0
215077 openat(AT_FDCWD</scratch/condor/dir_213192>, "/var/log/condor/StarterLog.slot1_1", O_WRONLY|O_APPEND) = 10</var/log/condor/StarterLog.slot1_1>
215077 fcntl(10</var/log/condor/StarterLog.slot1_1>, F_GETFL) = 0x8401 (flags O_WRONLY|O_APPEND|O_LARGEFILE)
215077 lseek(10</var/log/condor/StarterLog.slot1_1>, 0, SEEK_END) = 7937399
215077 getpid() = 1
215077 write(10</var/log/condor/StarterLog.slot1_1>, "08/01/24 11:54:10 (fd:10) (pid:1) (D_ALWAYS) Error writing procid 215077 to /sys/fs/cgroup/system.slice/htcondor/condor_scratch_condor_slot1_1@xxxxxxxxxxxxxxxxxxxxxxxx/cgroup.procs: No such process\n", 198) = 198
215077 close(10</var/log/condor/StarterLog.slot1_1>) = 0
Is this expected behavior on the following system?
[root@mff1701 config.d]# uname -a
Linux mff1701.farm.particle.cz 5.14.0-427.24.1.el9_4.x86_64 #1 SMP PREEMPT_DYNAMIC Tue Jul 9 15:32:20 EDT 2024 x86_64 x86_64 x86_64 GNU/Linux
[root@mff1701 config.d]# rpm -qa condor
condor-23.8.1-1.el9.x86_64
Petr