|
Hi, Attached is the condor-generated job, as I submitted things through an ARC CE. I run condor-8.2.0-254849.x86_64
The arc CE xrsl is : & (executable="testarc.sh") (inputFiles=("testarc.sh" "")) (stdout="stdout.txt") (stderr="stderr.txt") (count=1) (memory=100) (gmlog=".arc") And the testarc.sh just does some things like “env”, “mount” and a very long sleep… I wanted to test job killing (memory, walltime…) Since I had to find the condor-generated log, I also found this in the logs : ... 007 (106.000.000) 06/25 16:42:03 Shadow exception! Error from slot1@xxxxxxxxxxxxxxxxxxxxxxx: Starter configured to use PID NAMESPACES, but libexec/condor_pid_ns_init did not run properly 0 - Run Bytes Sent By Job 0 - Run Bytes Received By Job ... 001 (106.000.000) 06/25 16:45:53 Job executing on host: <192.54.207.242:60981> ... 007 (106.000.000) 06/25 16:50:53 Shadow exception! Error from slot1@xxxxxxxxxxxxxxxxxxxxxxx: Starter configured to use PID NAMESPACES, but libexec/condor_pid_ns_init did not run properly 0 - Run Bytes Sent By Job 19085 - Run Bytes Received By Job ... 001 (106.000.000) 06/25 16:52:53 Job executing on host: <192.54.207.242:60981> ... 007 (106.000.000) 06/25 16:57:53 Shadow exception! Error from slot1@xxxxxxxxxxxxxxxxxxxxxxx: Starter configured to use PID NAMESPACES, but libexec/condor_pid_ns_init did not run properly 0 - Run Bytes Sent By Job 19085 - Run Bytes Received By Job This goes on for a very long time, until I guess the job/sleep ends. I have “USE_PID_NAMESPACES = true” in the startd config.d directory. I configured condor to run as condor and not root as I read it’s just dropping privileges (and running as root prevents benchmark from succeeding at start) and the CONDOR_IDS variable is correctly
defined to the condor uid/gid, but I realize the condor UID is different on the startd machine than on the scheduler and collector ones : might that be an issue ? Regards De : HTCondor-users
[mailto:htcondor-users-bounces@xxxxxxxxxxx] De la part de Greg Thain On 06/26/2014 05:20 AM, SCHAER Frederic wrote:
|
----- starting submit_condor_job -----
Warning: runtime script ENV/GLITE is missing
HTCondor job script built
HTCondor script follows:
-------------------------------------------------------------------
#!/bin/bash -l
# Override the umask inherited from the execution node (its value is
# sometimes really strange): owner keeps full access, group/others get none.
umask u=rwx,g=,o=
# Source a script and hand it positional arguments (kept as a function for
# DASH shells, per the original note).
#
# Arguments: $1 - path of the script to source
#            $2.. - become $1.. inside the sourced script
# Globals:   script (written) - left as a plain global; `local` is avoided
#            because it is not part of POSIX sh
# Returns:   the exit status of the sourced script
sourcewithargs() {
  script=$1
  shift
  # Quoted so that a path containing whitespace or glob characters is sourced
  # as one file name instead of being split into "file + stray arguments".
  . "$script"
}
# Environment requested by the user: the global job identifier is exported
# so that child processes can see it.
GRID_GLOBAL_JOBID='gsiftp://dev7246.datagrid.cea.fr:2811/jobs/FKPMDmkO3JknN6eUDnOwVYjqABFKDmABFKDmACHKDmABFKDm5KmuDn'
export GRID_GLOBAL_JOBID

# Session directory of this job inside the HTCondor scratch directory; the
# diag file sits next to it, stdout/stderr are written inside it.
RUNTIME_JOB_DIR="${_CONDOR_SCRATCH_DIR}/FKPMDmkO3JknN6eUDnOwVYjqABFKDmABFKDmACHKDmABFKDm5KmuDn"
RUNTIME_JOB_DIAG="${RUNTIME_JOB_DIR}.diag"
RUNTIME_JOB_STDIN=/dev/null
RUNTIME_JOB_STDOUT="${RUNTIME_JOB_DIR}/stdout.txt"
RUNTIME_JOB_STDERR="${RUNTIME_JOB_DIR}/stderr.txt"

# Keep values already present in the environment; otherwise fall back to the
# HTCondor scratch directory (local scratch) or to an empty string.
: "${RUNTIME_LOCAL_SCRATCH_DIR:=${_CONDOR_SCRATCH_DIR}}"
: "${RUNTIME_FRONTEND_SEES_NODE:=}"
: "${RUNTIME_NODE_SEES_FRONTEND:=}"
# Stage the session directory onto node-local scratch.
# This whole section runs only when both RUNTIME_LOCAL_SCRATCH_DIR and
# RUNTIME_NODE_SEES_FRONTEND are non-empty; otherwise RUNTIME_JOB_DIR and the
# stdin/stdout/stderr paths keep the values assigned earlier in the script.
if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then
# Node-side session directory: same base name as the job directory, placed
# under the local scratch directory.
# NOTE(review): RUNTIME_LOCAL_SCRATCH_DIR defaults to _CONDOR_SCRATCH_DIR, which
# is also the parent of RUNTIME_JOB_DIR; in that case this path equals
# RUNTIME_JOB_DIR and the rm -rf below removes the session directory itself.
# Confirm that combination cannot occur.
RUNTIME_NODE_JOB_DIR="$RUNTIME_LOCAL_SCRATCH_DIR"/`basename "$RUNTIME_JOB_DIR"`
# Start from an empty node directory; anything already at that path is deleted.
rm -rf "$RUNTIME_NODE_JOB_DIR"
mkdir -p "$RUNTIME_NODE_JOB_DIR"
# move directory contents (dot files included) from the job directory to the
# node directory; ".", "..", ".diag" and ".comment" are left in place
for f in "$RUNTIME_JOB_DIR"/.* "$RUNTIME_JOB_DIR"/*; do
[ "$f" = "$RUNTIME_JOB_DIR/*" ] && continue # glob failed, no files
[ "$f" = "$RUNTIME_JOB_DIR/." ] && continue
[ "$f" = "$RUNTIME_JOB_DIR/.." ] && continue
[ "$f" = "$RUNTIME_JOB_DIR/.diag" ] && continue
[ "$f" = "$RUNTIME_JOB_DIR/.comment" ] && continue
# A failed move aborts the whole job script with status 1.
if ! mv "$f" "$RUNTIME_NODE_JOB_DIR"; then
echo "Failed to move '$f' to '$RUNTIME_NODE_JOB_DIR'" 1>&2
exit 1
fi
done
if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then
# creating link for whole directory
# NOTE(review): nothing above removes RUNTIME_JOB_DIR itself, so if it still
# exists as a directory ln will create the link inside it rather than in its
# place - confirm this is intended.
ln -s "$RUNTIME_FRONTEND_SEES_NODE"/`basename "$RUNTIME_JOB_DIR"` "$RUNTIME_JOB_DIR"
else
# keep stdout, stderr and control directory on frontend
# recreate job directory
mkdir -p "$RUNTIME_JOB_DIR"
# make those files
mkdir -p `dirname "$RUNTIME_JOB_STDOUT"`
mkdir -p `dirname "$RUNTIME_JOB_STDERR"`
touch "$RUNTIME_JOB_STDOUT"
touch "$RUNTIME_JOB_STDERR"
# Node-side counterparts of the stdout/stderr paths: a leading RUNTIME_JOB_DIR
# prefix is replaced by RUNTIME_NODE_JOB_DIR. The directory names are pasted
# into the sed expression as-is, so they are treated as regex/replacement text
# and must not contain '#'.
RUNTIME_JOB_STDOUT__=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
RUNTIME_JOB_STDERR__=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
# Drop any node-side file of that name (errors are silenced), then link the
# node-side path to the file kept in the job directory, but only when the
# two paths actually differ.
rm "$RUNTIME_JOB_STDOUT__" 2>/dev/null
rm "$RUNTIME_JOB_STDERR__" 2>/dev/null
if [ ! -z "$RUNTIME_JOB_STDOUT__" ] && [ "$RUNTIME_JOB_STDOUT" != "$RUNTIME_JOB_STDOUT__" ]; then
ln -s "$RUNTIME_JOB_STDOUT" "$RUNTIME_JOB_STDOUT__"
fi
# When stdout and stderr resolve to the same node-side path, the link created
# above is reused and no second link is made.
if [ "$RUNTIME_JOB_STDOUT__" != "$RUNTIME_JOB_STDERR__" ] ; then
if [ ! -z "$RUNTIME_JOB_STDERR__" ] && [ "$RUNTIME_JOB_STDERR" != "$RUNTIME_JOB_STDERR__" ]; then
ln -s "$RUNTIME_JOB_STDERR" "$RUNTIME_JOB_STDERR__"
fi
fi
# RUNTIME_CONTROL_DIR is not assigned anywhere in this script; this branch is
# taken only if it is already set when the script starts.
if [ ! -z "$RUNTIME_CONTROL_DIR" ] ; then
# move control directory back to frontend
RUNTIME_CONTROL_DIR__=`echo "$RUNTIME_CONTROL_DIR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
mv "$RUNTIME_CONTROL_DIR__" "$RUNTIME_CONTROL_DIR"
fi
fi
# adjust stdin,stdout & stderr pointers
# (same prefix substitution as above; paths outside RUNTIME_JOB_DIR, such as
# /dev/null, do not match the prefix and stay unchanged)
RUNTIME_JOB_STDIN=`echo "$RUNTIME_JOB_STDIN" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
RUNTIME_JOB_STDOUT=`echo "$RUNTIME_JOB_STDOUT" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
RUNTIME_JOB_STDERR=`echo "$RUNTIME_JOB_STDERR" | sed "s#^${RUNTIME_JOB_DIR}#${RUNTIME_NODE_JOB_DIR}#"`
# Remember the original job directory for the stage-out step, then make the
# node directory the session directory used by the rest of the script.
RUNTIME_FRONTEND_JOB_DIR="$RUNTIME_JOB_DIR"
RUNTIME_JOB_DIR="$RUNTIME_NODE_JOB_DIR"
fi
# Create the session directory here when RUNTIME_NODE_SEES_FRONTEND is empty.
[ -n "$RUNTIME_NODE_SEES_FRONTEND" ] || mkdir -p "$RUNTIME_JOB_DIR"

# Overall status of the wrapper; later steps only run while it is still 0.
RESULT=0

# Move the input file from the current directory into the session directory
# (the target is given relative to the current directory).
mv ./testarc.sh FKPMDmkO3JknN6eUDnOwVYjqABFKDmABFKDmACHKDmABFKDm5KmuDn/.
# Run the first stage of the runtime environment script and then the job
# itself. Everything is skipped unless RESULT is still 0.
#
# Reads:  RESULT, RUNTIME_CONFIG_DIR, RUNTIME_JOB_DIR, RUNTIME_JOB_DIAG,
#         RUNTIME_JOB_STDIN, RUNTIME_JOB_STDOUT, RUNTIME_JOB_STDERR
# Writes: RESULT (exit status of the job), HOME (exported), and appends
#         runtimeenvironments/nodename/ExecutionUnits (plus the GNU time
#         report, when available) to RUNTIME_JOB_DIAG
# Exits:  with 1 if the runtime script fails, the session directory cannot be
#         entered, the executable is missing, or its interpreter is not found
if [ "$RESULT" = '0' ] ; then
  # Running runtime scripts
  export RUNTIME_CONFIG_DIR="${RUNTIME_CONFIG_DIR:-/etc/arc/runtime}"
  runtimeenvironments=
  if [ ! -z "$RUNTIME_CONFIG_DIR" ] ; then
    if [ -r "${RUNTIME_CONFIG_DIR}/ENV/GLITE" ] ; then
      runtimeenvironments="${runtimeenvironments}ENV/GLITE;"
      cmdl="${RUNTIME_CONFIG_DIR}/ENV/GLITE"
      # Stage "1" call. The status is tested with $? (not "if ! ...") so the
      # sourced script runs in exactly the same shell context as before.
      sourcewithargs "$cmdl" 1
      if [ $? -ne '0' ] ; then
        echo "Runtime ENV/GLITE script failed " 1>&2
        # Note: this overwrites (does not append to) the diag file.
        echo "Runtime ENV/GLITE script failed " 1>"$RUNTIME_JOB_DIAG"
        exit 1
      fi
    fi
  fi
  echo "runtimeenvironments=$runtimeenvironments" >> "$RUNTIME_JOB_DIAG"
  if [ "$RESULT" = '0' ] ; then
    # Changing to session directory
    HOME=$RUNTIME_JOB_DIR
    export HOME
    if ! cd "$RUNTIME_JOB_DIR"; then
      echo "Failed to switch to '$RUNTIME_JOB_DIR'" 1>&2
      RESULT=1
    fi
    if [ ! -z "$RESULT" ] && [ "$RESULT" != 0 ]; then
      exit $RESULT
    fi
    nodename=$(/bin/hostname -f)
    echo "nodename=$nodename" >> "$RUNTIME_JOB_DIAG"
    echo "ExecutionUnits=1" >> "$RUNTIME_JOB_DIAG"
    executable='./testarc.sh'
    # Check if executable exists
    if [ ! -f "$executable" ]; then
      # Redirections apply left to right: both job output files are
      # created/truncated, and the message itself ends up in the stderr file.
      echo "Path \"$executable\" does not seem to exist" 1>"$RUNTIME_JOB_STDOUT" 2>"$RUNTIME_JOB_STDERR" 1>&2
      exit 1
    fi
    # See if executable is a script, and extract the name of the interpreter
    # (expansions are quoted so the shebang line is never glob-expanded)
    line1=$(dd if="$executable" count=1 2>/dev/null | head -n 1)
    command=$(echo "$line1" | sed -n 's/^#! *//p')
    interpreter=$(echo "$command" | awk '{print $1}')
    if [ "$interpreter" = /usr/bin/env ]; then
      interpreter=$(echo "$command" | awk '{print $2}')
    fi
    # If it's a script and the interpreter is not found ...
    [ "x$interpreter" = x ] || type "$interpreter" > /dev/null 2>&1 || {
      echo "Cannot run $executable: $interpreter: not found" 1>"$RUNTIME_JOB_STDOUT" 2>"$RUNTIME_JOB_STDERR" 1>&2
      exit 1; }
    GNU_TIME='/usr/bin/time'
    if [ ! -z "$GNU_TIME" ] && ! "$GNU_TIME" --version >/dev/null 2>&1; then
      # Diagnostics go to stderr, like every other message of this wrapper
      # (the previous "2>&1" left the warning on stdout).
      echo "WARNING: GNU time not found at: $GNU_TIME" 1>&2
      GNU_TIME=
    fi
    # Run the job, wrapped in GNU time when available so that resource usage
    # is appended to the diag file. Redirect targets are quoted to avoid
    # "ambiguous redirect" errors on paths containing whitespace.
    if [ -z "$GNU_TIME" ] ; then
      "./testarc.sh" <"$RUNTIME_JOB_STDIN" 1>"$RUNTIME_JOB_STDOUT" 2>"$RUNTIME_JOB_STDERR"
    else
      "$GNU_TIME" -o "$RUNTIME_JOB_DIAG" -a -f 'WallTime=%es\nKernelTime=%Ss\nUserTime=%Us\nCPUUsage=%P\nMaxResidentMemory=%MkB\nAverageResidentMemory=%tkB\nAverageTotalMemory=%KkB\nAverageUnsharedMemory=%DkB\nAverageUnsharedStack=%pkB\nAverageSharedMemory=%XkB\nPageSize=%ZB\nMajorPageFaults=%F\nMinorPageFaults=%R\nSwaps=%W\nForcedSwitches=%c\nWaitSwitches=%w\nInputs=%I\nOutputs=%O\nSocketReceived=%r\nSocketSent=%s\nSignals=%k\n' "./testarc.sh" <"$RUNTIME_JOB_STDIN" 1>"$RUNTIME_JOB_STDOUT" 2>"$RUNTIME_JOB_STDERR"
    fi
    # Exit status of whichever branch ran the job.
    RESULT=$?
  fi
fi
# Second call of the ENV/GLITE runtime script, this time with argument "2".
# Runs only when RUNTIME_CONFIG_DIR is non-empty and the script is readable;
# its exit status is not checked.
if [ -n "$RUNTIME_CONFIG_DIR" ] && [ -r "${RUNTIME_CONFIG_DIR}/ENV/GLITE" ]; then
  cmdl="${RUNTIME_CONFIG_DIR}/ENV/GLITE"
  # Quoted so a configuration directory containing whitespace is passed as
  # one argument.
  sourcewithargs "$cmdl" 2
fi
# Post-job clean-up of the current directory; runs only when
# RUNTIME_LOCAL_SCRATCH_DIR is non-empty.
if [ -n "$RUNTIME_LOCAL_SCRATCH_DIR" ]; then
  # Drop every symbolic link and make all regular files owner-writable.
  find ./ -type l -exec rm -f "{}" ";"
  find ./ -type f -exec chmod u+w "{}" ";"
  # Requested outputs: remove the owner write bit and move each one into the
  # parent of the current directory (chmod errors are silenced).
  for keep in 'stdout.txt' 'stderr.txt' '.arc'; do
    chmod -R u-w "$RUNTIME_JOB_DIR"/"$keep" 2>/dev/null
    mv "$RUNTIME_JOB_DIR"/"$keep" ../.
  done
  # Delete every regular file that is still owner-writable, then give the
  # write bit back to whatever regular files remain.
  find ./ -type f -perm /200 -exec rm -f "{}" ";"
  find ./ -type f -exec chmod u+w "{}" ";"
fi
# Stage-out: bring the node-local session directory back to the original job
# directory. Runs under the same condition as the stage-in section (both
# RUNTIME_LOCAL_SCRATCH_DIR and RUNTIME_NODE_SEES_FRONTEND non-empty) and uses
# RUNTIME_FRONTEND_JOB_DIR / RUNTIME_NODE_JOB_DIR set there. Failures here do
# not abort the script; they set RESULT=1 so the recorded exit code shows them.
if [ ! -z "$RUNTIME_LOCAL_SCRATCH_DIR" ] && [ ! -z "$RUNTIME_NODE_SEES_FRONTEND" ]; then
if [ ! -z "$RUNTIME_FRONTEND_SEES_NODE" ] ; then
# just move it
# (whatever is at the frontend path is removed first, then the node directory
# is moved into the frontend path's parent directory)
rm -rf "$RUNTIME_FRONTEND_JOB_DIR"
destdir=`dirname "$RUNTIME_FRONTEND_JOB_DIR"`
if ! mv "$RUNTIME_NODE_JOB_DIR" "$destdir"; then
echo "Failed to move '$RUNTIME_NODE_JOB_DIR' to '$destdir'" 1>&2
RESULT=1
fi
else
# remove links
# (at this point RUNTIME_JOB_STDOUT/RUNTIME_JOB_STDERR hold the node-side
# paths assigned during stage-in)
rm -f "$RUNTIME_JOB_STDOUT" 2>/dev/null
rm -f "$RUNTIME_JOB_STDERR" 2>/dev/null
# move directory contents (dot files included) from the node directory into
# the frontend job directory; ".", "..", ".diag" and ".comment" are skipped
for f in "$RUNTIME_NODE_JOB_DIR"/.* "$RUNTIME_NODE_JOB_DIR"/*; do
[ "$f" = "$RUNTIME_NODE_JOB_DIR/*" ] && continue # glob failed, no files
[ "$f" = "$RUNTIME_NODE_JOB_DIR/." ] && continue
[ "$f" = "$RUNTIME_NODE_JOB_DIR/.." ] && continue
[ "$f" = "$RUNTIME_NODE_JOB_DIR/.diag" ] && continue
[ "$f" = "$RUNTIME_NODE_JOB_DIR/.comment" ] && continue
# Keep going after a failed move so the remaining entries are still copied.
if ! mv "$f" "$RUNTIME_FRONTEND_JOB_DIR"; then
echo "Failed to move '$f' to '$RUNTIME_FRONTEND_JOB_DIR'" 1>&2
RESULT=1
fi
done
# Remove the node directory together with anything that could not be moved.
rm -rf "$RUNTIME_NODE_JOB_DIR"
fi
fi
# Append the final status to the diag file and finish with that status.
printf 'exitcode=%s\n' "$RESULT" >> "$RUNTIME_JOB_DIAG"
exit $RESULT
-------------------------------------------------------------------