Mailing List Archives
Authenticated access
|
|
|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Condor-users] "Failed to open as standard output" error
- Date: Fri, 10 Feb 2006 13:56:39 -0800 (PST)
- From: Ilya Narsky <narsky@xxxxxxxxxxxxxxx>
- Subject: [Condor-users] "Failed to open as standard output" error
We installed condor-6.7.13.x86_rh_9 on a testbed cluster at
Caltech. Now I am trying to submit a globus job:
[narsky@citgrid3 OSG]$ globus-job-run citgrid3.cacr.caltech.edu:2119/jobmanager-condor /bin/date
The job becomes idle and never finishes. StarterLog.vm1 on the worker
node shows this error:
2/10 12:21:05 Communicating with shadow <192.168.0.254:38311>
2/10 12:21:05 Submitting machine is "citgrid3.local"
2/10 12:21:05 Starting a VANILLA universe job with ID: 8.0
2/10 12:21:05 IWD: /home/narsky
2/10 12:21:05 Failed to open '/home/narsky/.globus/job/citgrid3.cacr.caltech.edu/18520.1139599210/stdout' as standard output: No such file or directory (errno 2)
2/10 12:21:05 Failed to open '/home/narsky/.globus/job/citgrid3.cacr.caltech.edu/18520.1139599210/stderr' as standard error: No such file or directory (errno 2)
2/10 12:21:05 Failed to open some/all of the std files...
2/10 12:21:05 Aborting OsProc::StartJob.
2/10 12:21:05 Failed to start job, exiting
Permissions for directory
/home/narsky/.globus/job/citgrid3.cacr.caltech.edu are set to 777.
The condor_config file is enclosed below.
Can anyone help?
Thanks, -Ilya
====================================================================
etc/condor_config
# Core pool settings: install root, pool host, per-host directories, admin contact.
RELEASE_DIR = /opt/condor/condor
# CONDOR_HOST is reused below for COLLECTOR_HOST and the HOSTALLOW_* admin/negotiator lists.
CONDOR_HOST = 192.168.0.254
# Each host gets its own directory under the release tree, keyed by $(HOSTNAME).
LOCAL_DIR = $(RELEASE_DIR)/hosts/$(HOSTNAME)
# NOTE(review): the contents of this per-host file are not shown; it may override
# any value set in this file -- check it on both the submit host and the worker node.
LOCAL_CONFIG_FILE = $(LOCAL_DIR)/condor_config.local
CONDOR_ADMIN = narsky@xxxxxxxxxxxxxxx
MAIL = /usr/bin/mail
# NOTE(review): both domains are the fixed string "local", so every host that reads
# this file reports the same UID_DOMAIN and FILESYSTEM_DOMAIN. Per the HTCondor manual,
# hosts with equal FILESYSTEM_DOMAIN are assumed to share a filesystem, so jobs access
# their files by path on the execute node. The reported StarterLog failure (errno 2 on a
# path under /home/narsky/.globus/job) is what that would look like if /home is NOT
# visible on the worker -- confirm the mount, or give each host its own
# FILESYSTEM_DOMAIN and rely on file transfer. TODO confirm.
UID_DOMAIN = local
FILESYSTEM_DOMAIN = local
COLLECTOR_NAME = CIT_ITB_1
# presumably the uid.gid of the condor account -- verify against /etc/passwd on each host.
CONDOR_IDS=503.503
# Flocking is left unconfigured: both lists are empty, so the two derived
# FLOCK_*_HOSTS macros below also expand to empty.
FLOCK_FROM =
FLOCK_TO =
FLOCK_NEGOTIATOR_HOSTS = $(FLOCK_TO)
FLOCK_COLLECTOR_HOSTS = $(FLOCK_TO)
# Host-based access lists. Administrator and negotiator access is tied to $(CONDOR_HOST).
HOSTALLOW_ADMINISTRATOR = $(CONDOR_HOST)
HOSTALLOW_OWNER = $(FULL_HOSTNAME), $(HOSTALLOW_ADMINISTRATOR)
# NOTE(review): "*" places no host restriction on READ or WRITE access, and the
# *_COLLECTOR / *_STARTD lists below inherit it -- confirm this is intended
# beyond a private testbed network.
HOSTALLOW_READ = *
HOSTALLOW_WRITE = *
HOSTALLOW_NEGOTIATOR = $(CONDOR_HOST)
HOSTALLOW_NEGOTIATOR_SCHEDD = $(CONDOR_HOST), $(FLOCK_NEGOTIATOR_HOSTS)
HOSTALLOW_WRITE_COLLECTOR = $(HOSTALLOW_WRITE), $(FLOCK_FROM)
HOSTALLOW_WRITE_STARTD = $(HOSTALLOW_WRITE), $(FLOCK_FROM)
HOSTALLOW_READ_COLLECTOR = $(HOSTALLOW_READ), $(FLOCK_FROM)
HOSTALLOW_READ_STARTD = $(HOSTALLOW_READ), $(FLOCK_FROM)
# Lock directory; SHADOW_LOCK further down is built from it.
LOCK = /var/lock/condor
# Glidein binary download locations (one continued value); no glidein sites are listed.
GLIDEIN_SERVER_URLS = \
http://www.cs.wisc.edu/condor/glidein/binaries \
gsiftp://gridftp.cs.wisc.edu/p/condor/public/binaries/glidein
GLIDEIN_SITES =
# Per-daemon log settings: MAX_<daemon>_LOG is the size limit and <daemon>_DEBUG the
# list of debug flags. ALL_DEBUG and several per-daemon lists are left empty.
ALL_DEBUG =
MAX_COLLECTOR_LOG = 1000000
COLLECTOR_DEBUG =
MAX_KBDD_LOG = 1000000
KBDD_DEBUG =
MAX_NEGOTIATOR_LOG = 1000000
NEGOTIATOR_DEBUG = D_MATCH
MAX_NEGOTIATOR_MATCH_LOG = 1000000
MAX_SCHEDD_LOG = 1000000
SCHEDD_DEBUG = D_COMMAND D_PID
MAX_SHADOW_LOG = 1000000
SHADOW_DEBUG =
MAX_STARTD_LOG = 1000000
STARTD_DEBUG = D_COMMAND
MAX_STARTER_LOG = 1000000
STARTER_DEBUG = D_NODATE
MAX_MASTER_LOG = 1000000
MASTER_DEBUG = D_COMMAND
# Helper macros used by the policy expressions further down.
# Time units; presumably seconds, since they are compared against CurrentTime differences.
MINUTE = 60
HOUR = (60 * $(MINUTE))
# Elapsed-time helpers, each computed as CurrentTime minus a recorded timestamp attribute.
StateTimer = (CurrentTime - EnteredCurrentState)
ActivityTimer = (CurrentTime - EnteredCurrentActivity)
ActivationTimer = (CurrentTime - JobStart)
LastCkpt = (CurrentTime - LastPeriodicCheckpoint)
# Numeric job-universe codes and the tests that compare TARGET.JobUniverse against them.
STANDARD = 1
PVM = 4
VANILLA = 5
MPI = 8
IsPVM = (TARGET.JobUniverse == $(PVM))
IsMPI = (TARGET.JobUniverse == $(MPI))
IsVanilla = (TARGET.JobUniverse == $(VANILLA))
IsStandard = (TARGET.JobUniverse == $(STANDARD))
# Load attributable to non-Condor work, with the thresholds used by CPUIdle / CPUBusy.
NonCondorLoadAvg = (LoadAvg - CondorLoadAvg)
BackgroundLoad = 0.3
HighLoad = 0.5
# Idle/suspend/vacate time limits, expressed in multiples of $(MINUTE).
StartIdleTime = 15 * $(MINUTE)
ContinueIdleTime = 5 * $(MINUTE)
MaxSuspendTime = 10 * $(MINUTE)
MaxVacateTime = 10 * $(MINUTE)
# Keyboard/console count as busy when idle for less than one $(MINUTE).
KeyboardBusy = (KeyboardIdle < $(MINUTE))
ConsoleBusy = (ConsoleIdle < $(MINUTE))
CPUIdle = ($(NonCondorLoadAvg) <= $(BackgroundLoad))
CPUBusy = ($(NonCondorLoadAvg) >= $(HighLoad))
KeyboardNotBusy = ($(KeyboardBusy) == False)
# Job size classes, split on TARGET.ImageSize at 15 * 1024 and 50 * 1024.
BigJob = (TARGET.ImageSize >= (50 * 1024))
# MediumJob must stay on one line (or use a trailing "\"): an unescaped line break
# truncates the expression after "<" and leaves a stray, unparsable line behind.
MediumJob = (TARGET.ImageSize >= (15 * 1024) && TARGET.ImageSize < (50 * 1024))
SmallJob = (TARGET.ImageSize < (15 * 1024))
# Combined load tests built from CPUBusy and KeyboardBusy.
JustCPU = ($(CPUBusy) && ($(KeyboardBusy) == False))
MachineBusy = ($(CPUBusy) || $(KeyboardBusy))
# Active policy. WANT_SUSPEND, WANT_VACATE, KILL, PERIODIC_CHECKPOINT, the preemption
# settings and MaxJobRetirementTime defer to the UWCS_* macros; START, SUSPEND,
# CONTINUE and PREEMPT are literal constants instead.
WANT_SUSPEND = $(UWCS_WANT_SUSPEND)
WANT_VACATE = $(UWCS_WANT_VACATE)
# With these four constants the machine always accepts jobs and never suspends
# or preempts them on its own.
START = True
SUSPEND = False
CONTINUE = True
PREEMPT = False
KILL = $(UWCS_KILL)
PERIODIC_CHECKPOINT = $(UWCS_PERIODIC_CHECKPOINT)
PREEMPTION_REQUIREMENTS = $(UWCS_PREEMPTION_REQUIREMENTS)
PREEMPTION_RANK = $(UWCS_PREEMPTION_RANK)
NEGOTIATOR_PRE_JOB_RANK = $(UWCS_NEGOTIATOR_PRE_JOB_RANK)
# NOTE(review): UWCS_NEGOTIATOR_POST_JOB_RANK is not defined anywhere in the visible
# configuration (only the PRE variant is) -- confirm it is set in the local config
# file or built in; otherwise this expands to an empty value.
NEGOTIATOR_POST_JOB_RANK = $(UWCS_NEGOTIATOR_POST_JOB_RANK)
MaxJobRetirementTime = $(UWCS_MaxJobRetirementTime)
# UWCS_* policy expressions. Several span multiple lines using a trailing "\";
# comments are kept outside those continued values on purpose.
# Suspend is wanted for small jobs, an idle keyboard, or PVM/vanilla jobs.
UWCS_WANT_SUSPEND = ( $(SmallJob) || $(KeyboardNotBusy) \
|| $(IsPVM) || $(IsVanilla) )
# Vacate is wanted once the job has run more than 10 minutes, or for PVM/vanilla jobs.
UWCS_WANT_VACATE = ( $(ActivationTimer) > 10 * $(MINUTE) \
|| $(IsPVM) || $(IsVanilla) )
# UWCS_START, UWCS_SUSPEND, UWCS_CONTINUE and UWCS_PREEMPT are defined here, but the
# START / SUSPEND / CONTINUE / PREEMPT settings in this file use literal constants
# rather than referencing them.
UWCS_START = ( (KeyboardIdle > $(StartIdleTime)) \
&& ( $(CPUIdle) || \
(State != "Unclaimed" && State != "Owner")) )
UWCS_SUSPEND = ( $(KeyboardBusy) || \
( (CpuBusyTime > 2 * $(MINUTE)) \
&& $(ActivationTimer) > 90 ) )
UWCS_CONTINUE = ( $(CPUIdle) && ($(ActivityTimer) > 10) \
&& (KeyboardIdle > $(ContinueIdleTime)) )
UWCS_PREEMPT = ( ((Activity == "Suspended") && \
($(ActivityTimer) > $(MaxSuspendTime))) \
|| (SUSPEND && (WANT_SUSPEND == False)) )
UWCS_MaxJobRetirementTime = 0
# Kill once the current activity has lasted longer than $(MaxVacateTime).
UWCS_KILL = $(ActivityTimer) > $(MaxVacateTime)
# Checkpoint when more than 3 hours have passed since the last periodic checkpoint.
UWCS_PERIODIC_CHECKPOINT = $(LastCkpt) > (3 * $(HOUR))
UWCS_NEGOTIATOR_PRE_JOB_RANK = RemoteOwner =?= UNDEFINED
UWCS_PREEMPTION_REQUIREMENTS = ( $(StateTimer) > (1 * $(HOUR)) && \
RemoteUserPrio > SubmittorPrio * 1.2 ) || (MY.NiceUser == True)
UWCS_PREEMPTION_RANK = (RemoteUserPrio * 1000000) - TARGET.ImageSize
# TESTINGMODE_* policy constants. Nothing in the visible configuration references
# these macros, so they have no effect unless another file points a policy setting at them.
TESTINGMODE_WANT_SUSPEND = False
TESTINGMODE_WANT_VACATE = False
TESTINGMODE_START = True
TESTINGMODE_SUSPEND = False
TESTINGMODE_CONTINUE = True
TESTINGMODE_PREEMPT = False
TESTINGMODE_KILL = False
TESTINGMODE_PERIODIC_CHECKPOINT = False
TESTINGMODE_PREEMPTION_REQUIREMENTS = False
TESTINGMODE_PREEMPTION_RANK = 0
# Per-host working directories, all rooted at $(LOCAL_DIR).
LOG = $(LOCAL_DIR)/log
SPOOL = $(LOCAL_DIR)/spool
EXECUTE = $(LOCAL_DIR)/execute
# Shared installation directories, all rooted at $(RELEASE_DIR).
BIN = $(RELEASE_DIR)/bin
LIB = $(RELEASE_DIR)/lib
INCLUDE = $(RELEASE_DIR)/include
SBIN = $(RELEASE_DIR)/sbin
LIBEXEC = $(RELEASE_DIR)/libexec
HISTORY = $(SPOOL)/history
# Daemon log files, all under $(LOG). STARTER_LOG is the base name of the
# StarterLog.vm1 file quoted in the message above.
COLLECTOR_LOG = $(LOG)/CollectorLog
KBDD_LOG = $(LOG)/KbdLog
MASTER_LOG = $(LOG)/MasterLog
NEGOTIATOR_LOG = $(LOG)/NegotiatorLog
NEGOTIATOR_MATCH_LOG = $(LOG)/MatchLog
SCHEDD_LOG = $(LOG)/SchedLog
SHADOW_LOG = $(LOG)/ShadowLog
STARTD_LOG = $(LOG)/StartLog
STARTER_LOG = $(LOG)/StarterLog
SHADOW_LOCK = $(LOCK)/ShadowLock
# The collector is expected on the same host as $(CONDOR_HOST).
COLLECTOR_HOST = $(CONDOR_HOST)
RESERVED_DISK = 5
# Daemons started on a host that uses this file as-is.
# NOTE(review): COLLECTOR and NEGOTIATOR are not in this list, so the host named by
# CONDOR_HOST presumably adds them in its condor_config.local -- confirm.
DAEMON_LIST = MASTER, STARTD, SCHEDD
DC_DAEMON_LIST = \
MASTER, STARTD, SCHEDD, KBDD, COLLECTOR, NEGOTIATOR, EVENTD, \
VIEW_SERVER, CONDOR_VIEW, VIEW_COLLECTOR, HAWKEYE
# Daemon executables, all under $(SBIN).
MASTER = $(SBIN)/condor_master
STARTD = $(SBIN)/condor_startd
SCHEDD = $(SBIN)/condor_schedd
KBDD = $(SBIN)/condor_kbdd
NEGOTIATOR = $(SBIN)/condor_negotiator
COLLECTOR = $(SBIN)/condor_collector
MASTER_ADDRESS_FILE = $(LOG)/.master_address
PREEN = $(SBIN)/condor_preen
PREEN_ARGS = -m -r
# Starter executables; STARTER_LIST names the three macros defined right below it.
STARTER_LIST = STARTER, STARTER_PVM, STARTER_STANDARD
STARTER = $(SBIN)/condor_starter
STARTER_PVM = $(SBIN)/condor_starter.pvm
STARTER_STANDARD = $(SBIN)/condor_starter.std
STARTD_ADDRESS_FILE = $(LOG)/.startd_address
# Time elapsed since the last benchmark run.
BenchmarkTimer = (CurrentTime - LastBenchmark)
# Run benchmarks if none has run yet (LastBenchmark == 0) or the last run is at least
# 4 hours old. The expression must stay on one line (or use a trailing "\"): an
# unescaped line break truncates it after "(4 *".
RunBenchmarks : (LastBenchmark == 0 ) || ($(BenchmarkTimer) >= (4 * $(HOUR)))
# Startd settings: console devices, plus the COLLECTOR_HOST_STRING value that
# STARTD_EXPRS names.
CONSOLE_DEVICES = mouse, console
COLLECTOR_HOST_STRING = "$(COLLECTOR_HOST)"
STARTD_EXPRS = COLLECTOR_HOST_STRING
# Job attributes named here are the ones the policy macros read via TARGET.* / MY.*
# (ImageSize, JobUniverse, NiceUser), plus ExecutableSize.
STARTD_JOB_EXPRS = ImageSize, ExecutableSize, JobUniverse, NiceUser
# Shadow executables; SHADOW_LIST names the three macros defined right below it.
SHADOW_LIST = SHADOW, SHADOW_PVM, SHADOW_STANDARD
SHADOW = $(SBIN)/condor_shadow
SHADOW_PVM = $(SBIN)/condor_shadow.pvm
SHADOW_STANDARD = $(SBIN)/condor_shadow.std
# Schedd settings.
SCHEDD_ADDRESS_FILE = $(LOG)/.schedd_address
SHADOW_SIZE_ESTIMATE = 1800
SHADOW_RENICE_INCREMENT = 10
PERIODIC_EXPR_INTERVAL = 60
QUEUE_SUPER_USERS = root, condor
PVMD = $(SBIN)/condor_pvmd
PVMGS = $(SBIN)/condor_pvmgs
# File-name lists; presumably consulted by condor_preen when cleaning up -- TODO confirm.
VALID_SPOOL_FILES = job_queue.log, job_queue.log.tmp, history, \
Accountant.log, Accountantnew.log, \
local_univ_execute
INVALID_LOG_FILES = core
# Java settings: JVM path, heap and classpath flags.
JAVA = /usr/bin/java
JAVA_MAXHEAP_ARGUMENT = -Xmx
JAVA_CLASSPATH_DEFAULT = $(LIB) $(LIB)/scimark2lib.jar .
JAVA_CLASSPATH_ARGUMENT = -classpath
JAVA_CLASSPATH_SEPARATOR = :
JAVA_BENCHMARK_TIME = 2
JAVA_EXTRA_ARGUMENTS =
# Grid (Condor-G) settings: gridmanager and GAHP helper executables under $(SBIN).
GRIDMANAGER = $(SBIN)/condor_gridmanager
GT2_GAHP = $(SBIN)/gahp_server
GRID_MONITOR = $(SBIN)/grid_monitor.sh
MAX_GRIDMANAGER_LOG = 1000000
GRIDMANAGER_DEBUG = D_COMMAND
# Gridmanager and C-GAHP logs are written under /tmp with $(USERNAME) in the file
# name, not under $(LOG) like the daemon logs.
GRIDMANAGER_LOG = /tmp/GridmanagerLog.$(USERNAME)
# Jobs submitted without an explicit universe default to "globus".
DEFAULT_UNIVERSE = globus
CRED_MIN_TIME_LEFT = 120
ENABLE_GRID_MONITOR = TRUE
CONDOR_GAHP = $(SBIN)/condor_c-gahp
MAX_C_GAHP_LOG = 1000000
C_GAHP_LOG = /tmp/CGAHPLog.$(USERNAME)
C_GAHP_WORKER_THREAD_LOG = /tmp/CGAHPWorkerLog.$(USERNAME)
GT3_GAHP = $(SBIN)/gt3_gahp
GT3_LOCATION = $(LIB)/gt3
GT4_GAHP = $(SBIN)/gt4_gahp
GT4_LOCATION = $(LIB)/gt4
GRIDFTP_URL_BASE = gsiftp://$(FULL_HOSTNAME)
# Credd settings. CREDD, STORK and QUILL are defined here but none of them is in the
# DAEMON_LIST shown in this file.
CREDD = $(SBIN)/condor_credd
CREDD_ADDRESS_FILE = $(LOG)/.credd_address
CREDD_PORT = 9620
# The port is passed on the command line via -p.
CREDD_ARGS = -p $(CREDD_PORT) -f
CREDD_LOG = $(LOG)/CredLog
CREDD_DEBUG = D_FULLDEBUG
MAX_CREDD_LOG = 4000000
CRED_STORE_DIR = $(LOCAL_DIR)/cred_dir
# Stork settings.
STORK = $(SBIN)/stork_server
STORK_ADDRESS_FILE = $(LOG)/.stork_address
STORK_LOG_BASE = $(LOG)/Stork
STORK_LOG = $(LOG)/StorkLog
STORK_DEBUG = D_FULLDEBUG
MAX_STORK_LOG = 4000000
STORK_PORT = 9621
# The port is passed via -p and the server log base name via -Serverlog.
STORK_ARGS = -p $(STORK_PORT) -f -Serverlog $(STORK_LOG_BASE)
# Quill settings.
QUILL = $(SBIN)/condor_quill
QUILL_LOG = $(LOG)/QuillLog
QUILL_ADDRESS_FILE = $(LOG)/.quill_address
====================================================================