Hi all,
I am brand new to Condor and am just trying to get it to work on a single machine that is configured as the execute, submit, and manager node. I am using the vanilla universe to submit a simple shell script. The jobs queue up, but do not execute. The daemons are all being started as root; jobs are being submitted as user "condor", group "users". Part 1 of the config is below. Any ideas how to get this working? I only modified Part 1 of the condor_config file. There is no fully qualified domain name - just the machine name "s10-1200". s10-1200 is set to an IP address in the /etc/config.
Thanks,
-Jim
=========================
s10-1200:~ # condor_master
s10-1200:~ #
s10-1200:~ # ps -ef | grep condo
condor 3324 1 0 09:59 ? 00:00:00 condor_master
condor 3325 3324 1 09:59 ? 00:00:00 condor_collector -f
condor 3326 3324 1 09:59 ? 00:00:00 condor_negotiator -f
condor 3327 3324 1 09:59 ? 00:00:00 condor_schedd -f
condor 3328 3324 15 09:59 ? 00:00:03 condor_startd -f
root 3344 3295 0 09:59 pts/1 00:00:00 grep condo
s10-1200:~ # su - condor
condor@s10-1200:~> condor_q
-- Submitter: s10-1200 : <192.168.17.128:55041> : s10-1200
ID OWNER SUBMITTED RUN_TIME ST PRI SIZE CMD
22.0 condor 8/27 09:18 0+00:00:00 I 0 9.8 go.sh
1 jobs; 1 idle, 0 running, 0 held
condor@s10-1200:~> condor_q -better-analyze
-- Submitter: s10-1200 : <192.168.17.128:55041> : s10-1200
---
022.000: Run analysis summary. Of 1 machines,
0 are rejected by your job's requirements
1 reject your job because of their own requirements
0 match but are serving users with a better priority in the pool
0 match but reject the job for unknown reasons
0 match but will not currently preempt their existing job
0 are available to run your job
No successful match recorded.
Last failed match: Wed Aug 27 09:33:44 2008
Reason for last match failure: no match found
WARNING: Be advised: Request 22.0 did not match any resource's constraints
condor@s10-1200:~> cat go.cmd
Executable = /home/condor/go.sh
Universe = vanilla
requirements = Machine == "s10-1200"
Log = go.log
output = go.out
error = go.err
Queue
condor@s10-1200:~>
========================
condor@s10-1200:~> condor_status -l
MyType = "Machine"
TargetType = "Job"
Name = "s10-1200"
Machine = "s10-1200"
Rank = 0.000000
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = "s10-1200"
CondorVersion = "$CondorVersion: 6.8.8 Dec 19 2007 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RHEL3 $"
VirtualMachineID = 1
VirtualMemory = 4184908
Disk = 11145532
CondorLoadAvg = 0.000000
LoadAvg = 0.010000
KeyboardIdle = 206
ConsoleIdle = 3004
Memory = 309
Cpus = 1
StartdIpAddr = "<192.168.17.128:33797>"
Arch = "X86_64"
OpSys = "LINUX"
UidDomain = "192.168.17.128"
FileSystemDomain = "192.168.17.128"
Subnet = "192.168.17"
HasIOProxy = TRUE
CheckpointPlatform = "LINUX X86_64 2.6.x normal"
TotalVirtualMemory = 4184908
TotalDisk = 11145532
TotalCpus = 1
TotalMemory = 309
KFlops = 995169
Mips = 1626
LastBenchmark = 1219856373
TotalLoadAvg = 0.010000
TotalCondorLoadAvg = 0.000000
ClockMin = 649
ClockDay = 3
TotalVirtualMachines = 1
HasFileTransfer = TRUE
HasPerFileEncryption = TRUE
HasReconnect = TRUE
HasMPI = TRUE
HasTDP = TRUE
HasJobDeferral = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
JavaVendor = "Sun Microsystems Inc."
JavaVersion = "1.4.2_11"
JavaMFlops = 270.207855
HasJava = TRUE
HasPVM = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasPerFileEncryption,HasReconnect,HasMPI,HasTDP,HasJobDeferral,HasJICLocalConfig,HasJICLocalStdin,HasJava,HasPVM,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
TimeToLive = 2147483647
State = "Owner"
EnteredCurrentState = 1219859073
Activity = "Idle"
EnteredCurrentActivity = 1219859073
Start = ((KeyboardIdle > 15 * 60) && (((LoadAvg - CondorLoadAvg) <= 0.300000) || (State != "Unclaimed" && State != "Owner")))
Requirements = (START) && (IsValidCheckpointPlatform)
IsValidCheckpointPlatform = (((TARGET.JobUniverse == 1) == FALSE) || ((MY.CheckpointPlatform =!= UNDEFINED) && ((TARGET.LastCheckpointPlatform =?= MY.CheckpointPlatform) || (TARGET.NumCkpts == 0))))
MaxJobRetirementTime = 0
CurrentRank = 0.000000
MonitorSelfTime = 1219859254
MonitorSelfCPUUsage = 0.008332
MonitorSelfImageSize = 8156.000000
MonitorSelfResidentSetSize = 3636
MonitorSelfAge = 0
MonitorSelfRegisteredSocketCount = 2
DaemonStartTime = 1219856368
UpdateSequenceNumber = 11
MyAddress = "<192.168.17.128:33797>"
LastHeardFrom = 1219859419
UpdatesTotal = 12
UpdatesSequenced = 11
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"
===========================
/* Condor_config part 1 below */
######################################################################
######################################################################
##
## ###### #
## # # ## ##### ##### ##
## # # # # # # # # #
## ###### # # # # # #
## # ###### ##### # #
## # # # # # # #
## # # # # # # #####
##
## Part 1: Settings you must customize:
######################################################################
######################################################################
## What machine is your central manager?
CONDOR_HOST = s10-1200
##--------------------------------------------------------------------
## Pathnames:
##--------------------------------------------------------------------
## Where have you installed the bin, sbin and lib condor directories?
RELEASE_DIR = /usr/local/condor
## Where is the local condor directory for each host?
## This is where the local config file(s), logs and
## spool/execute directories are located
#LOCAL_DIR = $(TILDE)
LOCAL_DIR = /scratch/condor
#LOCAL_DIR = $(RELEASE_DIR)/hosts/$(HOSTNAME)
## Where is the machine-specific local config file for each host?
LOCAL_CONFIG_FILE = /scratch/condor/condor_config.local
## If the local config file is not present, is it an error?
## WARNING: This is a potential security issue.
## If not specified, the default is True
REQUIRE_LOCAL_CONFIG_FILE = False
##--------------------------------------------------------------------
## Mail parameters:
##--------------------------------------------------------------------
## When something goes wrong with condor at your site, who should get
## the email?
CONDOR_ADMIN = condor@localhost
## Full path to a mail delivery program that understands that "-s"
## means you want to specify a subject:
MAIL = /usr/bin/mail
##--------------------------------------------------------------------
## Network domain parameters:
##--------------------------------------------------------------------
## Internet domain of machines sharing a common UID space. If your
## machines don't share a common UID space, set it to
## UID_DOMAIN = $(FULL_HOSTNAME)
## to specify that each machine has its own UID space.
## UID_DOMAIN = your.domain
UID_DOMAIN = s10-1200
## Internet domain of machines sharing a common file system.
## If your machines don't use a network file system, set it to
## FILESYSTEM_DOMAIN = $(FULL_HOSTNAME)
## to specify that each machine has its own file system.
## FILESYSTEM_DOMAIN = your.domain
FILESYSTEM_DOMAIN = s10-1200
## This macro is used to specify a short description of your pool.
## It should be about 20 characters long. For example, the name of
## the UW-Madison Computer Science Condor Pool is ``UW-Madison CS''.
COLLECTOR_NAME = condor_td
s10-1200:/usr/local/condor/etc #