Hi
I tried to submit the simplest MPI example from the manual, but didn't succeed in running it.
Could you please help me find out why my MPI jobs are rejected?
Below are the submit description file, the output of condor_q -better-analyze,
and the output of condor_status -l for one of the machines that should run the job.
Thanks in advance
Nicolas
############################
## submit description file for a parallel program
#############################################
universe = parallel
executable = /bin/sleep
arguments = 30
machine_count = 4
queue
######################################
$ condor_q -better-analyze 16
-- Submitter: seurat.my.domain : < 172.XX.XX.XX:32857> : seurat.lbt.ibpc.fr
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
AddConstraint: Condition value not literal
---
016.000: Run analysis summary. Of 51 machines,
24 are rejected by your job's requirements
9 reject your job because of their own requirements
0 match but are serving users with a better priority in the pool
18 match but reject the job for unknown reasons
0 match but will not currently preempt their existing job
0 are available to run your job
The Requirements expression for your job is:
( target.Arch == "INTEL" ) && ( target.OpSys == "LINUX" ) &&
( target.Disk >= DiskUsage ) && ( ( target.Memory * 1024 ) >= ImageSize ) &&
( TARGET.FileSystemDomain == MY.FileSystemDomain )
Condition Machines Matched Suggestion
--------- ---------------- ----------
1 ( target.Arch == "INTEL" ) 27
2 ( target.OpSys == "LINUX" ) 51
3 ( target.Disk >= 10000 ) 51
4 ( ( 1024 * target.Memory ) >= 10000 ) 51
5 ( TARGET.FileSystemDomain == "my.domain" ) 51
The following attributes are missing from the job ClassAd:
CheckpointPlatform
#############################################
$ condor_status -l calisto
MyType = "Machine"
TargetType = "Job"
Name = "vm1@xxxxxxxxxxxxxxxxx"
Machine = "calisto.my.domain "
Rank = (Scheduler =?= "DedicatedScheduler@xxxxxxxxxxxxxxxx" * 10000000) + 0
CpuBusy = ((LoadAvg - CondorLoadAvg) >= 0.500000)
COLLECTOR_HOST_STRING = " io.my.domain"
DedicatedScheduler = "DedicatedScheduler@xxxxxxxxxxxxxxxx"
CondorVersion = "$CondorVersion: 6.8.3 Jan 4 2007 $"
CondorPlatform = "$CondorPlatform: I386-LINUX_RHEL3 $"
VirtualMachineID = 1
VirtualMemory = 1048568
Disk = 3006996
CondorLoadAvg = 0.000000
LoadAvg = 0.000000
KeyboardIdle = 236778
ConsoleIdle = 236778
Memory = 1010
Cpus = 1
StartdIpAddr = "< 172.XX.XX.XX:32814>"
Arch = "INTEL"
OpSys = "LINUX"
UidDomain = "my.domain"
FileSystemDomain = "my.domain"
Subnet = "172.XX.XX"
HasIOProxy = TRUE
CheckpointPlatform = "LINUX INTEL 2.4.x normal"
TotalVirtualMemory = 2097136
TotalDisk = 6013992
TotalCpus = 2
TotalMemory = 2020
KFlops = 672176
Mips = 1887
LastBenchmark = 1170660059
TotalLoadAvg = 0.010000
TotalCondorLoadAvg = 0.000000
ClockMin = 680
ClockDay = 1
TotalVirtualMachines = 2
HasFileTransfer = TRUE
HasPerFileEncryption = TRUE
HasReconnect = TRUE
HasMPI = TRUE
HasTDP = TRUE
HasJobDeferral = TRUE
HasJICLocalConfig = TRUE
HasJICLocalStdin = TRUE
HasPVM = TRUE
HasRemoteSyscalls = TRUE
HasCheckpointing = TRUE
StarterAbilityList = "HasFileTransfer,HasPerFileEncryption,HasReconnect,HasMPI,HasTDP,HasJobDeferral,HasJICLocalConfig,HasJICLocalStdin,HasPVM,HasRemoteSyscalls,HasCheckpointing"
CpuBusyTime = 0
CpuIsBusy = FALSE
TimeToLive = 2147483647
State = "Unclaimed"
EnteredCurrentState = 1170613946
Activity = "Idle"
EnteredCurrentActivity = 1170613946
Start = (Scheduler =?= " DedicatedScheduler@xxxxxxxxxxxxxxxx") || (((KeyboardIdle > 15 * 60) && (((LoadAvg - CondorLoadAvg) <= 0.300000) || (State != "Unclaimed" && State != "Owner"))))
Requirements = (START) && (IsValidCheckpointPlatform)
IsValidCheckpointPlatform = (((TARGET.JobUniverse == 1) == FALSE) || ((MY.CheckpointPlatform =!= UNDEFINED) && ((TARGET.LastCheckpointPlatform =?= MY.CheckpointPlatform ) || (TARGET.NumCkpts == 0))))
MaxJobRetirementTime = 0
CurrentRank = 0.000000
MonitorSelfTime = 1170670728
MonitorSelfCPUUsage = 0.012512
MonitorSelfImageSize = 8156.000000
MonitorSelfResidentSetSize = 3876
MonitorSelfAge = 0
MonitorSelfRegisteredSocketCount = 2
DaemonStartTime = 1170434080
UpdateSequenceNumber = 806
MyAddress = "<172.XX.XX.XX:32814>"
LastHeardFrom = 1170670862
UpdatesTotal = 807
UpdatesSequenced = 806
UpdatesLost = 0
UpdatesHistory = "0x00000000000000000000000000000000"
----------------------------------------------------
CNRS - UPR 9080 : Laboratoire de Biochimie Theorique
Institut de Biologie Physico-Chimique
13 rue Pierre et Marie Curie
75005 PARIS - FRANCE
Tel : +33 158 41 51 70
Fax : +33 158 41 50 26
----------------------------------------------------
_______________________________________________
Condor-users mailing list
To unsubscribe, send a message to condor-users-request@xxxxxxxxxxx with a
subject: Unsubscribe
You can also unsubscribe by visiting
https://lists.cs.wisc.edu/mailman/listinfo/condor-users
The archives can be found at either
https://lists.cs.wisc.edu/archive/condor-users/
http://www.opencondor.org/spaces/viewmailarchive.action?key=CONDOR