#IS_OWNER = ($(CpuBusy) || $(KeyboardBusy))
IS_OWNER = ($(KeyboardBusy))
#####
The variable $(CpuBusy) is probably not evaluated correctly.
#####
#
# from /etc/condor/config.d/workstation
#
DEFAULT_DOMAIN_NAME = domain.de
DAEMON_LIST = MASTER, STARTD, KBDD
ALLOW_ADMINISTRATOR = *@domain.de/*
UID_DOMAIN = domain.de
STARTER_ALLOW_RUNAS_OWNER = true
ENVIRONMENT_FOR_AssignedGPUs = GPU_DEVICE_ORDINAL=/(CUDA|OCL)// CUDA_VISIBLE_DEVICES=/CUDA//
MACHINE_RESOURCE_INVENTORY_GPUs = $(LIBEXEC)/condor_gpu_discovery -properties $(GPU_DISCOVERY_EXTRA)
STARTD_CRON_GPUs_MONITOR_CONDITION = TotalGPUs > 0
STARTD_CRON_GPUs_MONITOR_EXECUTABLE = $(GPU_MONITOR)
STARTD_CRON_GPUs_MONITOR_METRICS = SUM:GPUs, PEAK:GPUsMemory
STARTD_CRON_GPUs_MONITOR_MODE = WaitForExit
STARTD_CRON_GPUs_MONITOR_PERIOD = 300
STARTD_CRON_JOBLIST = GPUs_MONITOR
STARTD_JOB_ATTRS = GPUsUsage GPUsMemoryUsage
GPU_DISCOVERY_EXTRA = -extra -not-nested
ENVIRONMENT_VALUE_FOR_UnAssignedGPUs = void
COUNT_HYPERTHREAD_CPU = false
NUM_SLOTS = 1
NUM_SLOTS_TYPE_1 = 1
SLOT_TYPE_1_PARTITIONABLE = true
SLOT_TYPE_1 = cpus=$(DETECTED_CORES), mem=$(DETECTED_MEMORY), gpus=auto
DynamicSlot = True
POOL = "workstation"
HOUR = (60 * $(MINUTE))
ActivationTimer = (time() - JobStart)
LastCkpt = (time() - LastPeriodicCheckpoint)
NonCondorLoadAvg = (TotalLoadAvg - CondorLoadAvg)
BackgroundLoad = 1.5
HighLoad = 2.0
StartIdleTime = 60 * $(MINUTE)
ContinueIdleTime = 60 * $(MINUTE)
MaxSuspendTime = 1 * $(MINUTE)
KeyboardBusy = KeyboardIdle < $(StartIdleTime)
ConsoleBusy = (ConsoleIdle < $(MINUTE))
CPUIdle = $(NonCondorLoadAvg) <= $(BackgroundLoad)
CPUBusy = $(NonCondorLoadAvg) >= $(HighLoad)
KeyboardNotBusy = ($(KeyboardBusy) == False)
MachineBusy = ($(CPUBusy) || $(KeyboardBusy)
IsDesktop = true
START = ($(CPUIdle) || $(KeyboardNotBusy))
IS_OWNER = ($(KeyboardBusy))
WANT_SUSPEND = ( $(SmallJob) || $(IsVanilla) )
WANT_VACATE = ( $(ActivationTimer) > 10 * $(MINUTE) || $(IsVanilla) )
SUSPEND = $(KeyboardBusy)
CONTINUE = ( $(CPUIdle) && ($(ActivityTimer) > 300) && ( (KeyboardIdle > $(ContinueIdleTime))) )
PREEMPT = $(KeyboardBusy)
MAXJOBRETIREMENTTIME = (IsDesktop =!= True) * 0
MachineMaxVacateTime = 10 * $(MINUTE)
STARTD_DEBUG = D_IDLE D_KEYBOARD
SLOTS_CONNECTED_TO_KEYBOARD = 1024*1024
SLOTS_CONNECTED_TO_CONSOLE = 1024*1024
#
# from /etc/condor/condor_config.local
#
JOB_RENICE_INCREMENT = true
STARTD_ATTRS = IsDesktop GPURAM POOL CUDADeviceName
GPURAM = 24564
CUDADeviceName = "NVIDIA RTX A5000"
Hi Daniel,
Our LTS series tends not to contain major changes because it is intended for bug fixes only. Doing some git archeology, I found that there were not that many changes that affected the Startd/Starter daemons, so I am unsure why this is happening. Is that configuration for the desktop policy fully custom, or is it built off of one of the configuration templates? The output of condor_config_val -summary could be helpful here.
-Cole Bollig
From: HTCondor-users <htcondor-users-bounces@xxxxxxxxxxx> on behalf of Daniel Brückner <daniel.brueckner@xxxxxxxxxxxxxxxxxx>
Sent: Monday, September 9, 2024 2:19 AM
To: HTCondor-Users Mail List <HTCondor-users@xxxxxxxxxxx>
Subject: [HTCondor-users] Jobs are starting even in Owner State after updating to 23.0.14

Hi there,
After updating my nodes to "23.0.14 2024-08-07" my START expression is
ignored and jobs are starting in Owner state.
#####
root@cm:~# condor_status pc40
Name          OpSys      Arch   State     Activity LoadAv   Mem ActvtyTime
slot1@pc40    LINUX      X86_64 Owner     Idle      0.000  1746 0+00:00:00
slot1_1@pc40  LINUX      X86_64 Claimed   Busy      0.000 65536 0+00:00:00
slot1_2@pc40  LINUX      X86_64 Claimed   Busy      0.000  4096 0+00:00:00
slot1_3@pc40  LINUX      X86_64 Claimed   Busy      0.000  4096 0+00:00:00
slot1_4@pc40  LINUX      X86_64 Claimed   Busy      0.000  4096 0+00:00:00
#####
After downgrading to "23.0.12 2024-06-13" everything works as expected and
jobs are starting only when the node is in the "Unclaimed" state. Are there
any major changes in 23.0.14?
Here's my configuration:
#####
KeyboardBusy    = KeyboardIdle < $(StartIdleTime)
CPUIdle         = $(NonCondorLoadAvg) <= $(BackgroundLoad)
CPUBusy         = $(NonCondorLoadAvg) >= $(HighLoad)
KeyboardNotBusy = ($(KeyboardBusy) == False)
START = ($(CPUIdle) || $(KeyboardNotBusy))
IS_OWNER = ($(CpuBusy) || $(KeyboardBusy))
####
condor_drain is working and will evict all running jobs.
Do you have any idea what's going on there?
Best regards and thanks for your support,
Daniel
_______________________________________________
HTCondor-users mailing list
To unsubscribe, send a message to htcondor-users-request@xxxxxxxxxxx with a
subject: Unsubscribe
You can also unsubscribe by visiting
https://lists.cs.wisc.edu/mailman/listinfo/htcondor-users
The archives can be found at:
https://lists.cs.wisc.edu/archive/htcondor-users/
_______________________________________________ HTCondor-users mailing list To unsubscribe, send a message to htcondor-users-request@xxxxxxxxxxx with a subject: Unsubscribe You can also unsubscribe by visiting https://lists.cs.wisc.edu/mailman/listinfo/htcondor-users The archives can be found at: https://lists.cs.wisc.edu/archive/htcondor-users/
Attachment:
smime.p7s
Description: Kryptografische S/MIME-Signatur