
[HTCondor-users] not all X509 ads propagated from CondorCE to LRMS Condor



Hi all,

I just noticed that our CondorCEs do not propagate all of the incoming X509-related job ads to the routed LRMS jobs. E.g., a job on the CE [1.CondorCE] loses the `x509UserProxyFirstFQAN` ad on the way to its routed job [1.Condor].

The SYSTEM_SECURE_JOB_ATTRS on both the CE and the LRMS schedd look good to me [2].

AFAIS the token ads follow the `JOB_ROUTER_DEFAULTS_GENERATED` route [3], ending up as `orig_Auth*` - but I have not found a corresponding path where the x509 ads are copied over.
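For illustration, on the LRMS schedd something like the following (the job id is just a placeholder) shows the preserved orig_Auth* copies, while the FQAN/VOName ads are simply gone:

> condor_q -l 1234.0 | grep -Ei 'x509|AuthToken|orig_'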

For the moment, I am going to copy the missing x509 ads over in my routes myself (see the sketch below) - but I fear that I may have mis-tuned a Condor(CE) knob that would otherwise handle this for me.
Maybe somebody has an idea which knob that could be?
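
To make concrete what I mean by copying them in my routes - a minimal sketch, just mirroring the copy_AuthToken* pattern from [3]; the orig_* target names are my own choice, so on the LRMS side the values would show up under those names rather than the original ones:

/* sketch: preserve the VOMS-related x509 ads across the route, analogous to the AuthToken copies */
copy_x509UserProxyFirstFQAN = "orig_x509UserProxyFirstFQAN";
copy_x509UserProxyFQAN = "orig_x509UserProxyFQAN";
copy_x509UserProxyVOName = "orig_x509UserProxyVOName";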

Cheers and thanks for ideas,
  Thomas

[1.CondorCE]
x509userproxy
x509UserProxyExpiration
x509UserProxyFirstFQAN
x509UserProxyFQAN
x509userproxysubject
x509UserProxyVOName


[1.Condor]
x509userproxy
x509UserProxyExpiration
x509userproxysubject

[2]
> condor_config_val SYSTEM_SECURE_JOB_ATTRS
x509userProxySubject x509UserProxyEmail x509UserProxyVOName x509UserProxyFirstFQAN x509UserProxyFQAN TotalSubmitProcs AuthTokenSubject AuthTokenIssuer AuthTokenGroups AuthTokenId AuthTokenScopes

[3]
/usr/share/condor-ce/condor_ce_router_defaults

> condor_ce_config_val JOB_ROUTER_DEFAULTS_GENERATED
[ MaxIdleJobs = 2000;
MaxJobs = 10000;
/* by default, accept all jobs */
Requirements = True;
/* now modify routed job attributes */
/* remove routed job if the client disappears for 48 hours or it is idle for 6 */
/*set_PeriodicRemove = (LastClientContact - time() > 48*60*60) ||
(JobStatus == 1 && (time() - QDate) > 6*60);*/
delete_PeriodicRemove = true;
delete_CondorCE = true;
delete_TotalSubmitProcs = true;
set_RoutedJob = true;
/* Copy AuthToken attributes if they exist - the routed job will have the original attributes deleted */
copy_AuthTokenSubject = "orig_AuthTokenSubject";
copy_AuthTokenIssuer = "orig_AuthTokenIssuer";
copy_AuthTokenGroups = "orig_AuthTokenGroups";
copy_AuthTokenScopes = "orig_AuthTokenScopes";
copy_AuthTokenId = "orig_AuthTokenId";
/* Set the environment */
copy_environment = "orig_environment";
set_osg_environment = "";
eval_set_environment = mergeEnvironment(join(" ",
False =?= True ?
strcat("HOME=", userHome(Owner, "/")) :
"",
ifThenElse(False =?= True, "", strcat("CONDORCE_COLLECTOR_HOST=", "grid-htc-ce04.desy.de:9619"))),
osg_environment,
orig_environment,
"",
default_pilot_job_env);
/* Set new requirements */
/* set_requirements = LastClientContact - time() < 30*60; */
set_requirements = (RequestGpus?:0) >= (TARGET.Gpus?:0);
/* Note default memory request of 2GB */
/* Note yet another nested condition allow pass attributes (maxMemory,xcount,jobtype,queue)
via gWMS Factory described within ClassAd */
eval_set_OriginalMemory = ifThenElse(maxMemory isnt undefined,
maxMemory,
ifThenElse(default_maxMemory isnt undefined,
default_maxMemory,
2000));
/* Duplicate OriginalMemory expression and add remote_ prefix.
This passes the attribute from gridmanager to BLAHP. */
eval_set_remote_OriginalMemory = ifThenElse(maxMemory isnt undefined,
maxMemory,
ifThenElse(default_maxMemory isnt undefined,
default_maxMemory,
2000));
set_JOB_GLIDEIN_Memory = "$$(TotalMemory:0)";
set_JobMemory = JobIsRunning ? int(MATCH_EXP_JOB_GLIDEIN_Memory)*95/100 : OriginalMemory;
set_RequestMemory = ifThenElse(WantWholeNode is true,
!isUndefined(TotalMemory) ? TotalMemory*95/100 : JobMemory,
OriginalMemory);
eval_set_remote_queue = ifThenElse(batch_queue isnt undefined,
batch_queue,
ifThenElse(queue isnt undefined,
queue,
ifThenElse(default_queue isnt undefined,
default_queue,
"")));
/* Request GPUs for whole node jobs (HTCONDOR-103) */
/* If a whole node job requests GPUs and is matched to a machine with GPUs then set the job's RequestGPUs to all the GPUs on that machine */
copy_RequestGPUs = "orig_RequestGPUs";
copy_WantWholeNode = "WholeNodes";
eval_set_OriginalGPUs = orig_RequestGPUs;
/* MATCH_EXP_JOB_GLIDEIN_GPUs will be based on JOB_GLIDEIN_GPUs (set below) once the routed job is matched to an HTCondor slot */
set_GlideinGPUsIsGood = !isUndefined(MATCH_EXP_JOB_GLIDEIN_GPUs) && (int(MATCH_EXP_JOB_GLIDEIN_GPUs) isnt error);
/* JobGPUs set below; TotalGPUs comes from the slot ad, WantWholeNode from the job ad */
set_JOB_GLIDEIN_GPUs = "$$(ifThenElse(WantWholeNode is true, !isUndefined(TotalGPUs) ? TotalGPUs : JobGPUs, OriginalGPUs))";
set_JobGPUs = JobIsRunning ? int(MATCH_EXP_JOB_GLIDEIN_GPUs) : OriginalGPUs;
set_RequestGPUs = ifThenElse((WantWholeNode is true && OriginalGPUs isnt undefined),
(!isUndefined(TotalGPUs) && TotalGPUs > 0)? TotalGPUs : JobGPUs,
OriginalGPUs);
/* HTCondor uses RequestCpus; blahp uses SMPGranularity and NodeNumber. Default is 1 core. */
copy_RequestCpus = "orig_RequestCpus";
eval_set_OriginalCpus = ifThenElse(xcount isnt undefined,
xcount,
ifThenElse(orig_RequestCpus isnt undefined,
ifThenElse(orig_RequestCpus > 1,
orig_RequestCpus,
ifThenElse(default_xcount isnt undefined,
default_xcount,
1)),
ifThenElse(default_xcount isnt undefined,
default_xcount,
1)));
set_GlideinCpusIsGood = !isUndefined(MATCH_EXP_JOB_GLIDEIN_Cpus) && (int(MATCH_EXP_JOB_GLIDEIN_Cpus) isnt error);
set_JOB_GLIDEIN_Cpus = "$$(ifThenElse(WantWholeNode is true, !isUndefined(TotalCpus) ? TotalCpus : JobCpus, OriginalCpus))";
set_JobIsRunning = (JobStatus =!= 1) && (JobStatus =!= 5) && GlideinCpusIsGood;
set_JobCpus = JobIsRunning ? int(MATCH_EXP_JOB_GLIDEIN_Cpus) : OriginalCpus;
set_RequestCpus = ifThenElse(WantWholeNode is true,
!isUndefined(TotalCpus) ? TotalCpus : JobCpus,
OriginalCpus);
eval_set_remote_SMPGranularity = ifThenElse(xcount isnt undefined,
xcount,
ifThenElse(default_xcount isnt undefined,
default_xcount,
1));
eval_set_remote_NodeNumber = ifThenElse(xcount isnt undefined,
xcount,
ifThenElse(default_xcount isnt undefined,
default_xcount,
1));
/* BatchRuntime is in seconds but users configure default_maxWallTime and ROUTED_JOB_MAX_TIME in minutes */
copy_BatchRuntime = "orig_BatchRuntime";
eval_set_BatchRuntime = ifThenElse(maxWallTime isnt undefined,
60*maxWallTime,
ifThenElse(orig_BatchRuntime isnt undefined,
orig_BatchRuntime,
ifThenElse(default_maxWallTime isnt undefined,
60*default_maxWallTime,
60*4320)));
set_CondorCE = 1;
eval_set_CERequirements = ifThenElse(default_CERequirements isnt undefined,
strcat(default_CERequirements, ",CondorCE"),
"CondorCE");
copy_OnExitHold = "orig_OnExitHold";
set_OnExitHold = ifThenElse(orig_OnExitHold isnt undefined,
orig_OnExitHold,
false) ||
ifThenElse(minWalltime isnt undefined && RemoteWallClockTime isnt undefined,
RemoteWallClockTime < 60*minWallTime,
false);
copy_OnExitHoldReason = "orig_OnExitHoldReason";
set_OnExitHoldReason = ifThenElse((orig_OnExitHold isnt undefined) && orig_OnExitHold,
ifThenElse(orig_OnExitHoldReason isnt undefined,
orig_OnExitHoldReason,
strcat("The on_exit_hold expression (",
unparse(orig_OnExitHold),
") evaluated to TRUE.")),
ifThenElse(minWalltime isnt undefined &&
RemoteWallClockTime isnt undefined &&
(RemoteWallClockTime < 60*minWallTime),
strcat("The job's wall clock time, ",
int(RemoteWallClockTime/60),
"min, is less than the minimum specified by the job (",
minWalltime,
")"),
"Job held for unknown reason."));
copy_OnExitHoldSubCode = "orig_OnExitHoldSubCode";
set_OnExitHoldSubCode = ifThenElse((orig_OnExitHold isnt undefined) && orig_OnExitHold,
ifThenElse(orig_OnExitHoldSubCode isnt undefined,
orig_OnExitHoldSubCode,
1),
42);
]
