Mailing List Archives
Authenticated access
|
|
|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Condor-users] GUI for quill mining
- Date: Wed, 13 Aug 2008 16:30:44 -0700 (PDT)
- From: Sean Manning <seangwm@xxxxxxx>
- Subject: Re: [Condor-users] GUI for quill mining
Dear Steven
It's not graphical, but part of our Web Services interface does
"condor_q" on the terminal using the method described in Matthew and
Todd's "Developer APIs to Condor + A Tutorial on Condor Web Services"
powerpoint. Have a look at the printJobStatus () methods of our
CondorJobStatus.java, which I have attached.
Good luck with your project!
Sean
Steven Timm wrote:
>
>Has anyone out there done work on a graphical frontend that
>displays historical data (job data mostly) from the quill/postgres
>database? We have a summer student working on that for us this
>summer but if he can start from something that partly works it could
>be a help. Any pointers are appreciated.
>
>Thanks
>
>Steve Timm
>
/*
* CondorJobStatus.java
*
* Created on December 1, 2007, 11:03 PM
*
* To change this template, choose Tools | Template Manager
* and open the template in the editor.
*/
package condorwsgui;
import birdbath.ClassAd;
import birdbath.Schedd;
import birdbath.Transaction;
import condor.ClassAdStructAttr;
import condor.FileInfo;
import java.lang.Thread;
import java.net.URL;
import java.io.File;
import java.rmi.RemoteException;
import java.io.FileNotFoundException;
//import java.util.regex.*;
/**
* See slides 30 and 32 of the "Developer APIs to Condor + a Tutorial on Condor's
* Web Services Interface" PPT for documentation
*
* @author Written by David Gong; updated and expanded by Sean Manning
*/
public class CondorJobStatus {
/*
private static enum OS {LINUX, WINDOWS, OTHERS};
private OS myOS; // Represents the family of operating system (eg. Linux or Windows) running on the system
*/
/*
 * Condor job status codes, used with the various isJobFoo () methods and as
 * indices into statusName.
 * See http://pages.cs.wisc.edu/~adesmet/status.html for a reference.
 */
private static final int STATUS_UNEXPANDED = 0; // U
private static final int STATUS_IDLE = 1; // I
private static final int STATUS_RUNNING = 2; // R
private static final int STATUS_REMOVED = 3; // X
private static final int STATUS_COMPLETE = 4; // C
private static final int STATUS_HOLD = 5; // H
/*
 * These variables all represent aspects of condor or facts about the system
 */
private Schedd schedd; // Represents the Condor schedd daemon
private String owner; // Only jobs with this owner are fetched from the schedd
private autoUpdate myAutoUpdate; // This is an inner class of CondorJobStatus
// An ordered list of jobs, which consist of ordered lists of attributes.
// Written by the autoUpdate thread and read by the caller's thread, hence volatile.
private volatile ClassAdStructAttr[][] jobsAttr;
// The columns currently printed by the printJobStatus () family of methods.
private String[] colSelected;
// The titles of the six columns of job status output.
private final String[] defaultColSelected = {"ClusterId", "ProcId", "Owner", "Qdate", "JobStatus", "Cmd"};
// Display names of the job states, indexed by the STATUS_* codes above.
private static final String[] statusName = { " ", "Idle ", "Running ", "Removed ", "Completed", "Held "};
// The path to the default directory for storing output files
private String defaultOutputFileDir;
/**
 * Builds a CondorJobStatus that prints the default columns and uses the
 * "user.home" system property as its default output directory.
 */
public CondorJobStatus() {
    this.defaultOutputFileDir = System.getProperty("user.home");
    this.colSelected = this.defaultColSelected;
}
/**
* Determines which OS the system uses, assigning the correct value to myOS.
*/
/* private void determinteOS(){ // TODO Is this needed?
String myOSName = System.getProperty("os.name");
Pattern p1 = Pattern.compile("WIN"); // Compiles regular expression into Pattern.
Pattern p2 = Pattern.compile("Lin");
Matcher m1 = p1.matcher(myOSName);
Matcher m2 = p2.matcher(myOSName);
if (m1.find()) {
myOS = OS.WINDOWS;
}
else if (m2.find()) {
myOS = OS.LINUX;
}
else {
myOS = OS.OTHERS;
}
}*/
/**
 * Chooses which ClassAd attributes the printJobStatus () methods print, and
 * in which order. Each entry is used both as the column title and as the
 * attribute name looked up in the job's ClassAd.
 *
 * @param col The attribute names to print; {@code null} restores the default columns
 */
public void setColoumn(String[] col){
    // A null selection would make every print method fail later with a
    // NullPointerException, so fall back to the defaults instead.
    colSelected = (col != null) ? col : defaultColSelected;
}
/**
 * Fetches the job list once, then starts a background autoUpdate thread that
 * refreshes jobsAttr every 30 seconds.
 *
 * If an updater is already running it is asked to stop first, so repeated
 * calls do not accumulate background threads.
 *
 * @throws Exception If the initial fetch from the schedd fails
 */
public void updateJobStatus() throws Exception {
    updateAdFromServer();
    if (myAutoUpdate != null) {
        // Without this the previous thread would keep polling forever with
        // no remaining reference through which it could be stopped.
        myAutoUpdate.stopAutoUpdate();
    }
    myAutoUpdate = new autoUpdate(30000);
    myAutoUpdate.start();
}
/**
 * Stop updating jobsAttr regularly with every job on the queue.
 * Does nothing if updateJobStatus () has not started an updater yet.
 */
public void stopAutoUpdate() {
    if (myAutoUpdate != null) {
        myAutoUpdate.stopAutoUpdate();
    }
}
/**
 * Start retrieving the output of every Completed job to a fixed location.
 *
 * The setting lives on the background updater, so updateJobStatus () must
 * have been called first.
 *
 * @throws IllegalStateException If no updater has been started yet
 */
public void startAutoRetrieve () {
    if (myAutoUpdate == null) {
        // Fail with an explanation rather than a bare NullPointerException.
        throw new IllegalStateException("Auto update is not running; call updateJobStatus() first");
    }
    myAutoUpdate.startAutoRetrieve();
}
/**
 * Stop retrieving the output of every Completed job to a fixed location.
 * Does nothing if updateJobStatus () has not started an updater yet.
 */
public void stopAutoRetrieve () {
    if (myAutoUpdate != null) {
        myAutoUpdate.stopAutoRetrieve();
    }
}
/**
 * Prints one tab-separated line describing a job: one field per selected
 * column, in column order.
 *
 * The "JobStatus" column is printed as a readable name when its value is a
 * known status code; any other value (unknown code, missing or non-numeric
 * attribute) is printed as-is rather than aborting the whole line.
 *
 * @param ad The ClassAd from which the column values will be retrieved
 */
public void printJobStatus(ClassAd ad){
    StringBuilder message = new StringBuilder(); // The one-line message to print
    for (String column : colSelected) {
        String value = ad.get(column);
        if (column.equalsIgnoreCase("JobStatus")) {
            message.append(describeStatus(value));
        }
        else {
            message.append(value);
        }
        message.append("\t");
    }
    System.out.println(message);
}
/**
 * Translates a raw JobStatus attribute value into its display name.
 *
 * @param rawStatus The attribute value as stored in the ClassAd; may be null
 * @return The matching entry of statusName, or the raw value itself when it
 *         is not an integer within the bounds of statusName
 */
private static String describeStatus(String rawStatus) {
    try {
        int status = Integer.parseInt(rawStatus);
        if (status >= 0 && status < statusName.length) {
            return statusName[status];
        }
    }
    catch (NumberFormatException ignored) {
        // Missing or non-numeric status: fall through and show the raw value.
    }
    return String.valueOf(rawStatus);
}
/**
 * Prints the column titles followed by one line for every job recorded in
 * jobsAttr, which should be every job known to the local scheduler.
 *
 * If no job list has been fetched yet, only the titles and a notice are
 * printed.
 */
public void printJobsStatus(){
    printTitle(); // Label the columns
    // Work on a snapshot: the updater thread may replace jobsAttr at any time.
    ClassAdStructAttr[][] snapshot = jobsAttr;
    if (snapshot == null) {
        // The old code called this.wait() without holding the monitor, which
        // always threw IllegalMonitorStateException, and then dereferenced
        // the null array anyway.
        System.out.println("Waiting for update to finish.....");
        return;
    }
    for (ClassAdStructAttr[] attrs : snapshot) {
        printJobStatus(new ClassAd(attrs));
    }
}
/**
 * Prints the header line: the name of every selected column, each followed
 * by a tab.
 */
private void printTitle(){
    StringBuilder header = new StringBuilder();
    for (String column : colSelected) {
        header.append(column).append("\t");
    }
    System.out.println(header.toString());
}
/**
 * Fetches the ClassAd of a single job from the schedd and prints it under a
 * title line.
 *
 * @param cluster The cluster ID of the job
 * @param job The job (proc) ID of the job
 * @throws RemoteException If the transaction with the schedd fails
 */
public void printJobStatus(int cluster, int job) throws RemoteException{
    Transaction txn = schedd.createTransaction();
    txn.begin(30);
    ClassAd jobAd = new ClassAd(txn.getJobAd(cluster, job));
    txn.commit();

    // Output happens only after the transaction has been committed.
    printTitle();
    printJobStatus(jobAd);
}
/**
 * At regular intervals, an autoUpdate refreshes jobsAttr with all jobs which
 * the local schedd reports for the owner and, when auto-retrieve is enabled,
 * retrieves the output of any job which is complete. It does this by calling
 * updateAdFromServer ()
 * retrieveJobsIfDone ()
 *
 * The control flags are set from other threads, so they are volatile.
 *
 * @author Written by David Gong; commented by Sean Manning
 */
class autoUpdate extends Thread {
    private final long interVal; // Measured in ms so 30,000 = 30 s
    private volatile boolean needStop = false; // Should run () stop?
    private volatile boolean autoRetrieve = false; // Should run () fetch completed jobs?

    /**
     * Creates a new autoUpdate with auto-retrieve switched off.
     * @param interval The pause between two updates, in ms
     * @throws IllegalArgumentException If interval is negative
     */
    public autoUpdate (long interval) {
        if (interval < 0) {
            throw new IllegalArgumentException("interval must not be negative: " + interval);
        }
        interVal = interval;
    }

    /**
     * Causes this autoUpdate to stop permanently. A thread that is sleeping
     * between updates is woken so that it ends promptly instead of after the
     * remainder of the interval.
     */
    public void stopAutoUpdate () {
        needStop = true;
        System.out.println ("Current need to stop ('true' expected):" + needStop);
        interrupt();
    }

    /**
     * Start retrieving the output of every Complete job after each update.
     */
    public void startAutoRetrieve () {
        autoRetrieve = true;
    }

    /**
     * Stop retrieving the output of every Complete job after each update.
     */
    public void stopAutoRetrieve () {
        autoRetrieve = false ;
    }

    /**
     * Updates the job list (and optionally retrieves finished jobs) every
     * interVal ms until stopAutoUpdate () is called. A failed update is
     * reported and retried on the next cycle.
     */
    @Override
    public void run () {
        while (! needStop) {
            System.out.println ("Current need to stop ('false' expected):" + needStop);
            try {
                updateAdFromServer ();
                if (autoRetrieve) {
                    retrieveJobsIfDone ();
                }
            }
            catch (Exception err) {
                // Keep polling, but do not hide why the update failed.
                System.out.println ("Update from server failed in autoUpdate.run (): " + err);
            }
            try {
                Thread.sleep(interVal);
            }
            catch (InterruptedException err) {
                // An interrupt sent by stopAutoUpdate () is expected; the loop
                // condition ends the thread. Anything else is reported.
                if (! needStop) {
                    System.out.println ("Sleep failed in autoUpdate.run ()");
                }
            }
        }
        System.out.println("AutoUpdate is no longer running....");
    }
}
/**
 * Replaces jobsAttr with the ClassAds of every job the schedd reports for the
 * configured owner (cp. slide 30 of the PPT).
 *
 * @throws RemoteException If there is a problem contacting the schedd
 */
private void updateAdFromServer() throws RemoteException{
    // Constraint expression that selects only this owner's jobs.
    String ownerConstraint = "Owner==\"" + owner + "\"";
    jobsAttr = schedd.getJobAds(ownerConstraint);
    System.out.println("update from server finished");
}
/**
 * Assigns the Schedd through which this CondorJobStatus queries and
 * manipulates jobs. It must be set before any method that contacts the
 * schedd is called, because the constructor leaves the field unset.
 *
 * @param schedd The Schedd to be associated with this object
 */
public void setSchedd(Schedd schedd){
this.schedd = schedd;
}
/**
 * Sets the owner of this CondorJobStatus. The name is inserted into the
 * "Owner==" constraint used when fetching jobs from the schedd, so only
 * that owner's jobs are tracked.
 *
 * @param owner The name of the new owner
 */
public void setOwner (String owner) {
this.owner = owner;
}
/**
 * The job retrieval method which underlies all the others. Within a single
 * transaction it lists the job's spool files, copies each of them into the
 * job's local output directory (see getOutputLocalLocation ()), closes the
 * spool, removes the job from the queue and commits.
 *
 * Handling of an "OutputSandbox" attribute is not implemented yet: its
 * presence is only reported, and all spool files are retrieved regardless.
 *
 * As a side effect the "user.dir" system property is set to the output
 * directory.
 *
 * See slide 32 of "Developer APIs to Condor" PPT for a model.
 * @param cluster The cluster number of the job (the part of the job number before the period)
 * @param job The job number of the job (the part of the job number after the period)
 * @throws RemoteException Declared for the schedd calls made here
 * @throws FileNotFoundException Declared for the file transfer
 * @throws Exception Declared for any other failure of the transfer
 */
public void retrieveJob(int cluster, int job) throws RemoteException, FileNotFoundException, Exception{
    System.out.println ("In RetrieveJob (" + cluster + ", " + job + ")");
    Transaction txn = schedd.createTransaction();
    txn.begin(30);

    ClassAd ad = new ClassAd (txn.getJobAd (cluster, job));
    if (ad.get ("OutputSandbox") != null) {
        System.out.println ("OutputSandbox detected ...");
        // TODO Implement: retrieve only the files named in the OutputSandbox.
    }

    // "Discover available files" (PPT)
    FileInfo[] spoolFiles = txn.listSpool(cluster, job);

    String outputDir = getOutputLocalLocation(ad);
    System.out.println ("user.dir: " + outputDir);
    System.setProperty("user.dir", outputDir);

    for (int i = 0; i < spoolFiles.length; i++) {
        FileInfo remote = spoolFiles[i]; // describes the remote file
        String remoteName = remote.getName ();
        String localPath = outputDir + "/" + remoteName;
        System.out.println ("Remote name of file to stage out: " + remoteName);
        System.out.println ("New local path will be: " + localPath);
        txn.getFile(cluster, job, remoteName, (int)remote.getSize(), new File(localPath));
    }

    txn.closeSpool(cluster, job);
    txn.removeJob(cluster, job, "Successfully retrieved result, close this job");
    txn.commit();
}
/**
 * Copies back the output files of the job described by a ClassAd, using the
 * ad's "ClusterId" and "ProcId" attributes to identify the job.
 *
 * @param ad The ClassAd of the job to retrieve
 * @throws RemoteException Propagated from retrieveJob (int, int)
 * @throws FileNotFoundException Propagated from retrieveJob (int, int)
 * @throws Exception Propagated from retrieveJob (int, int)
 */
public void retrieveJob(ClassAd ad) throws RemoteException, FileNotFoundException, Exception{
    retrieveJob(clusterIdOf(ad), jobIdOf(ad));
}
/**
 * Placeholder for retrieving only the files named in a job's output sandbox.
 * The body is empty: calling this method currently does nothing.
 *
 * @param cluster The cluster ID of the job (unused)
 * @param job The job ID of the job (unused)
 * @param outputSandbox The output sandbox specification (unused)
 */
public void retrieveJob (int cluster, int job, String outputSandbox) {;}
/**
 * Retrieves the output files of every job in jobsAttr whose status is
 * Complete. A failure on one job is reported and does not prevent the
 * remaining jobs from being processed.
 *
 * Does nothing if no job list has been fetched yet.
 */
public void retrieveJobsIfDone(){
    // Work on a snapshot: the updater thread may replace jobsAttr at any time.
    ClassAdStructAttr[][] snapshot = jobsAttr;
    if (snapshot == null) {
        return;
    }
    for (ClassAdStructAttr[] attrs : snapshot) {
        ClassAd ad = new ClassAd(attrs);
        try {
            if (isJobComplete(ad)) {
                retrieveJob(ad);
            }
        } catch (RemoteException err) {
            System.out.println("Error on remote access output files: " + err);
        } catch (FileNotFoundException err) {
            System.out.println("File not found while retrieve output files: " + err);
        } catch (Exception err) {
            System.out.println("Error while retrieving output files: " + err);
        }
    }
}
/**
 * Reads the cluster ID of a job from its ClassAd.
 *
 * @param ad The ClassAd of a job
 * @return The value of the ad's "ClusterId" attribute as an int
 */
private int clusterIdOf(ClassAd ad){
    String clusterText = ad.get("ClusterId");
    return Integer.parseInt(clusterText);
}
/**
 * Reads the job (proc) ID of a job from its ClassAd.
 *
 * @param ad The ClassAd of a job
 * @return The value of the ad's "ProcId" attribute as an int
 */
private int jobIdOf(ClassAd ad){
    String procText = ad.get("ProcId");
    return Integer.parseInt(procText);
}
/**
 * Closes a job's spool and removes the job from the queue in one
 * transaction, which moves the job into Old Age (the history log).
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @param reason The reason why the job is being removed
 * @throws RemoteException Thrown by the Transaction in case of a problem making the change
 */
public void closeSpoolAndRemoveJob(int cluster, int job, String reason)throws RemoteException{
    Transaction txn = schedd.createTransaction();
    txn.begin(30);
    // Spool first, then the job itself.
    txn.closeSpool(cluster, job);
    txn.removeJob(cluster, job, reason);
    txn.commit();
}
/**
 * Removes a single job from the queue inside its own transaction.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @param reason The reason why this action is being done
 * @throws RemoteException Thrown by the Transaction in case of a problem making the change
 */
public void removeJob(int cluster, int job, String reason)throws RemoteException{
    Transaction txn = schedd.createTransaction();
    txn.begin(30);
    txn.removeJob(cluster, job, reason);
    txn.commit();
}
/**
 * Puts a single job on hold inside its own transaction; the job stays in
 * the queue.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @param reason The reason why this action is being done
 * @throws RemoteException Thrown by the Transaction in case of a problem making the change
 */
public void holdJob(int cluster, int job, String reason)throws RemoteException{
    Transaction txn = schedd.createTransaction();
    txn.begin(30);
    txn.holdJob(cluster, job, reason);
    txn.commit();
}
/**
 * Releases a single job inside its own transaction (the counterpart of
 * holdJob ()).
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @param reason The reason why this action is being done
 * @throws RemoteException Thrown by the Transaction in case of a problem making the change
 */
public void releaseJob(int cluster, int job, String reason) throws RemoteException{
    Transaction txn = schedd.createTransaction();
    txn.begin(30);
    txn.releaseJob(cluster, job, reason);
    txn.commit();
}
/**
 * Determines whether or not a job is complete, based on the most recently
 * fetched job list.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @return True if the job is in the list and its status is Complete, false otherwise.
 */
public boolean isJobComplete(int cluster, int job){
    // This previously compared against STATUS_RUNNING, so running jobs were
    // reported as complete and completed jobs were not.
    return jobHasStatus (cluster, job, STATUS_COMPLETE);
}
// Old code by DG
// TODO Replace reference to this method.
/**
 * Determines whether or not the job described by a ClassAd is complete.
 *
 * @param ad The ClassAd of the job.
 * @return True if the ad's "JobStatus" equals the Complete status code, false otherwise
 */
private boolean isJobComplete(ClassAd ad){
    return jobHasStatus(ad, STATUS_COMPLETE);
}
/**
 * Determines whether or not a job is in state Idle.
 *
 * The answer comes from the job list held in jobsAttr (via jobHasStatus),
 * not from a fresh query to the schedd.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @return True if the job exists and is idle, false otherwise.
 */
public boolean isJobIdle (int cluster, int job) {
return jobHasStatus (cluster, job, STATUS_IDLE);
}
/**
 * Determines whether or not a job is in state Running.
 *
 * The answer comes from the job list held in jobsAttr (via jobHasStatus),
 * not from a fresh query to the schedd.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @return True if the job exists and is running, false otherwise.
 */
public boolean isJobRunning (int cluster, int job) {
return jobHasStatus (cluster, job, STATUS_RUNNING);
}
/**
 * Determines whether or not a job is in state Hold.
 *
 * The answer comes from the job list held in jobsAttr (via jobHasStatus),
 * not from a fresh query to the schedd.
 *
 * @param cluster The cluster ID of the job
 * @param job The job ID of the job
 * @return True if the job exists and is being held, false otherwise.
 */
public boolean isJobHold (int cluster, int job) {
return jobHasStatus (cluster, job, STATUS_HOLD);
}
/**
 * Returns true if the job with ID cluster.job is in the most recently
 * fetched job list and has job status 'status'.
 *
 * Returns false when no job list has been fetched yet, when the job is not
 * in the list, or when 'status' is not the job's status (which is always the
 * case for a status code no job carries).
 *
 * @param cluster The cluster ID of the job to check
 * @param job The job ID of the job to check
 * @param status A job status code (see the STATUS_* constants)
 * @return True if the job is known and its status equals 'status'
 */
private boolean jobHasStatus (int cluster, int job, int status) {
    // Work on a snapshot: the updater thread may replace jobsAttr at any time.
    ClassAdStructAttr[][] snapshot = jobsAttr;
    if (snapshot == null) {
        return false;
    }
    for (ClassAdStructAttr[] attrs : snapshot) {
        ClassAd ad = new ClassAd(attrs);
        if (clusterIdOf(ad) == cluster && jobIdOf(ad) == job) {
            return jobHasStatus (ad, status);
        }
    }
    return false;
}
/**
 * Returns true if the job represented by ad has job status 'status'.
 *
 * The ad's "JobStatus" attribute is parsed as an integer and compared with
 * 'status'.
 *
 * @param ad The job to check
 * @param status A job status code (defined from zero to five)
 * @return True if the status of ad is equal to 'status'
 */
private boolean jobHasStatus (ClassAd ad, int status) {
    int actualStatus = Integer.parseInt(ad.get("JobStatus"));
    return actualStatus == status;
}
/**
 * Gets the local folder where the output of a job is being stored: the ad's
 * "OutputFileDir" attribute when present, otherwise the default output
 * directory of this object.
 *
 * @param ad The ClassAd of the job
 * @return The folder where output is being stored
 */
public String getOutputLocalLocation(ClassAd ad){
    String configuredDir = ad.get("OutputFileDir");
    return (configuredDir != null) ? configuredDir : defaultOutputFileDir;
}
/**
 * Gets the interval at which the background updater refreshes the job list.
 *
 * @return 0 if no autoUpdate has been created, otherwise the auto-update interval in ms
 */
public long getAutoUpdateInterval () {
    if (myAutoUpdate == null) {
        return 0;
    }
    return myAutoUpdate.interVal;
}
/**
 * A simple test method for this class. Connects to the schedd, fetches the
 * owner's jobs, prints their status and stops the background updater again.
 *
 * SSL settings (cp. slide 38 of the "Developer APIs to Condor" PPT) are taken
 * from the usual javax.net.ssl.* system properties when those are already
 * set, e.g. with -D on the command line; otherwise the site defaults below
 * are used. The key store password is never stored in the source: it is
 * read from the javax.net.ssl.keyStorePassword property or, failing that,
 * from the CONDOR_KEYSTORE_PASSWORD environment variable.
 *
 * @param args This function does not take command-line arguments.
 */
public static void main (String[] args) {
    // Site defaults for Unix/Linux; each can be overridden with -D.
    setSystemPropertyIfAbsent("javax.net.ssl.trustStore", "/hepuser/seangwm/ashokProjects/CondorWebService/CondorWSProjectRon/src/supportfiles/truststore");
    setSystemPropertyIfAbsent("javax.net.ssl.keyStore", "/hepuser/seangwm/ashokProjects/CondorWebService/CondorWSProjectRon/src/supportfiles/keystore");
    setSystemPropertyIfAbsent("javax.net.ssl.keyStoreType", "PKCS12");
    if (System.getProperty("javax.net.ssl.keyStorePassword") == null) {
        String keyStorePassword = System.getenv("CONDOR_KEYSTORE_PASSWORD");
        if (keyStorePassword == null) {
            System.out.println("No key store password given: set -Djavax.net.ssl.keyStorePassword or CONDOR_KEYSTORE_PASSWORD.");
            System.exit(-1);
        }
        else {
            System.setProperty("javax.net.ssl.keyStorePassword", keyStorePassword);
        }
    }

    /*
     * Create a Schedd for the schedd's web service endpoint.
     */
    Schedd mySchedd = null;
    try {
        URL scheddLocation = new URL("https://babargt4.phys.uvic.ca:1980");
        mySchedd = new Schedd(scheddLocation);
    }
    catch (Exception err) {
        System.out.println("Failed to create scheduler, System is exiting.");
        System.exit(-1);
    }

    /*
     * Create a CondorJobStatus to monitor the owner's jobs using the new Schedd
     */
    CondorJobStatus myStatus = new CondorJobStatus();
    myStatus.setSchedd(mySchedd);
    myStatus.setOwner ("seangwm");
    try {
        System.out.println("+++ Trying three operations");
        myStatus.updateJobStatus();
        System.out.println("+++ Updated job status");
        myStatus.startAutoRetrieve ();
        myStatus.printJobsStatus();
        System.out.println("+++ Print job status");
    }
    catch (Exception err) {
        err.printStackTrace();
        System.out.println("Error on submit");
    }
    finally {
        // Always stop the updater: it is a non-daemon thread and would
        // otherwise keep the JVM alive after a failure above.
        if (myStatus.myAutoUpdate != null) {
            myStatus.stopAutoUpdate();
            System.out.println("+++ Stop auto update");
        }
    }
}
/**
 * Sets a system property only when it has no value yet, so that values given
 * on the command line take precedence over the built-in defaults.
 *
 * @param key The name of the system property
 * @param defaultValue The value to use when the property is not set
 */
private static void setSystemPropertyIfAbsent(String key, String defaultValue) {
    if (System.getProperty(key) == null) {
        System.setProperty(key, defaultValue);
    }
}
}