HTCondor Project List Archives



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Condor-devel] LSB-ish init script for Condor




Todd Tannenbaum wrote:
Dan Bradley wrote:
I think it would be better to send the master SIGHUP rather than using condor_reconfig. The standard security settings do not allow condor_reconfig to work on most nodes, and there is no error checking on the client side, so the user would not know that the operation failed. Of course, those problems should be fixed too, but for the stable series, I think SIGHUP is better.

All in all, I agree with Dan. On Unix the only advantage of using condor_reconfig in this script would be you could do condor_reconfig -full.

I think this would be ok to put in 7.0.x.

regards,
Todd

Attached is a new patch, that I plan on committing. It sends a -HUP instead of using condor_reconfig and adds more of a delay between the -TERM (graceful shutdown) and -KILL (fast shutdown) when stopping the condor_master.

Best,


matt
diff --git a/src/condor_examples/Imakefile b/src/condor_examples/Imakefile
index 34c85cc..b7ced63 100644
--- a/src/condor_examples/Imakefile
+++ b/src/condor_examples/Imakefile
@@ -9,7 +9,8 @@ FILES = README condor_config.local.central.manager \
 	condor_vmgahp_config.xen \
 	condor_vmgahp_config.vmware \
 	lamscript \
-	mp1script
+	mp1script \
+	condor.init
 
 all::
 
diff --git a/src/condor_examples/condor.init b/src/condor_examples/condor.init
new file mode 100644
index 0000000..1d25a33
--- /dev/null
+++ b/src/condor_examples/condor.init
@@ -0,0 +1,174 @@
+#!/bin/bash
+#
+# condor	This script allows for starting and stopping Condor.
+#
+# chkconfig: - 90 10
+# description: Condor is a high throughput computing batch processing
+# 	       platform.
+# processname: condor_master
+# config: /etc/condor/condor_config
+# pidfile: /var/lib/condor/condor_master.pid
+
+### BEGIN INIT INFO
+# Provides: condor
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Short-Description: start and stop Condor
+# Description: Condor HTC computing platform
+### END INIT INFO
+
+# The program being managed
+prog=condor_master
+
+lockfile=/var/lock/subsys/$prog
+pidfile=/var/lib/condor/$prog.pid
+
+# Source function library
+. /etc/init.d/functions
+
+# Source networking configuration
+[ -f /etc/sysconfig/network ] && . /etc/sysconfig/network
+
+# Source Condor configuration
+[ -f /etc/sysconfig/condor ] && . /etc/sysconfig/condor
+
+# Check that networking is up
+[ "${NETWORKING}" = "no" ] && exit 1
+
+
+start() {
+    echo -n $"Starting Condor daemons: "
+    daemon --pidfile $pidfile --check $prog $prog -pidfile $pidfile
+    RETVAL=$?
+    echo
+    [ $RETVAL -eq 0 ] && touch $lockfile
+    return $RETVAL
+}
+
+stop() {
+    echo -n $"Stopping Condor daemons: "
+    killproc -p $pidfile $prog
+    RETVAL=$?
+    echo
+    [ $RETVAL -eq 0 ] && rm -f $lockfile
+    return $RETVAL
+}
+
+reload() {
+    echo -n $"Reloading Condor daemons: "
+    condor_reconfig # Always returns 0?
+    RETVAL=$?
+    echo
+    return $RETVAL
+}
+
+#
+# Determine if a process is running only by looking in a pidfile.
+# There is no use of pidof, which can find processes that are not
+# started by this script.
+#
+# ASSUMPTION: The pidfile will exist if the process does, see false
+# negative warning.
+#
+# WARNING: A false positive is possible if the process that dropped
+# the pid file has crashed and the pid has been recycled. A false
+# negative is possible if the process has not yet dropped the pidfile,
+# or it contains the incorrect pid.
+#
+# Usage: pid_status <pidfile> <lockfile>
+# Result: 0 = pid exists
+#         1 = pid does not exist, but pidfile does
+#         2 = pid does not exist, but lockfile does
+#         3 = pidfile does not exist, thus pid does not exist
+#         4 = status unknown
+#
+pid_status() {
+    if [ -f $1 ]; then
+        # this can fail if we're not privileged
+        pid=`cat $1` &>/dev/null
+        if [ $? -ne 0 -o -z "$pid" ]; then
+            return 4
+        fi
+
+        ps $pid &>/dev/null
+        if [ $? -ne 0 ]; then
+	    if [ -e $2 ]; then
+		return 2
+	    fi
+
+            return 1
+        fi
+
+        return 0
+    fi
+
+    return 3
+}
+
+
+pid_status $pidfile $lockfile
+running=$?
+
+if [ "$1" != "status" ]; then
+    # Report that $prog does not exist, or is not executable
+    if [ ! -x /usr/sbin/$prog ]; then
+	echo $"$0: error: program not installed"
+	exit 5
+    fi
+
+    [ $running -eq 4 ] && echo $"$0: error: insufficient privileges" && exit 7
+fi
+
+case "$1" in
+    start)
+	[ $running -eq 0 ] && exit 0
+	start
+	RETVAL=$?
+	;;
+    stop)
+	[ $running -eq 0 ] || exit 0
+	stop
+	RETVAL=$?
+	;;
+    restart)
+	[ $running -eq 0 ] && stop
+	start
+	RETVAL=$?
+	;;
+    try-restart)
+	[ $running -eq 0 ] || exit 0
+	stop
+	start
+	RETVAL=$?
+	;;
+    reload|force-reload)
+	if [ $running -ne 0 ]; then
+	    echo $"$0: error: $prog is not running"
+	    exit 7
+	fi
+	reload
+	RETVAL=$?
+	;;
+    status)
+	if [ $running -ne 0 ]; then
+	    case "$running" in
+		1) echo $"$prog dead but pid file exists" ;;
+		2) echo $"$prog dead but subsys locked" ;;
+		3) echo $"$prog is stopped" ;;
+		4) echo $"$prog status is unknown" ;;
+	    esac
+
+	    exit $running
+	fi
+
+	# WARNING: status uses pidof and may find more pids than it
+	# should.
+	status -p $pidfile $prog
+	RETVAL=$?
+	;;
+    *)
+	echo $"Usage: $0 {start|stop|restart|try-restart|reload|force-reload|status}"
+	RETVAL=2
+esac
+
+exit $RETVAL