LDAS logo
TclDOC logo

The manager.tcl Script

Modification Date: 11/18/2009

Table of Procedures

red ball kIlLjOb
red ball manager::reply
red ball mgr::abortBlockedJob
red ball mgr::abortJob
red ball mgr::activeJobStatusLockReport
red ball mgr::activeJobStatusSummary
red ball mgr::activelyMonitorDiskUsage
red ball mgr::addAPI
red ball mgr::addUser
red ball mgr::apiSocketStatus
red ball mgr::apiStatus
red ball mgr::apiStatusPage
red ball mgr::apiVersionCheck
red ball mgr::assignJobid
red ball mgr::blockDso
red ball mgr::blockUser
red ball mgr::bootstrapAPI
red ball mgr::bootstrapFatalErrorEmail
red ball mgr::bootstrapLock
red ball mgr::bootstrapLockTimeout
red ball mgr::bootstrapPause
red ball mgr::bootstrapStatus
red ball mgr::bootstrapSuccessEmail
red ball mgr::bootstrapUnlock
red ball mgr::chopQueueTo
red ball mgr::cmdParseUserTimeout
red ball mgr::commandSanityChecks
red ball mgr::cryptCheck
red ball mgr::deleteJobidFromUsersQueue
red ball mgr::delocaliseOutputURLs
red ball mgr::detectBlockage
red ball mgr::dontStepOnJob
red ball mgr::dropPrettyCommand
red ball mgr::duplicateOpts
red ball mgr::expandCmd
red ball mgr::expiresNextWeek
red ball mgr::expiresTomorrow
red ball mgr::forcePerms
red ball mgr::garbageCollect
red ball mgr::getCommandInputText
red ball mgr::getDiskUsageInfo
red ball mgr::getExtraEnv
red ball mgr::getLauncher
red ball mgr::getURLsForJob
red ball mgr::globMacros
red ball mgr::hideDirectories
red ball mgr::job2email
red ball mgr::job2name
red ball mgr::jobStatus
red ball mgr::jobTimingReport
red ball mgr::jobidToHref
red ball mgr::killJob
red ball mgr::killPing
red ball mgr::lastUsedPort
red ball mgr::libmallocPreload
red ball mgr::libstdcPreload
red ball mgr::liveJobListPush
red ball mgr::localiseInputURLs
red ball mgr::logRejectedJob
red ball mgr::logReply
red ball mgr::makeApiAtHome
red ball mgr::makeFtpDirectory
red ball mgr::manageLogs
red ball mgr::memUsage
red ball mgr::oneLineJobTimingReport
red ball mgr::parseBlock
red ball mgr::parseMeta
red ball mgr::perApiRunningStats
red ball mgr::ping
red ball mgr::postMortem
red ball mgr::preValidate
red ball mgr::prettifyCmdHtml
red ball mgr::promoteJobs
red ball mgr::pushFTPandHTTPinfo
red ball mgr::recryptWithNewKey
red ball mgr::removeAPI
red ball mgr::removeExpiredUsers
red ball mgr::removeUser
red ball mgr::reply
red ball mgr::reportCorruptedQueue
red ball mgr::reportOversizeQueue
red ball mgr::reportWrongHost
red ball mgr::reportWrongUser
red ball mgr::rmJobFiles
red ball mgr::runStatusSummary
red ball mgr::runningJobStatsFile
red ball mgr::sHuTdOwN
red ball mgr::scanNFSErrors
red ball mgr::shutdownAPI
red ball mgr::throttleOnDiskUsage
red ball mgr::throttlePipelines
red ball mgr::throttlePipelinesAtApi
red ball mgr::updateOrAddUser
red ball mgr::updateUserInfo
red ball mgr::userInfo
red ball mgr::userQuota
red ball mgr::validateUserOptions
red ball mgr::validateUserPassword
red ball mgr::validateopts
red ball mgr::writeActiveJobStatus
red ball mgr::zombies

-*- tcl-indent-level: 5; indent-tabs-mode: nil; -*- vi: set autoindent expandtab tabstop=5 shiftwidth=5 :
manager.tcl Version 1.0 Wraps the
genericAPI.tcl for use by the manager API.

set ::RCS_ID_managertcl {$Id: manager.tcl,v 1.519 2009/09/30 22:33:39 emaros Exp $}
set ::RCS_ID_managertcl [ string trim $::RCS_ID_managertcl "\$" ]
package provide manager 1.0
namespace eval mgr {
     set activejobs 0
}

§   §   §
§   §   §

Name: mgr::expandCmd

Description:
This is the entry point for the managers macro parser.
It calls itself recursively, and calls the mgr::parseMeta and mgr::parseBlock. The result of this expansion is a block of code which the assistant managers can use to drive the low level API's.
Usage:

Comments:

This is a "two pass" parser.
proc mgr::expandCmd { { cmd "" } } {
     ;## if its a xxx.meta command, expand and recurse.
     if { [ catch {
          if { [ llength [ split $cmd "\n" ] ] == 1 } {
               set cmd      [ mgr::parseMeta $cmd ]
               set sequence [ mgr::expandCmd $cmd ]
          } else {
               set sequence [ mgr::parseBlock $cmd ]
          }
     } err ] } {
          addLogEntry $err
          return -code error "[ myName ]: $err"
     }
     return $sequence
}

§   §   §

Name: mgr::commandSanityChecks

Description:
Catch-all for command sanity that short-circuits as early as possible.
Called from mgr::validateopts
Parameters: Usage:

Comments:

proc mgr::commandSanityChecks { cmd } {
     if { [ catch {
          ;## -np must be at least '2' since no search code is run
          ;## on the wrapper master and we include the wrapper master
          ;## as one of the nodes requested to avoid underrepresenting
          ;## the system requirements of large numbers of small jobs.
          if { [ regexp -nocase -- {\s+-np\s+[01]\s+} $cmd ] } {
               set err "minimum possible value of '-np' option is '2'"
               return -code error $err
          }
          ;## when ::REJECT_EXPLICIT_DYNLIB_PATHS is set to 1, we
          ;## reject al commands that attempt to specify a dynlib.so
          ;## by absolute path. this is done to prevent the running
          ;## of potentially hazardous experimental code on production
          ;## LDAS systems. PR #1825 10-07-2003
          if { [ info exists ::REJECT_EXPLICIT_DYNLIB_PATHS ] && \
                   [ string equal 1 $::REJECT_EXPLICIT_DYNLIB_PATHS ] } {
               set dyn_rx {\s+-dynlib\s+(\S+)\s+}
               if { [ regexp -nocase -- $dyn_rx $cmd -> dynlib ] } {
                    regsub -all -- {[\}\{\"\s]} $dynlib {} dynlib
                    if { [ regexp {[\\\/]+} $dynlib ] } {
                         set msg "This system has ::REJECT_EXPLICIT_DYNLIB_PATHS"
                         append msg " enabled to prevent the use of experimental"
                         append msg " shared objects."
                         append msg " You may not specify path components as"
                         append msg " part of your -dynlib option (and you"
                         append msg " specified '$dynlib')."
                         return -code error $msg
                    }
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::cmdParseUserTimeout

Description:

Parameters: Usage:

Comments:

proc mgr::cmdParseUserTimeout { cmd } {
     if { [ catch {
          set timeout 0
          ;## optional option available to users in order to cause
          ;## jobs to timeout *more* quickly.
          set uto_rx {\s+-usertimeout\s+(\S+)\s+}
          if { [ regexp -nocase -- $uto_rx $cmd -> timeout ] } {
               regsub -all -- {[\}\{\"\s]} $timeout {} timeout
               set standard $::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API
               set timeout [ expr { round($timeout) } ]
               if { $timeout > 2 * $standard } {
                    set timeout [ expr { 2 * $standard } ]
               }
          } else {
               set timeout 0
          }
     } err ] } {
          set err "Error in -usertimeout option: '$err'"
          return -code error "[ myName ]: $err"
     }
     return $timeout
}

§   §   §

Name: mgr::parseMeta

Description:
Called by mgr::expandCmd on the command received from the user, which is assumed to be a "meta" command.
Usage:

Comments:

Should be "caught".
proc mgr::parseMeta { cmd } {
     set procname {}
     set usropts  [ list ]
     set command  {}
     set macrolist [ mgr::globMacros ]
     regexp $::cmd_rx $cmd -> procname usropts
     ;## first pass expansion from meta file
     if { [ lsearch -exact $macrolist ${procname}.meta ] > -1 } {
          set fname [ file join $::LDASMACROS meta ${procname}.meta ]
          set fid [ open $fname r ]
          ;## meta macros are constrained to start with a line
          ;## declaring the "opts" and their defaults.
          set command [ string trim [ gets $fid ] ]
          ;## test it.
          if { ! [ regexp {^set opts \{[^\}]+\}} $command ] } {
               set msg    "the first line of the meta file must be\n"
               append msg "the declaration of 'opts', but I found:\n"
               append msg "$command\ninstead."
               return -code error "[ myName ]: $msg"
          }
          ;## apply the user supplied options
          if { [ llength $usropts ] } {
               append command "\n metaOpts \{ $usropts \}\n"
          }
          append command [ read $fid ]
          ::close $fid
     } else {
          return -code error "[ myName ]: Bad meta command: '$cmd'"
     }
     return $command
}

§   §   §

Name: mgr::parseBlock

Description:
Usage:

Comments:

proc mgr::parseBlock { cmd } {
     set api {}
     set tmp [ list ]
     set sequence [ list ]
     set api_rx ^([ join $::API_LIST | ])\\s*\$
     set macrolist [ mgr::globMacros ]
     foreach line [ split $cmd ";\n" ] {
          set line [ string trim $line ]
          if { [ regexp {(^\#.*$|^$)} $line ] } {
               continue
          }
          if { [ regexp {^set opts \{[^\}]+\}} $line ] } {
               set sequence $line
               continue
          }
          if { [ regexp -nocase $api_rx $line api ] } {
               if { [ string equal $api $tmp ] } {
                    continue
               } else {
                    set tmp $api
                    continue
               }
          }
          if { ! [ regexp $::cmd_rx $line -> procname opts ] } {
               set procname [ string trim $line ]
          }
          if { [ string equal metaOpts $procname ] } {
               append sequence "\n metaOpts $opts\n"
               continue
          }
          if { [ lsearch -exact $macrolist ${procname}.$api ] > -1 } {
               set fname [ file join $::LDASMACROS $api ${procname}.$api ]
               append sequence "\n$api\n"
               set fid [ open $fname r ]
               set tmp {}
               while { [ gets $fid code ] > -1 } {
                    if { ! [ regexp {(^[ ]*;?\#)|(^[ ]*$)} $code ] } {
                         append tmp "$code\n"
                    }
               }
               ::close $fid
               append sequence \{[ string trim $tmp ]\}
               set procname {}
               continue
          }
     } ;# end of foreach
     return $sequence
}

§   §   §

Name: mgr::globMacros

Description:
Returns all the macro filenames found under ::LDASMACROS in a list, or sends a helpful error message if the macro files are not found.

Parameters: Usage:

Comments:

proc mgr::globMacros { } {
     set macrolist {}
     foreach file [ glob -nocomplain $::LDASMACROS/*/*.* ] {
          lappend macrolist [ file tail $file ]
     }
     if { [ llength $macrolist ] < 3 } {
          set msg "*********************************************\n"
          append msg "ABORT!! No macro code found under '$::LDASMACROS'\n"
          append msg "If this is a new installation of LDAS, you\n"
          append msg "must read the documentation for installing\n"
          append msg "the manager API.\n"
          append msg "If this is not a new installation, then\n"
          append msg "either you have deleted the macro files or\n"
          append msg "redefined ::LDASMACROS to point to the wrong\n"
          append msg "location.  You may also being experiencing\n"
          append msg "transient NFS problems.\n\n"
          append msg "         NO JOBS CAN BE PROCESSED\n"
          append msg "       UNTIL THIS PROBLEM IS SOLVED.\n"
          append msg "*********************************************\n"
          puts stderr $msg
          return -code error "[ myName ]:\n$msg"
     }
     set macrolist
}

§   §   §

Name: mgr::bootstrapAPI

Description:
Execute command sequence on a remote machine to bring an API online. The remote command is executed through ssh.
Usage:
      if { [ mgr::bootstrapAPI $apis ] } { ... }
Comments:
The ssh call may be overly elaborate.
proc mgr::bootstrapAPI { { apis "" } { comment "" } } {
     set redirect /dev/null
     set libmalloc [ list ]
     set libstdc   [ list ]
     set apihost unknown!
     if { ! [ string length $apis ] } {
          set msg "you must specify an api, list of api's, or 'all'"
          return -code error "[ myName ]: $msg"
     }
     if { [ string equal all $apis ] } {
          set apis $::API_LIST
     } else {
          foreach api $apis {
               if { [ lsearch $::API_LIST $api ] < 0 } {
                    set msg "cannot bootstrap unregistered API: '$api'"
                    return -code error $msg
               }
          }
     }
     ;## emergency port requests always succeed
     set caller [ uplevel myName ]
     if { [ string equal emergency_callback $caller ] } {
          mgr::bootstrapLock $apis 1
     } else {
          set apis [ mgr::bootstrapLock $apis 0 ]
     }
     set localhost [ set ::${::LDAS_SYSTEM}(manager) ]
     foreach api $apis {
          set api [ string tolower $api ]
          regsub {api$} $api {} api
          if { [ string equal $api manager ] } { continue }
          set apiPid [ getPid $api ]
          set apiPid [ string trim $apiPid ]
          addLogEntry "$api pid is '$apiPid'" purple
          ;## if API is up but appears stuck and user does not want reboot
          ;## hold off rebooting API
          if { [ string length $apiPid ] && ! $::STARTUP_IN_PROGRESS && \
                   [ info exist ::WAIT_FOR_API ] && [ set ::WAIT_FOR_API ] } {
               set subject "hold off bootstraping $api"
               set body "set ::WAIT_FOR_API to 0 to resume booting"
               addLogEntry "Subject: ${subject}; Body: ${body}" email
               continue
          }
          if { [ catch {
               mgr::shutdownAPI $api $comment
               mgr::bootstrapPause $api
               set postmortem [ mgr::postMortem $api ]
               ;## if the call to mgr::postMortem returned data
               if { [ string length $postmortem ] } {
                    addLogEntry $postmortem blue mgr::postMortem
               }
               set apihost [ set ::${api}(host) ]
          } err ] } {
               addLogEntry "$err (api: $api)" 2
          }
          if { [ catch {
               set libstdc [ mgr::libstdcPreload $api ]
          } err ] } {
               continue
          }
          if { [ catch {
               set libmalloc [ mgr::libmallocPreload $api ]
          } err ] } {
               continue
          }
          ;## Ed asked for different vars for different platforms 04/28/04
          ;## this will return SunOS or Linux
          ;## Ed asked for this 07/02/07
          ;## have different env for different APIs to run libumem
          set extra_env [ mgr::getExtraEnv $api ]
          set extra_env [ subst -nocommands $extra_env ]
          set api_cmd "${api}API"
          if { ! [ string equal $apihost $localhost ] } {
               set api_cmd "$::LDAS/bin/${api_cmd}"
          }
          set launcher [ mgr::getLauncher $api $api_cmd ]
          mgr::perApiRunningStats $api restart [ gpsTime ]
          set dir [ mgr::makeApiAtHome $api ]
          set dotfile "${dir}/.. "
          set fid [ open $dotfile w ]
          file attributes $dotfile -permissions 0600
          puts $fid $::MGRKEY
          ::close $fid
          set redirect ${dir}/${api}.log
          addLogEntry "bootstrapping $api API on $apihost"
          set apipath ${::LDASLIB}/${api}API
          if { [ catch {
               if { [ string equal $apihost $localhost ] } {
                    set LD_LIBRARY_PATH $::env(LD_LIBRARY_PATH)
                    set cwd [ pwd ]
                    cd $dir
                    set cmd    "/bin/env $extra_env "
                    append cmd "RUNDIR=$::env(RUNDIR) "
                    append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
                    append cmd "DB2INSTANCE=$::DB2INSTANCE "
                    append cmd "$launcher >& $redirect &"
                    eval ::exec $cmd
                    cd $cwd
               } else {
                    set cmd    "alias cd=cd && cd $dir && /bin/env "
                    append cmd "RUNDIR=$::env(RUNDIR) HOST=$apihost "
                    append cmd "PATH=$::LDAS/bin:$::env(PATH) "
                    append cmd "LD_LIBRARY_PATH=$apipath:$::LD_LIBRARY_PATH "
                    append cmd "DB2INSTANCE=$::DB2INSTANCE $extra_env "
                    append cmd "$launcher >& $redirect &"
                    execssh $apihost $cmd
               }
               addLogEntry "Startup command for ${api}API: '$cmd'" blue
               set up [ mgr::bootstrapStatus $api ]
               set pid [ getPid $api ]
               ;## if we succeeded inform martian boxes of
               ;## FTP and HTTP info for gateway.
               if { $up } {
                    mgr::pushFTPandHTTPinfo $api $::LDAS_VERSION
               } else {
                    set msg "failed connect to operator socket on $api API"
                    return -code error "[ myName ]: $msg"
               }
               mgr::bootstrapSuccessEmail $api $apihost $pid
               mgr::bootstrapUnlock $api
               mgr::bootstrapFatalErrorEmail $api $apihost NULL 1
          } err ] } {
               addLogEntry $err red
               mgr::bootstrapUnlock $api
               mgr::bootstrapFatalErrorEmail $api $apihost $err 0
          }
     } ;## end of foreach on api
     after 0 mgr::apiStatusPage
}

§   §   §

Name: mgr::bootstrapSuccessEmail

Description:

Parameters: Usage:

Comments:

proc mgr::bootstrapSuccessEmail { api apihost pid } {
     if { [ catch {
          set report 1
          if { [ info exists ::bootstrapemailpid($api) ] } {
               set last_pid   $::bootstrapemailpid($api)
               if { [ string equal $pid $last_pid ] } {
                    set report 0
               }
          }
          set ::bootstrapemailpid($api) $pid
          if { $report } {
               if { [ info exists ::jobid ] && \
                        [ string equal STARTUP $::jobid ] } {
                    set msg "done. $api API is now running as pid: $pid"
                    addLogEntry $msg green
               } else {
                    set logurl $::HTTPURL
                    regexp {(.+)/jobs} $logurl -> logurl
                    set logurl ${logurl}/logs
                    set subject "$::LDAS_SYSTEM ${api}API restarted"
                    set msg    "$::LDAS_SYSTEM ${api}API restarted.\n"
                    append msg "$api API is now running as pid '${pid}' on "
                    append msg "$::LDAS_SYSTEM host ${apihost}.\nSee the "
                    append msg "$::LDAS_SYSTEM LDAS logs for details: "
                    append msg "$logurl"
                    addLogEntry "Subject: ${subject}; Body: $msg" email
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::bootstrapFatalErrorEmail

Description:
Do not send email every single time an API fails to be restarted -- requested by Kent Blackburn 05/2003.
Set the resource variable ::EMAIL_UNABLE_TO_START_API_INTERVAL_S to the number of seconds to supress repeated emails.
Default supression period is one hour.

Parameters: Usage:

Comments:

proc mgr::bootstrapFatalErrorEmail { api apihost err { cancel 0 } } {
     if { $cancel == 1 } {
          if { [ info exists ::fatalerroremailtime($api) ] } {
               ::unset ::fatalerroremailtime($api)
          }
          return {}
     }
     if { [ catch {
          set errlvl blue
          if { [ info exists ::UNABLE_TO_START_API_EMAIL_INTERVAL_S ] } {
               set interval $::UNABLE_TO_START_API_EMAIL_INTERVAL_S
          } else {
               set interval 3600
          }
          set now [ clock seconds ]
          if { [ info exists ::fatalerroremailtime($api) ] } {
               set last_notified $::fatalerroremailtime($api)
               if { $now - $last_notified > $interval } {
                    set ::fatalerroremailtime($api) $now
                    set errlvl email
               } else {
                    set errlvl red
               }
          } else {
               set last_notified $now
               set ::fatalerroremailtime($api) $now
               set errlvl email
          }
          set subject "$::LDAS_SYSTEM bootstrap of $api failed!"
          set msg     "could not start $api API on $apihost!\n"
          append msg  "(error: $err)"
          if { [ string equal email $errlvl ] } {
               addLogEntry "Subject: ${subject}; Body: $msg" $errlvl
          } else {
               addLogEntry $msg $errlvl
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::bootstrapPause

Description:
Wait up to ::BOOTSTRAP_PAUSE_TIMEOUT milliseconds for an API to shutdown on command before initiating genocide.

Parameters: Usage:

Comments:

If ::BOOTSTRAP_PAUSE_TIMEOUT is less than 5000 ms, it will be forced to be 5000 ms.
proc mgr::bootstrapPause { api } {
     if { [ catch {
          if { [ info exists ::BOOTSTRAP_PAUSE_TIMEOUT ] } {
               if { ! [ regexp {\d+} $::BOOTSTRAP_PAUSE_TIMEOUT ] } {
                    set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
               } elseif { $::BOOTSTRAP_PAUSE_TIMEOUT < 5000 } {
                    set ::BOOTSTRAP_PAUSE_TIMEOUT 5000
               }
          } else {
               set ::BOOTSTRAP_PAUSE_TIMEOUT 10000
          }
          set pause $::BOOTSTRAP_PAUSE_TIMEOUT
          set first  [ expr { $pause / 5 } ]
          set second $first
          set third  [ expr { $first * 3 } ]
          set sid [ sock::open $api emergency ]
          ::close $sid
          set start [ clock clicks -milliseconds ]
          addLogEntry "giving ${api}API $first ms to shut down..."
          after $first
          set sid [ sock::open $api emergency ]
          ::close $sid
          addLogEntry "giving ${api}API $second more ms to shut down..."
          after $second
          set sid [ sock::open $api emergency ]
          ::close $sid
          addLogEntry "giving ${api}API $third more ms to shut down..."
          after $third
          set sid [ sock::open $api emergency ]
          ::close $sid
          set msg "${api}API failed to shutdown in $pause ms. "
          append msg "will try 'genocide' action. "
          append msg "if this fails, somebody will have to "
          append msg "clean up the ${api}API BY HAND..."
          addLogEntry $msg red
     } err ] } {
          if { [ info exists start ] } {
               set done [ clock clicks -milliseconds ]
               set dt [ expr { $done - $start } ]
               addLogEntry "${api}API shut down in $dt ms"
          }
     }
}

§   §   §

Name: mgr::libstdcPreload

Description:

Parameters: Usage:

Comments:

proc mgr::libstdcPreload { api } {
     if { [ catch {
          ;## force preloading of libstdc++ as required
          ;## to avoid instant abort when c++ exception
          ;## is thrown which is not handled by a c program,
          ;## i.e. Tcl.
          ;## if getApiOS failed due to ssh issue throw exeption
          ;## and caller will skip the API.
          set libstdc [ list ]
          set OS [ getApiOS $api ]
          if { [ regexp {Linux} $OS ] } {
               if { [ info exists ::STDCPLUSPLUSLIBPAT ] } {
                    set libstdc [ libstdcPlusPlus $api ]
               }
          } elseif { [ regexp {Sun} $OS ] } {
               if { [ info exists ::SUN_LIBSTDCPP ] } {
                    set libstdc $::SUN_LIBSTDCPP
               }
          } elseif { [ regexp {ssh_failed} $OS ] } {
               return -code error $OS
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $libstdc
}

§   §   §

Name: mgr::libmallocPreload

Description:

Parameters: Usage:

Comments:

proc mgr::libmallocPreload { api } {
     if { [ catch {
          ;## force preloading of libXXmalloc as required
          ;## to avoid monotonic increasing memory usage due
          ;## to memory fragmentation.
          set libmalloc [ list ]
          set OS [ getApiOS $api ]
          if { [ regexp {Linux} $OS ] } {
               if { [ info exists ::LINUX_MALLOC ] } {
                    set libmalloc  $::LINUX_MALLOC
               }
          } elseif { [ regexp {Sun} $OS ] } {
               if { [ info exists ::SUN_MALLOC ] } {
                    set libmalloc  $::SUN_MALLOC
               }
          } elseif { [ regexp {ssh_failed} $OS ] } {
               return -code error $OS
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $libmalloc
}

§   §   §

Name: mgr::makeApiAtHome

Description:

Parameters: Usage:

Comments:

proc mgr::makeApiAtHome { api } {
     if { [ catch {
          set dir $::env(RUNDIR)/${api}API
          if { ! [ file exists $dir ] } {
               file mkdir $dir
          } else {
               if { ! [ file isdirectory $dir ] } {
                    file rename $dir ${dir}.bogus
                    file mkdir $dir
                    set msg    "file with name reserved for $api API "
                    append msg "working directory renamed to ${dir}.bogus"
                    addLogEntry $msg red
               }
          }
          file attributes $dir -permissions 0755
          ;## move misplaced .rsc and .ini files
          foreach ext [ list rsc ini ] {
               if { [ file exists $::env(RUNDIR)/LDAS${api}.$ext ] } {
                    file rename -force \
                        $::env(RUNDIR)/LDAS${api}.$ext $dir/LDAS${api}.$ext
                    set msg "moving LDAS${api}.$ext from $::env(RUNDIR) to $dir"
                    addLogEntry $msg blue
               }
          }
          ;## make sure browsers can browse things in here
          ;## that have the correct permissions
          set htaccess ${dir}/.htaccess
          if { ! [ file exists $htaccess ] } {
               set fid [ open $htaccess w 0644 ]
               puts $fid "DefaultType text/html"
               ::close $fid
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $dir
}

§   §   §

Name: mgr::bootstrapStatus

Description:
Wait up to 1 minute for an API to come up. This can fail!

Parameters: Usage:

Comments:

proc mgr::bootstrapStatus { api } {
     set flag 0
     set i 0
     if { ! [ info exist ::API_BOOTSTRAP_MAX_S ] } {
        set ::API_BOOTSTRAP_MAX_S 60
        addLogEntry "Resource ::API_BOOTSTRAP_MAX_S set to $::API_BOOTSTRAP_MAX_S by default" blue
     }
     if { ! [ info exist ::API_BOOTSTRAP_DELAY_S ] } {
        set ::API_BOOTSTRAP_DELAY_S 5
        addLogEntry "Resource ::API_BOOTSTRAP_DELAY_S set to $::API_BOOTSTRAP_DELAY_S by default" blue
     }
     set maxtime [ expr $::API_BOOTSTRAP_MAX_S * 1000 ]
     set delay [ expr $::API_BOOTSTRAP_DELAY_S * 1000 ]
     while { $i < $maxtime } {
          if { [ catch {
               if { [ string equal manager $api ] } {
                    set flag 1
               } else {
	       	    set sid [ sock::open $api operator ]
                    ::close $sid
                    set flag 1
               }
          } err ] } {
               catch { ::close $sid }
               if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                        [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
                    addLogEntry "${api}API $err" purple
               }
               set flag 0
          }
          if { ! $flag } {
               incr i $delay
               after $delay
          } else {
               break
          }
     }
     return $flag
}

§   §   §

Name: mgr::bootstrapLock

Description:
Prevents bootstrap from being called multiple times in quick succession.

Parameters: Usage:

Comments:

proc mgr::bootstrapLock { apis { reset 0 } } {
     if { [ catch {
          set temp [ list ]
          set now  [ clock seconds ]
          foreach api $apis {
               if { ! [ info exists ::__bootstraplock_$api ] } {
                    set ::__bootstraplock_$api $now
                    lappend temp $api
               } elseif { $reset } {
                    set ::__bootstraplock_$api $now
               } else {
                    set lastlock [ lindex [ set ::__bootstraplock_$api ] end ]
                    if { $now > $lastlock } {
                         lappend ::__bootstraplock_$api $now
                    }
               }
          }
          if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                   [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
               set report [ list ]
               set locks [ info vars ::__boot* ]
               foreach lock $locks {
                    lappend report "$lock [ set $lock ] "
               }
               addLogEntry $report purple
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $temp
}

§   §   §

Name: mgr::bootstrapUnlock

Description:

Parameters: Usage:

Comments:

proc mgr::bootstrapUnlock { api } {
     if { [ catch {
          if { [ info exists ::__bootstraplock_$api ] } {
               set locks [ set ::__bootstraplock_$api ]
               if { [ llength $locks ] > 1 } {
                    set locks [ lrange $locks 1 end ]
                    set ::__bootstraplock_$api $locks
                    set locked 1
               } else {
                    ::unset ::__bootstraplock_$api
                    set locked 0
               }
          } else {
               set locked 0
          }
          if { [ info exists ::DEBUG_BOOTSTRAP_LOCK ] && \
                   [ string equal 1 $::DEBUG_BOOTSTRAP_LOCK ] } {
               set report [ list ]
               set locks [ info vars ::__boot* ]
               foreach lock $locks {
                    lappend report "$lock [ set $lock ] "
               }
               addLogEntry $report purple
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $locked
}

§   §   §

Name: mgr::bootstrapLockTimeout

Description:
If an API does not restart, remove the lock and send e-mail to the responsible administrator for the manager.

Parameters: Usage:

Comments:

This is called in a bgLoop at startup of the managerAPI
proc bootstrapLockTimeout { } {
     if { [ catch {
          set locks [ list ]
          set api oops!
          set now [ clock seconds ]
          set notice "The resource variable ::MGR_BOOTSTRAPLOCK_TIMEOUT "
          append notice "has been\nset to \$::MGR_BOOTSTRAPLOCK_TIMEOUT."
          append notice " If this value is less than "
          append notice "15 * number_of_beowulf_nodes\n"
          append notice "or the number_of_mpi_search_users, "
          append notice "whichever is GREATER for this system, then\n"
          append notice "the value should be adjusted, and made to be\n"
          append notice "at least 15 * (number of nodes or users)."
          ;## rationalise resource variable
          if { ! [ info exists ::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
               set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
               set notice [ subst -nocommands $notice ]
               addLogEntry $notice blue
          } else {
               ;## constrain timeout to an integer number of
               ;## seconds between 0 and 9999
               if { ! [ regexp {^\d{1,4}$} $::MGR_BOOTSTRAPLOCK_TIMEOUT ] } {
                    set ::MGR_BOOTSTRAPLOCK_TIMEOUT 300
                    set notice [ subst -nocommands $notice ]
                    addLogEntry $notice blue
               }
          }
          set to $::MGR_BOOTSTRAPLOCK_TIMEOUT
          ;## iterate over the locks, unset the expired ones
          ;## and send email to the responsible party
          set locks [ info vars ::__bootstraplock_* ]
          foreach lock $locks {
               regexp {__bootstraplock_(\S+)} $lock -> api
               if { [ string equal manager $api ] } { continue }
               set then [ set $lock ]
               if { $now >= [ lindex $then end ] + $to } {
                    ::unset $lock
                    set msg    "$lock expired after $to seconds. "
                    append msg "something is wrong with the $::LDAS_SYSTEM "
                    append msg "LDAS system $api API!"
                    set subject "$::LDAS_SYSTEM $lock expired!"
                    set notice [ subst -nocommands $notice ]
                    set msg "${msg}\n\n$notice"
                    addLogEntry "Subject: ${subject}; Body: $msg" mail
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::pushFTPandHTTPinfo

Description:
Push gateway information for ftp and http transactions.
When API's are on remote machines they may be inside of a martian network and will need to know the names of the data directories as seen from the outside world.

Parameters: Usage:

Comments:

Will return repetetive error message on failure. :TODO:
proc mgr::pushFTPandHTTPinfo { api { version "" } } {
     set errs [ list ]
     if { [ catch {
          if { [ string length $version ] } {
               mgr::apiVersionCheck $api $version
          } else {
               mgr::apiVersionCheck $api UNKNOWN
          }
          set vars [ list $::FTPURL $::FTPDIR $::HTTPURL \
                         $::HTTPDIR $::GRIDFTPURL $::GRIDFTPDIR \
                         [ lindex $::LDAS_GATEWAY 0 ] \
                         [ lindex $::LDAS_GATEWAY 1 ] $::LDAS_SYSTEM ]
          set sid [ sock::open $api emergency ]
          fconfigure $sid -blocking off
          puts $sid "$::MGRKEY setFTPandHTTPinfo $vars"
          ::close $sid
     } err ] } {
          catch { ::close $sid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::shutdownAPI

Description:
Shut down an API started by bootstrapAPI Usage:
      mgr::shutdownAPI $apiname
Comments:
Since the API exists it can contain it's own internal shutdown procedure, ${::API}::sHuTdOwN.
::RESTART_ON_MEMFLAG is a variable set in the LDASapi.rsc which enables the restarting of an API when it's memory usage exceeds the flag level in the memFlag procedure of genericAPI.tcl. If ::RESTART_ON_MEMFLAG = 0, no attempt will be made to save the currently running job.
proc mgr::shutdownAPI { { apis "" } { comment "" } } {
     if { ! [ string length $apis ] } {
          set apis $::API_LIST
     }
     foreach api $apis {
          if { [ string equal $api manager ] } {
               continue
          }
          ;## abort sequence for an assistant manager
          set assistants [ namespace children :: ${::RUNCODE}* ]
          foreach assistant $assistants {
               if { [ catch {
                    set name [ string trim $assistant : ]
                    if { [ info exists ::${name}::api ] && \
                             [ string equal $api [ set ::${name}::api ] ] } {
                         set msg "$api API shutting down NOW!\n$name aborting!"
                         set ::${name}::errorapi [ set ::${name}::api ]
                         if { [ llength [ namespace children :: ::$name ] ] } {
                              ::${name}::predelete $msg $msg
                              if { [ llength [ namespace children :: ::$name ] ] } {
                                   ::${name}::seq
                              }
                         }
                    }
               } err ] } {
                    addLogEntry $err red
               }
          }
          if { [ catch {
               ;## memory usage and other API process info
               set statefile [ file join $::LDASLOG ${api}.status ]
               set arc $::LDASARC/${api}API/${api}_status.[ gpsTime ]
               if { [ file exists $statefile ] } {
                    set fid1 [ open $statefile r ]
                    set fid2 [ open $arc w ]
                    puts $fid2 "<html>\n<pre>"
                    fcopy $fid1 $fid2
                    ::close $fid2
                    ::close $fid1
                    file delete -force -- $statefile
               }
          } err ] } {
          }
          ;## take a gcore of the API before shutdown
          if { ! [ string equal nocore $comment ] } {
               set apihost [ set ::${api}(host) ]
               set now [ clock seconds ]
               set pid ""
               if { [ info exists ::bootstrapemailpid($api) ] } {
                    set pid $::bootstrapemailpid($api)
               }
               if { ![ string length $pid ] } {
                    set lockfile [ glob -nocomplain $::TOPDIR/${api}API/.${api}.*.lock ]
                    regexp {(\d+)} $lockfile -> pid
                    addLogEntry "obtained $api pid $pid from lock file" purple
               }
               if { [ string length $pid ] && [ info exist ::FORCE_GCORE ] && $::FORCE_GCORE} {
                    set cmd "alias cd=cd && cd $::TOPDIR/savedCores && gcore -o ${api}_${now}_gcore $pid"
                    addLogEntry "executing $cmd on $apihost" purple
                    execssh $apihost $cmd
               }
          }
          if { [ catch {
               set sid [ sock::open $api emergency ]
               puts  $sid "$::MGRKEY ${api}::sHuTdOwN \"$comment\""
               after 100
               cmd::receive $sid ${api}_$sid mgr::logReply
               addLogEntry "$api API Shut Down" red
          } err ] } {
          }
          if { [ catch {
               ;## I think we get here and delete the lock file
               ;## under all circumstances...
               set globpat [ file join [ apiDirectory $api ] .$api.*.lock ]
               set locks [ glob -nocomplain $globpat ]
               foreach lock $locks {
                    set pid [ lindex [ split [ file tail $lock ] . ] end-1 ]
                    file delete -force -- $lock
                    addLogEntry "deleted lock file: $lock" blue
               }
          } err ] } {
          }
     }
}

§   §   §

Name: mgr::postMortem

Description:
When the manager attempts to restart an API that has become unavailable, the new log file for the API should contain a reference to the condition which caused the error.
As of 01/04/02 this procedure returns a URL pointing to the archived log. The URL is absolute and should therefore continue to work even after long-term archiving.
This procedure returns a dump of the standard error redirect from the nohup that started the API.

Parameters: Usage:

Comments:

No filtering of the file contents is done at present :TODO:
proc mgr::postMortem { api } {
     set data [ list ]
     set url  [ list ]
     if { [ catch {
          set url [ file dirname $::HTTPURL ]
          regsub / $url // url
          ;## we may need an ::LDAS_LOG_ARCHIVE_URL
          set url ${url}/logs/archive/${api}API/
          set time [ gpsTime ]
          set home [ apiDirectory $api ]
          set stderrlog ${home}/${api}.log
          if { [ file exists $stderrlog ] } {
               set url $url${api}.log.$time
               set url "stdout/stderr for previous run of $api API: $url"
               set arcname $::LDASARC/${api}API/${api}.log.$time
               set dir $::LDASARC/${api}API
               if { ! [ file isdirectory $dir ] } {
                    file mkdir $dir
                    file attributes $dir -permissions 0755
                    gifBalls $dir
               }
               set fid1 [ open $arcname w ]
               puts $fid1 "<html>\n<pre>"
               set fid2 [ open $stderrlog r ]
               fcopy $fid2 $fid1
               ::close $fid2
               ::close $fid1
          } else {
               set url [ list ]
          }
     } err ] } {
          if { [ info exists fid ] } {
               catch { ::close $fid }
          }
          if { [ info exists fid1 ] } {
               catch { ::close $fid1 }
          }
          if { [ info exists fid2 ] } {
               catch { ::close $fid2 }
          }
          return -code error "[ myName ]: $err"
     }
     return $url
}

§   §   §

Name: mgr::dontStepOnJob

Description:
When a shutdown request is received, try not to shutdown while a job is pending.

Parameters: Usage:

Comments:

This is only possible if the shutdown is due to normal maintenance. If the shutdown is being driven by a failure of the API, the job will be lost under any circumstances.
proc mgr::dontStepOnJob { api timeout } {
     set i 0
     set vnames [ infoVars ]
     ;## try not to disturb a running job
     foreach vname $vnames {
          if { [ regexp $::RUNCODE $vname ] } {
               while { [ regexp $api [ set $vname ] ] && $i < $timeout } {
                    incr i
                    sleep 1000
               }
          }
     }
}

§   §   §

Name: mgr::preValidate

Description:

Parameters: Usage:

Comments:

proc mgr::preValidate { cmd } {
     if { [ catch {
          set seqpt {}
          set user   [ list ]
          set soname [ list ]
          regexp -nocase -- {-name\s+\{?\s*(\S+)} [ lindex $cmd 1 ] -> user
          set user [ string trim $user "{} " ]
          ;## see if dso is temporarily blocked!
          regexp -nocase -- {-dynlib\s+\{?\s*(\S+)} $cmd -> soname
          set soname [ string trim $soname "{} " ]
          set cmdname  [ lindex [ lindex $cmd 2 ] 0 ]
          ;## see if user is temporarily blocked!
          if { [ lsearch $::mgr::blocked_users $user ] != -1 } {
               set seqpt "ADMIN NOTICE: "
               set err "user '$user' has been temporarily blocked!"
               after 0 \
                   [ list mgr::logRejectedJob $user $cmdname blocked_user ]
               return -code error $err
          }
          ;## see if dso is temporarily blocked!
          if { [ lsearch $::mgr::blocked_dsos $soname ] != -1 } {
               set seqpt "ADMIN NOTICE: "
               set err "DSO '$soname' has been temporarily blocked!"
               after 0 \
                   [ list mgr::logRejectedJob $user $cmdname blocked_dso ]
               return -code error $err
          }
          ;## see if the user is exceeding his quota!
          set seqpt "QUOTA EXCEEDED: "
          mgr::userQuota $cmd
     } err ] } {
          return -code error "$seqpt$err"
     }
}

§   §   §

Name: mgr::userInfo

Description:
Update user info stored in QUEUE(USERS).
all ldasJob requests must have a user info argument as their first argument, and it must inlude the username, password, and the user's e-mail address.
Usage:

Comments:

There will always be at least the userkey and a new job id. New user info is optional
proc mgr::userInfo { args } {
     set return_info [ list ]
     set scratch     [ list ]
     set now [ clock seconds ]
     if { [ catch {
          if { [ catch {
               array set input $args
          } ] } {
               eval array set input $args
          }
          foreach user $::QUEUE(USERS) {
               if { ! [ llength $user ] } { continue }
               if { [ llength $user ] != 13 } {
                    mgr::reportCorruptedQueue $user
               }
               ;## each user entry contains the three user info item
               ;## pairs and a list of the last N jobs processed for
               ;## that user.
               set info [ lrange $user 0 end-1 ]
               set jobs [ lindex $user end ]
               array set q $info
               if { [ string equal $q(-name) $input(-name) ] } {
                    ;## Peter Shawhan's challenge/response code added
                    ;## 05/20/02
                    if { [ string equal md5protocol $input(-password) ] } {
                         set salt   [ uplevel set salt ]
                         set digest [ uplevel set digest ]
                         set test [ key::md5 $q(-password)$salt ]
                         if { [ string equal $digest $test ] } {
                              set hash $q(-password)
                         } else {
                              set hash invalid
                         }
                    } else {
                         set hash [ key::md5 $input(-password) ]
                    }
                    if { ! [ string equal $q(-password) $hash ] } {
                         set msg "Incorrect password given for user $q(-name)"
                         return -code error $msg
                    }
                    if { ! [ regexp {^\d+$} $q(-expires) ] } {
                         set q(expires) [ clock scan $q(-expires) ]
                    }
                    if { $now > $q(-expires) } {
                         set edate [ clock format $q(-expires) -format %D ]
                         set msg "MOU for user $q(-name) expires on ${edate}. "
                         append msg "Please contact LDAS admin to apply for "
                         append msg "an extension."
                         return -code error $msg
                    }
                    ;## here we mangle the args so that the user password
                    ;## is not updated when the other user data is updated!
                    regsub -- $input(-password) $args $q(-password) args
                    set info [ expandOpts info ]
                    set return_info $info
                    if { [ llength $return_info ] < 12 } {
                         set msg    "QUEUE(USERS) has been corrupted!\n"
                         append msg "Please check the user info file for bad\n"
                         append msg "entries, missing \"'s, etc."
                         return -code error $msg
                    }
               }
               lappend scratch [ concat $info [ list $jobs ] ]
          } ;## end of foreach
          if { [ llength $return_info ] } {
               set ::QUEUE(USERS) $scratch
               if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
                    set msg    "The QUEUE(USERS) queue has been\n"
                    append msg "initialised, but is empty."
                    return -code error $msg
               }
          }  else {
               return -code error "unknown user: '$input(-name)'"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $return_info
}

§   §   §

Name: mgr::reportCorruptedQueue

Description:

Parameters: Usage:

Comments:

proc mgr::reportCorruptedQueue { args } {
     set baddata [ list $args ]
     set subject "$::LDAS_SYSTEM users queue has become corrupted!"
     set msg "${subject}\n"
     append msg "The manager API choked on this user entry,\n"
     append msg "which does not appear to have the correct\n"
     append msg "number of fields:\n\n'$baddata'"
     addLogEntry "Subject: ${subject}; Body: $msg" mail
     return -code error "[ myName ]: $subject"
}

§   §   §

Name: mgr::assignJobid

Description:

Parameters: Usage:

Comments:

proc mgr::assignJobid { args } {
     if { [ catch {
          set return_info [ list ]
          set scratch     [ list ]
          set now [ clock seconds ]
          if { [ catch {
               array set input $args
          } ] } {
               eval array set input $args
          }
          foreach user $::QUEUE(USERS) {
               if { ! [ llength $user ] } { continue }
               if { [ llength $user ] != 13 } {
                    set msg "QUEUE(USERS) has been corrupted!"
                    addLogEntry $msg 2
                    return -code error "[ myName ]: $msg"
               }
               ;## each user entry contains the three user info item
               ;## pairs and a list of the last N jobs processed for
               ;## that user.
               set info [ lrange $user 0 end-1 ]
               set jobs [ lindex $user end ]
               array set q $info
               if { [ string equal $q(-name) $input(-name) ] } {
                    set jobid [ key::increment $::RUNCODE ]
                    set jobs \
                        [ linsert $jobs 0 [ list $jobid $input(-email) $now ] ]
                    set info [ expandOpts info ]
                    set return_info $info
                    if { [ llength $return_info ] < 12 } {
                         set msg    "QUEUE(USERS) has been corrupted!\n"
                         append msg "Please check the user info file for bad\n"
                         append msg "entries, missing \"'s, etc."
                         return -code error "[ myName ]: $msg"
                    }
               }
               lappend scratch [ concat $info [ list $jobs ] ]
          } ;## end of foreach
          if { [ llength $return_info ] } {
               set ::QUEUE(USERS) $scratch
               if { ! [ regexp "\[a-z\]+" $::QUEUE(USERS) ] } {
                    set msg    "The QUEUE(USERS) queue has been\n"
                    append msg "initialised, but is empty."
                    return -code error "[ myName ]: $msg"
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $jobid
}

§   §   §

Name: mgr::deleteJobidFromUsersQueue

Description:

Parameters: Usage:

Comments:

proc mgr::deleteJobidFromUsersQueue { jobid } {
     if { [ catch {
          set name [ list ]
          regexp {\d+} $jobid jobid
          set jobid $::RUNCODE$jobid
          set done 0
          set i 0
          set j 0
          foreach user $::QUEUE(USERS) {
               set jobs [ lindex $user end ]
               foreach job $jobs {
                    if { [ string equal $jobid [ lindex $job 0 ] ] } {
                         set done 1
                         break
                    }
                    incr i
               }
               if { $done } { break }
               set i 0
               incr j
          } ;## end of foreach on users
          if { $done } {
               set jobs [ lreplace $jobs $i $i ]
               set user [ lreplace $user end end $jobs ]
               set ::QUEUE(USERS) [ lreplace $::QUEUE(USERS) $j $j $user ]
          } else {
               set msg "jobid: '$jobid' did not match any user!!"
               return -code error $msg
          }
     } err ] } {
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::job2email

Description:
Usage:

Comments:

proc mgr::job2email { jobid } {
     if { [ catch {
          set email    [ list ]
          set jobemail [ list ]
          regexp {\d+} $jobid jobid
          set jobid $::RUNCODE$jobid
          set name [ job2name $jobid ]
          foreach user $::QUEUE(USERS) {
               if { [ string equal $name [ lindex $user 1 ] ] } {
                    set email [ lindex $user 5 ]
                    set jobs  [ lindex $user end ]
                    foreach job $jobs {
                         if { [ lsearch $job $jobid ] == 0 } {
                              set jobemail [ lindex $job 1 ]
                              break
                         }
                    }
                    break
               }
          } ;## end of foreach on users
          if { ! [ string length $email    ] && \
                   ! [ string length $jobemail ] } {
               return -code error "failed to resolve email for '$jobid'"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     if { [ string length $jobemail ] } {
          return $jobemail
     } else {
          return $email
     }
}

§   §   §

Name: mgr::job2name

Description:

Parameters: Usage:

Comments:

proc mgr::job2name { jobid } {
     if { [ catch {
          set name [ list ]
          regexp {\d+} $jobid jobid
          set jobid $::RUNCODE$jobid
          foreach user $::QUEUE(USERS) {
               set jobs [ join [ lindex $user end ] ]
               if { [ lsearch $jobs $jobid ] > -1 } {
                    set name [ lindex $user 1 ]
                    break
               }
          } ;## end of foreach on users
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $name
}

§   §   §

Name: mgr::jobStatus

Description:
Returns an HTML page reporting the status of each users job which has not been retrieved.
Usage:
set url [ mgr::jobStatus $jobid ]
Comments:
proc mgr::jobStatus { jobid } {
     set data "
<!DOCTYPE LDAS-help>
<!-- created by mgr::JobStatus -->
<title>LIGO LDAS Job Queue Contents</title>
<a name=\"top\">
<p>
<img src=\"help/LDAS.gif\">
<br>\n"
     if { [ catch {
          set limit [ expr { $::NUMBER_OF_ASSISTANT_MANAGERS * \
                                 ($::MANAGER_QUEUE_SIZE_FACTOR + 1) } ]
          ;## get all the commands running and queued
          foreach name [ array names ::QUEUE *,cmd ] {
               set job [ lindex [ split $name , ] 0 ]
               if { [ info exists ::${job}::cmd ] } {
                    set id($job) [ set ::${job}::cmd ]
               } else {
                    set id($job) $::QUEUE($name)
               }
          }
          ;## get all the user info
          set raw(USERS) [ lindex [ queue::dump USERS ] 0 ]
          foreach user $raw(USERS) {
               foreach { 1 user 2 3 4 5 6 7 8 9 10 11 jobs } $user {
                    if { [ llength $jobs ] } {
                         append data "<font color=red><b>$user</b></font><p>\n"
                         append data "<ol>\n"
                         foreach job [ lrange $jobs 0 $limit ] {
                              set job [ lindex $job 0 ]
                              append data "<li><b>$job</b><ul>\n"
                              if { [ catch { set cmd [ set id($job) ] } ] } {
                                   append data "<li><tt>finished!</tt>\n"
                              } else {
                                   set steps [ expr { [ llength $cmd ] / 2 } ]
                                   incr steps -2
                                   if { $steps < 0 } { set steps 0 }
                                   if { [ catch {
                                        set api [ set ::${job}::api ]
                                        set starttime [ set ::${job}::starttime(job) ]
                                   } err ] } {
                                        set api {}
                                        set starttime {}
                                   }
                                   set now [ clock clicks -milliseconds ]
                                   if { [ catch {
                                        set dt [ expr { ($now - $starttime) / 1000.0 } ]
                                   } err ] } {
                                        set dt 0
                                   }
                                   if { ! [ string length $api ] } {
                                        set api "queue in manager"
                                   }
                                   append data "<li><tt><font color=red>$steps</font> steps remaining<br>"
                                   append data "after <font color=red>$dt</font> seconds runtime<br>"
                                   append data "(currently in <font color=red>$api</font> API)</tt>\n"
                              }
                              append data "</ul>\n"
                         }
                         append data "</ol>\n"
                    }
               }
          }
     } err ] } {
          return -code error "[ myName]: $err"
     }
     append data "
<center>
<h4>
<a href=\"#top\">Back to Top</a>
</h4>
</center>\n"
     return [ publicFile $jobid status_report.html $data ]
}

§   §   §

Name: mgr::activeJobStatusSummary

Description:
Report current API of all running jobs, or "queued" as a list of lists of lists of four elements:
[ list jobid api|queued user runtime:runtime_this_api ] At the outermost level there are two lists; the first list is the list of currently running jobs, the second list is the list of queued jobs.

Parameters: Usage:

Comments:

Data about completed jobs is found in the jobstats.log file.
proc mgr::activeJobStatusSummary { { format list } } {
     if { [ catch {
          set data    [ list ]
          set current [ list ]
          set waiting [ list ]
          set retval  [ list ]
          set running [ namespace children :: ${::RUNCODE}* ]
          set running [ lsort $running ]
          set queued  $::assistant_queue
          set queuelen [ llength $queued ]
          set queuelen_msg [ list ]
          set Nasst $::NUMBER_OF_ASSISTANT_MANAGERS
          set now [ clock clicks -milliseconds ]
          set time [ clock format [ clock seconds ] -format "%x-%X %Z" ]
          set dom $::LDAS_SYSTEM
          ;## RUNNING
          foreach job $running {
               set user [ mgr::job2name $job ]
               set api  [ set ${job}::api ]
               set command [ set ${job}::usrcmdname ]
               set so_rx {-dynlib[\s\{]+([^\s\}\{]+)}
               set fullcommand [ set ${job}::usrcommand ]
               if { [ regexp -nocase -- $so_rx $fullcommand -> soname ] } {
                    set soname [ file tail $soname ]
                    set command "${command}($soname)"
               }
               set start [ set ${job}::starttime(job) ]
               ;## possibly less than a millisecond has elapsed and
               ;## we are at in-between states in the array
               if { [ catch {
                    set apit0 [ set ${job}::starttime($api) ]
               } err ] } {
                    set apit0 $now
               }
               set job [ string trim $job : ]
               set runtime [ expr { ($now - $start) / 1000.0 } ]
               set runtime [ format "%.2f" $runtime ]
               set apitime [ expr { ($now - $apit0) / 1000.0 } ]
               set apitime [ format "%.2f" $apitime ]
               set data [ list $job $command $api $user $runtime:$apitime ]
               lappend current $data
          }
          ;## QUEUED
          if { $queuelen > $Nasst } {
               set queued [ lrange $queued 0 [ expr { $Nasst - 1 } ] ]
               set queuelen_msg "$queuelen total jobs currently in queue"
          }
          set ::assistant_queue_stats(wait) 0.00
          foreach job $queued {
               foreach { job command cmdtext } $job { break }
               set job [ string trim $job : ]
               set user [ mgr::job2name $job ]
               if { [ catch {
                    ;## we don't use the throttle mark anymore...
                    #set waited [ ::__t::mark $job ]
                    set waited \
                        [ expr { ($now - $::queuetime($job)) / 1000.0 } ]
                    set waited [ format "%.2f" $waited ]
               } err ] } {
                    set waited 0.00
               }
               if { $waited > $::assistant_queue_stats(wait) } {
                    set ::assistant_queue_stats(wait) $waited
               }
               set data [ list $job $command queued $user $waited ]
               lappend waiting $data
          }
          ;## text format header
          set text    "====================================================================\n"
          append text "Summary of running jobs at $dom at $time:\n\n"
          append text "   job_i.d.     command    current_API    username     RUN_TIME(s)\n"
          append text "====================================================================\n"
          set text_fs "%-18s %-10s %-10s %-10s %-10s\n"
          ;## html format header
          set head "<html>
<head>
<!-- summary report of running job status -->
<title>Summary of Running Job Status at $dom at $time</title>
</head>
<BODY BGCOLOR='#DDDDDD' TEXT='#000000'>
<h2>Summary of Running Job Status at $dom at $time</h2>
<p>
<ul>\n"
          regsub -all {[ ]+} $head { } head
          ;## html format footer
          set foot "</ol></body></html>"
          switch -exact [ string tolower $format ] {
               text {
                    set retval $text
                    foreach item [ concat $current $waiting ] {
                         foreach [ list 1 2 3 4 5 ] $item { break }
                         append retval [ format $text_fs $1 $2 $3 $4 $5 ]
                    }
               }
               html {
                    set body    "<font color=red>\n"
                    append body "<b>Running:</b><p>\n</ul>\n"
                    append body "<font color=black>\ \ \ \  "
                    append body "guide:\ \ </font>\n"
                    append body "<font color=brown>jobid\ \ </font>\n"
                    append body "<font color=green>command\ \ </font>\n"
                    append body "<font color=brown>current_API\ \ </font>\n"
                    append body "<font color=green>user\ </font>\n"
                    append body "<font color=brown>total_runtime\ </font>\n"
                    append body "<font color=red>current_API_runtime\ </font>\n"
                    append body "<font color=green>\n<ul>\n"
                    foreach item $current {
                         foreach [ list 1 2 3 4 5 ] $item { break }
                         set 1 "<font color=brown>${1}</font>"
                         set 3 "<font color=brown>${3}</font>"
                         set 5s [ split $5 : ]
                         set 5 "<font color=brown>[ lindex $5s 0 ]</font>"
                         append 5 "\ "
                         append 5 "<font color=red>[ lindex $5s 1 ]</font>"
                         append body "<li>$1 $2 $3 $4 $5<br>\n"
                    }
                    append body "</ul>\n"
                    append body "</ol>\n"
                    append body "<font color=red>\n"
                    append body "<b>Queued:</b><p>\n"
                    append body "<font color=black>\n"
                    foreach item $waiting {
                         append body "<li>${item}<br>\n"
                    }
                    ;## truncation message if queue is large
                    if { [ string length $queuelen_msg ] } {
                         append body "<font color=red>\n"
                         append body "<b>...<p>$queuelen_msg</b>\n"
                         append body "<font color=black>\n"
                    }
                    append body "</font>\n"
                    set retval $head$body$foot
               }
               list {
                    set retval [ list $current $waiting $queuelen_msg ]
               }
               default {
                    set msg "invalid format: '$format'"
                    return -code error $msg
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $retval
}

§   §   §

Name: mgr::activeJobStatusLockReport

Description:
Augments the status summary report with information about manager lockout conditions, i.e. ::__bootstraplock_* and ::STOP_PROCESSING_JOBS_NO_DISK_SPACE == 1.

Parameters: Usage:

Comments:

PR #2716
proc mgr::activeJobStatusLockReport { args } {
     if { [ catch {
          set msg [ list ]
          if { [ llength [ info vars ::__bootstraplock_* ] ] > 1 } {
               set apis [ list ]
               foreach item [ info vars ::__bootstraplock_* ] {
                    lappend apis [ lindex [ split $item _ ] end ]
               }
               regsub {\s*manager\s*} $apis { } apis
               set msg "JOB PROCESSING SUSPENDED WHILE $apis API(s) "
               append msg "STABILIZE!"
          }
          if { [ info exists ::STOP_PROCESSING_JOBS_NO_DISK_SPACE ] && \
                   $::STOP_PROCESSING_JOBS_NO_DISK_SPACE == 1 } {
               set msg "JOB PROCESSING HALTED. NO DISK SPACE!"
          }
          if { [ string length $msg ] } {
               set msg "<center><h2><font color=red>$msg</font></h2></center>"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $msg
}

§   §   §

Name: mgr::writeActiveJobStatus

Description:
Write two files, activejobs.txt and
activejobs.tcl into $::PUBDIR, which is typically /ldas_outgoing/jobs.
Updates the files every five seconds by overwriting them.

Parameters: Usage:

Comments:

This takes about 100 ms to run
proc mgr::writeActiveJobStatus { } {
     if { [ catch {
          set time [ clock format [ clock seconds ] -format "%x-%X %Z" ]
          set dom $::LDAS_SYSTEM
          set refresh $::RUNNING_JOB_STAT_UPDATE_RATE
          ;## text format header
          set text    "====================================================================\n"
          append text "Summary of running jobs at $dom at $time:\n\n"
          append text "   job_i.d.     command    current_API    username     RUN_TIME(s)\n"
          append text "====================================================================\n"
          set text_fs "%-18s %-10s %-10s %-10s %-10s\n"
          ;## html format header
          set head "<html>
<head>
<!-- summary report of running job status -->
<title>Summary of Running Job Status at $dom at $time</title>
</head>
<BODY BGCOLOR='#DDDDDD' TEXT='#000000'>
<center>
<h2>Summary of Running Job Status at $dom at $time</h2>
<b><i>(The content of this page is updated every $refresh seconds)</i></b>
</center>
<p>
<ul>\n"
          regsub -all {[ ]+} $head { } head
          ;## BIG RED WARNING IF JOB PROCESSING IS CURRENTLY SUSPENDED!
          set stopped [ mgr::activeJobStatusLockReport ]
          if { [ string length $stopped ] } {
               append head "${stopped}<p>\n\n"
          }
          ;## html format footer
          set foot "</ol></body></html>"
          set textfile ${::PUBDIR}/activejobs.txt
          set tclfile ${::PUBDIR}/activejobs.tcl
          set htmlfile ${::PUBDIR}/activejobs.html
          set data [ mgr::activeJobStatusSummary list ]
          ;## tcl list version
          set fid [ open $tclfile w ]
          puts $fid $data
          ::close $fid
          foreach [ list running waiting queuelen_msg ] $data { break }
          ;## formatted text version
          foreach item [ concat $running $waiting ] {
               foreach [ list 1 2 3 4 5 ] $item { break }
               append text [ format $text_fs $1 $2 $3 $4 $5 ]
          }
          set fid [ open $textfile w ]
          puts $fid $text
          ::close $fid
          ;## running jobs in html version
          set body    "<table border=1 columns=6 width=90%>\n"
          append body "<tr><th colspan=6 align=left><font color=red>\n"
          append body "<b>Running:</b></tr>\n"
          append body "<tr>\n<font color=brown><th>jobid</font>\n"
          append body "<font color=green><th>command</font>\n"
          append body "<font color=brown><th>current API</font>\n"
          append body "<font color=green><th>user</font>\n"
          append body "<font color=brown><th>total runtime</font>\n"
          append body "<font color=red><th>current API runtime</font>\n"
          foreach item $running {
               foreach [ list 1 2 3 4 5 ] $item { break }
               set 1 [ mgr::jobidToHref $1 ]
               set 1 "<td>${1}"
               set 2 "<td>$2"
               set 3 "<td><font color=brown>${3}</font>"
               set 4 "<td>$4"
               set 5s [ split $5 : ]
               set 5 "<td><font color=brown>[ lindex $5s 0 ]</font>"
               append 5 "\ "
               append 5 "<td><font color=red>[ lindex $5s 1 ]</font>"
               append body "</tr><tr>$1 $2 $3 $4 $5<br>\n"
          }
          ;## queued jobs in html version
          append body "</table>\n"
          append body "<table border=0 columns=6 width=90%>\n"
          append body "<tr><th colspan=6 align=left>\n"
          append body "<font color=red>\n"
          append body "<br>Queued:</font></th></tr>\n"
          set i 0
          foreach item $waiting {
               set href [ mgr::jobidToHref [ lindex $item 0 ] ]
               set item [ lrange $item 1 end ]
               regsub -all {\s+} $item "<td>" item
               set item "${href}<td>$item"
               append body "<tr><td>[ incr i ].<td>${item}</tr>\n"
          }
          append body "</table>\n"
          ;## truncation message if queue is large
          if { [ string length $queuelen_msg ] } {
               append body "<font color=red>\n"
               append body "<b>...<p>$queuelen_msg</b>\n"
          }
          set fid [ open $htmlfile w ]
          puts $fid $head$body$foot
          ::close $fid
     } err ] } {
          catch { ::close $fid }
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::jobidToHref

Description:

Parameters: Usage:

Comments:

proc mgr::jobidToHref { jobid } {
     if { [ catch {
          set jobdir [ jobDirectory $jobid ]
          set base   $::HTTPURL
          regsub $::PUBDIR $base {} base
          set full $base${jobdir}/user_command.html
          set href "<a href=$full>$jobid</a>"
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $href
}

§   §   §

Name: mgr::dropPrettyCommand

Description:

Parameters: Usage:

Comments:

proc mgr::dropPrettyCommand { jobid cmd } {
     if { [ catch {
          ;## make a prettified verison of the command for dropping
          set tmp [ mgr::prettifyCmdHtml $jobid $cmd ]
          ;## use side-effect of jobDirectory to create one
          set jobdir [ jobDirectory $jobid ]
          ;## drop a nice html formatted user command
          set fid [ open $jobdir/user_command.html w 0644 ]
          puts $fid $tmp
          close $fid
     } err ] } {
          catch { ::close $fid }
          addLogEntry $err orange
     }
}

§   §   §

Name: mgr::prettifyCmdHtml

Description:

Parameters: Usage:

Comments:

proc mgr::prettifyCmdHtml { jobid cmd } {
     if { [ catch {
          set user [ mgr::job2name $jobid ]
          set start [ clock format [ clock seconds ] ]
          set head    "<html><h1>$jobid user: $user</h1>\n"
          append head "<h2>started at: $start</h2>\n"
          append head "<p><p><tt>"
          regsub -all -- { -} $cmd "<p>\n -" cmd
          regsub -all -- {\|} $cmd ";<br>\n" cmd
          regsub -all -- { (-\S+)} $cmd \
              {<font color=red> \1</font>} cmd
          regsub -all -- {([\}\{]+)} $cmd \
              {<font color=blue>\1</font>} cmd
          regsub -all -- {(file:/[^\s\n;]+)} $cmd \
              {<font color=purple>\1</font>} cmd
          set cmd "$head\n$cmd\n</html>"
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $cmd
}

§   §   §

Name: mgr::getCommandInputText

Description:

Parameters: Usage:

Comments:

proc mgr::getCommandInputText { args } {
     if { [ catch {
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::jobTimingReport

Description:
Generate a text formatted timing report for return to the user along with the job results.

Parameters: Usage:

Comments:

========================================= "LDAS API" "CLOCK TIME(seconds)" ========================================= frameAPI: 0.59 dataConditionAPI: 3.25 wrapperAPI(mpiAPI): 8.11 eventMonitorAPI: 0.94 metaDataAPI: 1.77 lightWeightAPI: 3.04 (ERROR)
----------------------------------------- managerAPI(total): 19.44 =========================================
proc mgr::jobTimingReport { jobid } {
     regexp {\d+} $jobid job
     set jobid $::RUNCODE$job
     set errorapi [ list ]
     set runtimedt 0
     set report    "=========================================\n"
     append report "'LDAS API'          'CLOCK TIME(seconds)'\n"
     append report "=========================================\n"
     if { [ catch {
          ;## if job ended with an error
          if { [ info exists ::${jobid}::errorapi ] } {
               set errorapi [ set ::${jobid}::errorapi ]
               set status fail
          } else {
               set status pass
          }
          set command [ set ::${jobid}::usrcmdname ]
          set so_rx {-dynlib[\s\{]+([^\s\}\{]+)}
          set fullcommand [ set ::${jobid}::usrcommand ]
          if { [ regexp -nocase -- $so_rx $fullcommand -> soname ] } {
               set soname [ file tail $soname ]
               set command "${command}($soname)"
          }
          ;## time job spent waiting in manager queue
          set queuetime [ set ::${jobid}::queuetime ]
          mgr::oneLineJobTimingReport $jobid queue $queuetime cm $command
          set line [ format "%-27s %10.2f" "Wait Time(queue):" $queuetime ]
          append report "$line\n"
          set runtimedt $queuetime
          set jobstart [ set ::${jobid}::starttime(job) ]
          foreach api [ array names ::${jobid}::deltat ] {
               set dt [ set ::${jobid}::deltat($api) ]
               set runtimedt [ expr { $runtimedt + $dt } ]
               set dt [ expr { $dt / 1000.0 } ]
               mgr::oneLineJobTimingReport $jobid $api $dt
               mgr::perApiRunningStats $api runtime $dt
               set line [ format "%-27s %10.2f" ${api}API: $dt ]
               if { [ string equal $api $errorapi ] } {
                    append line " (ERROR)"
                    mgr::perApiRunningStats $api failed 1
               } else {
                    mgr::perApiRunningStats $api succeeded 1
               }
               append report "$line\n"
          }
          set runtimedt [ expr { $runtimedt / 1000.0 } ]
          append report "-----------------------------------------\n"
          set line [ format "%-27s %10.2f" "managerAPI(total):" $runtimedt ]
          append report "$line\n"
          append report "=========================================\n"
          append report "\n"
          append report "$::LDAS_SYSTEM running LDAS version "
          append report "$::LDAS_VERSION\n"
     } err ] } {
          addLogEntry $err red
          set report [ list ]
     }
     catch { mgr::runningJobStatsFile $jobid $runtimedt $status }
     return $report
}

§   §   §

Name: mgr::oneLineJobTimingReport

Description:
Called by mgr::jobTimingReport to generate one line file entry summarizing job performance by job and API.
When args is a list of key/value pairs the one line report is populated accordingly. When the key is "xx", the report entry is written and the array value is unset.

Parameters: Usage:
  Column:    Content:

0 jobid 1 gps start time 2 username 3 user command name 4 pass or fail 5 seconds in queue 6 seconds in managerAPI 7 seconds in diskcacheAPI 8 seconds in frameAPI 9 seconds in metadataAPI 10 seconds in datacondAPI 11 seconds in mpiAPI 12 seconds in wrapperAPI 13 seconds in eventmonAPI 14 seconds in lightweightAPI 15 total seconds in all APIs 16 total of column 5 plus 15 (all time)
17 current queue size 18 seconds in cntlmonAPI
Comments:
proc mgr::oneLineJobTimingReport { jobid args } {
     if { [ catch {
          if { [ llength $args ] == 1 } {
               set args [ lindex $args 0 ]
          }
          if { ! [ info exists ::onelinereport($jobid) ] } {
               set ::onelinereport($jobid) \
                   [ list $jobid 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]
          }
          ;## list of two letter keys corresponding to columns in order
          set keys \
              [ list id st nm cm pf qu ma di fr me da mp wr ev li tt cn ]
          foreach [ list key value ] $args {
               set key [ string tolower [ string range $key 0 1 ] ]
               set i [ lsearch $keys $key ]
               if { $i > -1 } {
                    set ::onelinereport($jobid) \
                        [ lreplace $::onelinereport($jobid) $i $i $value ]
               } elseif { [ string equal xx $key ] } {
                    ;## calculate and add column 16, the total time for
                    ;## the job including queue time
                    set QT [ lindex $::onelinereport($jobid)  5 ]
                    set RT [ lindex $::onelinereport($jobid) 15 ]
                    set TT [ expr { $QT + $RT } ]
                    #lappend ::onelinereport($jobid) $TT
		    set ::onelinereport($jobid) [ linsert $::onelinereport($jobid) end-2 $TT ]
                    ;## get the current job queue size
                    set QS [ llength $::assistant_queue ]
		    # lappend ::onelinereport($jobid) $QS
		    set ::onelinereport($jobid) [ linsert $::onelinereport($jobid) end-1 $QS ]
                    ;## manage the file...
                    set now [ gpsTime ]
                    set active $::LDASLOG/job_stats.log
                    set archive $::LDASARC/jobstats_archive/job_stats.log.$now
                    set dir $::LDASARC/jobstats_archive
                    if { ! [ file isdirectory $dir ] } {
                         file mkdir $dir
                         file attributes $dir -permissions 0755
                         gifBalls $dir
                    }
                    set mb [ expr { 1024 * 1024 } ]
                    if { [ file exists $active ] } {
                         if { [ file size $active ] >= $mb } {
                              file rename $active $archive
                         }
                    }
                    set fid [ ::open $active a+ 0644 ]
                    ::puts $fid $::onelinereport($jobid)
                    unset ::onelinereport($jobid)
                    ::close $fid
               } else {
                    set msg "bad key received: '$key' (value: '$value')"
                    addLogEntry $msg email
               }
          }
     } err ] } {
          catch { ::close $fid }
          catch { ::unset ::onelinereport($jobid) }
          addLogEntry $err email
     }
}

§   §   §

Name: mgr::perApiRunningStats

Description:
Maintain a stats arrays for determining system health.
A new array is created at system startup, and the array is written out at shutdown.

Parameters: Usage:

Comments:

proc mgr::perApiRunningStats { api { item "" } { value "" } } {
     if { [ catch {
          set filename perAPIstats
          if { [ string equal read  $api ] } {
               if { [ file exists $filename ] } {
                    set fid [ open $filename r ]
                    set data [ read $fid [ file size $filename ] ]
                    ::close $fid
                    array set ::mgr::apirunstats $data
                    set msg "read $filename into ::mgr::apirunstats array"
                    addLogEntry $msg blue
               }
               return {}
          }
          if { [ string equal write $api ] && \
                   [ array exists ::mgr::apirunstats ] } {
               set fid [ open $filename w ]
               puts $fid [ array get ::mgr::apirunstats ]
               ::close $fid
               addLogEntry "wrote $filename" blue
               return {}
          }
          if { ! [ string length $item ] } {
               set msg "no value given for parameter 'item'"
               return -code error $msg
          }
          if { ! [ string length $value ] } {
               set msg "no value given for parameter 'value'"
               return -code error $msg
          }
          set keys [ list runtime succeeded failed ]
          foreach key $keys {
               if { ! [ info exists ::mgr::apirunstats($api,$key) ] } {
                    set ::mgr::apirunstats($api,$key) 0
               }
          }
          if { ! [ info exists ::mgr::apirunstats($api,restarted) ] } {
               set ::mgr::apirunstats($api,restarted) [ list ]
          }
          switch -exact $item {
               runtime {
                    set rt $::mgr::apirunstats($api,runtime)
                    set rt [ expr { $rt + $value } ]
                    set ::mgr::apirunstats($api,runtime) $rt
               }
               succeeded {
                    incr ::mgr::apirunstats($api,succeeded)
               }
               failed {
                    incr ::mgr::apirunstats($api,failed)
               }
               restart {
                    set gps $value
                    lappend ::mgr::apirunstats($api,restart) $gps
               }
               default {
                    set msg "invalid key: $api --> $item <-- $value"
                    return -code error $msg
               }
          } ;## end of switch
     } err ] } {
          if { [ string length $err ] } {
               catch { ::close $fid }
               return -code error "[ myName ]: $err"
          }
     }
}

§   §   §

Name: mgr::runStatusSummary

Description:
Returns a list of lists where each sublist consists of:
  1. The API name
  2. The total number of jobs which hit that API
  3. The success rate for that API expressed as %
  4. The total active RUN TIME of that API
  5. The number of seconds since the last restart of the API

Parameters: Usage:

Comments:

setting form to link returns the filename where html formatted output has been dumped.
proc mgr::runStatusSummary { { apis all } { form list } { save 0 } } {
     if { [ catch {
          if { ! [ array exists ::mgr::apirunstats ] } {
               set err "no jobs have run since last system restart"
               return -code error $err
          }
          if { [ string equal all $apis ] } {
               set apis [ lsort $::API_LIST ]
          }
          set dom $::LDAS_SYSTEM
          set head "<html>
<head>
<!-- summary report of API run history -->
<title>API Status History at $dom</title>
</head>
<BODY BGCOLOR='#DDDDDD' TEXT='#000000'>
<h2>LDAS API Status History Summary at $dom</h2>
<p>
<ul>\n"
          set foot "</ul></body></html>"
          regsub -all {[ ]+} $head { } head
          set body [ list ]
          foreach api $apis {
               if { [ string equal manager $api ] } { continue }
               ;## no exception on failure of a single API to be
               ;## initialised.
               if { [ llength [ array names ::mgr::apirunstats $api,* ] ] } {
                    set good     $::mgr::apirunstats($api,succeeded)
                    set bad      $::mgr::apirunstats($api,failed)
                    set dt       $::mgr::apirunstats($api,runtime)
                    set restarts $::mgr::apirunstats($api,restart)
               } else {
                    set good     0
                    set bad      0
                    set dt       0
                    set restarts 0
               }
               set dt [ expr int($dt) ]
               set total [ expr { $good + $bad } ]
               if { $total } {
                    set success_rate [ expr { ($good * 100) / ($good + $bad) } ]
                    set success_rate [ format %.2f $success_rate ]
               } else {
                    set success_rate 100
               }
               set gps      [ lindex $restarts end ]
               set time [ clock format [ utcTime $gps ] -format "%x-%X %Z" ]
               set last_rs  [ lindex $restarts end ]
               set last_rs  [ expr { [ gpsTime ] - $last_rs } ]
               set restarts [ llength $restarts ]
               if { [ string equal list $form ] } {
                    lappend results  \
                        [ list $api $total $success_rate $dt $last_rs ]
               } else {
                    set item "<li><b>${api}API started at
<font color=brown>$time
(GPS: $gps)</font></b><ul>
<li>Total jobs visiting this API:
<font color=red>$total</font>
<li>Job completion success rate:
<font color=red>${success_rate}%</font>
<li>Total processing time for this API:
<font color=red>$dt s</font>
<li>seconds since this API was restarted:
<font color=red>$last_rs</font>
<li>number of times restarted this run:
<font color=red>$restarts</font>
</ul>\n"
regsub -all {[ ]+} $item { } item
append body $item
}

§   §   §
}
§   §   §
;## handle html and link data if { ! [ string equal list $form ] } { set results $head$body$foot }
§   §   §
;## pop into a file if a link was requested if { [ string equal link $form ] } { if { $save } { set filename \ $::LDASARC/system_history/state_summary.[ gpsTime ].html } else { set filename \ $::LDASARC/system_history/state_summary.html } set dir $::LDASARC/system_history if { ! [ file isdirectory $dir ] } { file mkdir $dir file attributes $dir -permissions 0755 gifBalls $dir } set fid [ open $filename w 0644 ] puts $fid $results ::close $fid set gateway http://[ lindex $::LDAS_GATEWAY 1 ] set results $gateway$filename }
§   §   §
} err ] } { catch { ::close $fid } addLogEntry $err email } return $results }
§   §   §

Name: mgr::runningJobStatsFile

Description:
First cut at maintaining a running record of runtime for each job. Records jobid, start time, run time, user i.d. and whether the job returned an error or not.

Parameters: Usage:

Comments:

Archive files are named jobstats.log.NNNNNNNNN, where NNNNNNNNN is the UNIX timestamp when the log was rotated.
proc mgr::runningJobStatsFile { jobid dt status } {
     if { [ catch {
          set now [ gpsTime ]
          set start [ set ::${jobid}::starttime(gps) ]
          set user  [ mgr::job2name $jobid ]
          set dt    [ format "%.2f" $dt ]
          ;## and complete the OTHER report
          mgr::oneLineJobTimingReport \
              $jobid st $start nm $user pf $status tt $dt xx 0
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::zombies

Description:
If an API crashes and cannot be restarted by the manager, this procedure will be called and all job i.d.'s that are currently registered with the dead API will be reaped and garbage will be collected throughout the entire system.

Parameters: Usage:

Comments:

proc mgr::zombies { api } {
     if { [ catch {
          set error "$api API fault: UNREACHABLE!"
          set children [ namespace children :: ${::RUNCODE}* ]
          foreach child $children {
               set jobid [ string trim $child : ]
               if { [ info exists ${child}::api ] } {
                    if { [ string equal $api [ set ${child}::api ] ] } {
                         set subject "$api API error!"
                         if { [ llength [ namespace children :: $child ] ] } {
                              set ${child}::errorapi $api
                              ${child}::predelete $subject $error
                              if { [ llength [ namespace children :: $child ] ] } {
                                   ${child}::seq
                              }
                         }
                    }
               }
          }
          if { [ string equal frame $api ] } {
               mgr::zombies diskcache
          }
     } err ] } {
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::ping

Description:
Check to see that the low level API's (frame, metadata, etc.) are running and can be communicated with.
Usage:

Comments:

When an API is unreachable, the manager will attempt to bootstrap it with mgr::bootstrapAPI, q.v.
proc mgr::ping { { apilist "" } } {
     if { [ catch {
          if { ! [ string length $apilist ] } {
               set apilist [ lsort $::API_LIST ]
          }
          set interval $::LOGINTERVAL
          set me [ myName ]
          ;## log pings if error is returned OR if ::LOG_PINGS
          ;## is defined and set to '1'.
          if { [ info exists ::LOG_PINGS ] && \
                   [ string equal 1 $::LOG_PINGS ] } {
          } else {
               set ::LOG_PINGS 0
          }
          foreach api $apilist {
               incr interval -1
               if { [ string equal $api manager ] } {
                    continue
               }
               set code "if { ! \[ info exists ::__bootstraplock_$api \] } {
set lvl 0
set msg \[ pingAPI $api \]
if { \[ regexp unreachable \$msg \] } {
     set lvl 2
}

§   §   §
if { \$lvl || \$::LOG_PINGS } { addLogEntry \$msg \$lvl $me }
§   §   §
if { \$lvl == 2 } { if { \[ catch { addLogEntry \"ping failed; bootstrapAPI $api\" purple mgr::bootstrapAPI $api } err \] } { addLogEntry \$err 2 $me } }
§   §   §
}" regsub -all {[ ]+} $code { } code bgLoop ${api}ping "$code" $interval }
§   §   §
} err ] } { addLogEntry "$err errorInfo '$::errorInfo'" 2 } }
§   §   §

Name: mgr::killPing

Description:
Usage:

Comments:

proc mgr::killPing { { apilist "" } } {
     if { ! [ string length $apilist ] } {
          set apilist [ lsort $::API_LIST ]
     }
     foreach api $apilist {
          if { [ string equal $api manager ] } {
               continue
          }
          if { [ catch {
               set ::bg::jobs(${api}ping,run) 0
               addLogEntry "No longer pinging $api" blue
          } err ] } {
               addLogEntry $err
          }
     }
}

§   §   §

Name: mgr::manageLogs

Description:
Archive log files at intervals
Parameters: Usage:

Comments:

There may be a short period of time when an API's normal log file is unavailable between the time that the log rollover is made and the next log entry is made.
proc mgr::manageLogs { } {
     if { [ catch {
          ;## make sure browsers can browse archived logs!
          if { ! [ file exists $::LDASARC/.htaccess ] } {
               set fid [ open $::LDASARC/.htaccess w 0644 ]
               puts $fid "DefaultType text/html"
               ::close $fid
          }
          if { ! [ file exists $::LDASLOG/.htaccess ] } {
               set fid [ open $::LDASLOG/.htaccess w 0644 ]
               puts $fid "DefaultType text/html"
               ::close $fid
          }
          foreach api $::API_LIST {
               set log LDAS${api}.log.html
               set log $::LDASLOG/$log
               if { [ file exists $log ] } {
                    if { [ file size $log ] > 100000 } {
                    }
               }
          }
     } err ] } {
          catch { ::close $fid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::reply

Description:
Just a slightly smart catter, tries to avoid actually sending junk back.
Usage:
           mgr::reply $cid $msg
Comments:
This channel should NEVER be flushed!!
proc mgr::reply { { cid "" } { args "" } } {
     set cid [ string trim $cid ]
     if {  [ regexp {_globus_xio_handle_t$} $cid ] } {
          return [ mgrGlobus::reply $cid $args ]
     }
     if { ! [ regexp {sock[0-9]+$} $cid ] } {
          set msg    "mgr::reply:\n"
          append msg "received: '$cid $args'\n"
          append msg "First argument must be channel i.d."
          addLogEntry $msg 1
     }
     if { [ string length $args ] > 4 } {
          if { [ catch {
               set prevblock [ fconfigure $cid -blocking ]
               fconfigure $cid -blocking 0
               puts $cid $args
               fconfigure $cid -blocking $prevblock
          } err ] } {
               catch { ::close $cid }
               set msg     "mgr::reply:\n"
               lappend msg "$err\n"
               lappend msg "Lost contact with my caller! $cid closed"
               addLogEntry $msg 2
          }
     }
     return {}
}

§   §   §

Name: manager::reply

Description:
wrapper for mgr::reply that uses official name of the API
Parameters: Usage:

Comments:

This is to ease the coding of manager API functions that iterate through ::API_LIST. The manager does not require special handling when this is done.
namespace eval manager {}

§   §   §
proc manager::reply { cid args } {
     eval mgr::reply $cid $args
}

§   §   §

Name: mgr::sHuTdOwN

Description:
Save state as much as possible before shutting down.
Usage:
       mgr::sHuTdOwN
Comments:
Sure would be nice to have a corresponding restore proc.
Exit handles closing all files and sockets automagically.
Note that the ::RESTART_ON_MEMFLAG is overridden by this function.
proc mgr::sHuTdOwN { } {
     set ::jobid SHUTDOWN
     set ::RESTART_ON_MEMFLAG 0
     set ::SYSTEM_SHUTDOWN_IN_PROGRESS 1
     set now [ gpsTime ]
     ;## disable boostraps
     proc ::mgr::bootstrapAPI { args } {}
     proc ::createAssistant { args } {}
     ;## don't take any new commands!!
     if { [ catch {
          closeListenSock operator
     } err ] } {
          addLogEntry $err
     }
     catch { mgr::chopQueueTo 0 }
     ;## capture state at shutdown for analysis if necessary
     set fname1 [ file join $::LDASLOG APIstatus.html ]
     set fname2 [ file join $::PUBDIR  activejobs.html ]
     set arcdir $::LDASARC/shutdown_status
     set archive1 ${arcdir}/APIstatus-${now}.html
     set archive2 ${arcdir}/activejobs-${now}.html
     catch {
          if { ! [ file isdirectory $arcdir ] } {
               file mkdir $arcdir
               file attributes $arcdir -permissions 0755
               gifBalls $arcdir
          }
          file copy -force $fname1 $archive1
          file copy -force $fname2 $archive2
     }
     catch { mgr::killPing }             err0
     foreach api $::API_LIST {
          if { [ string equal $api manager ] } {
               continue
          }
          catch { mgr::shutdownAPI $api "nocore" }
          mgr::bootstrapPause $api
     }
     catch { mgr::perApiRunningStats write } err2
     #catch { queue::save }               err3
     catch { key::increment shutdown }   err4
     ;## force shutdown to be quick
     set ::API_SOCKET_TIMEOUT_MS 1000
     catch { mgr::apiStatusPage }        err5
     if { [ catch {
          closeListenSock emergency
     } err ] } {
          addLogEntry $err
     }
     ;## close globus socket
     foreach type [ list User Host ] {
          if { [ info exist ::mgrGlobusChannel::Globus${type}ListenSocket ] } {
               set globus_sock  [ set ::mgrGlobusChannel::Globus${type}ListenSocket ]
               addLogEntry "close tclglobus $type channel $globus_sock" purple
               ::close $globus_sock
          }
     }
     closeLog
     log::lock
     kill -9 [ pid ]
}

§   §   §

Name: mgr::validateopts

Description:
Detect bad options such as ambiguous parameters or parameters which are not part of the option list and reject the request with a helpful message to the user.

Parameters: Usage:

Comments:

Testing the validity of the request at this level allows user notification and minimises resource allocation to badly formed requests.
proc mgr::validateopts { cmd user } {
     if { [ catch {
          set procname [ list ]
          set usropts  [ list ]
          set options  [ list ]
          set metalist [ list ]
          set reason   [ list ]
          set files [ glob -nocomplain ${::LDASMACROS}/meta/*.meta ]
          ;## if the /ldas directory gets deleted...
          ;## this has happened!
          if { ! [ llength $files ] } {
               set reason ldas_system_failed_sanity_check
               set subject "$::LDAS_SYSTEM ${::API}API CRITICAL error!"
               set msg "the $::LDAS_SYSTEM manager API did not "
               append msg "find ANY macro files under the directory "
               append msg "${::LDASMACROS}/meta.\nthis is a critical "
               append msg "LDAS system error indicating a condition "
               append msg "which must be remedied immediately!!\n"
               append msg "ALL LDAS user commands will fail until "
               append msg "the filesystem is restored."
               if { ! [ info exists ::meta_macros_are_missing ] || \
                        $::meta_macros_are_missing == 0 } {
                    set ::meta_macros_are_missing 1
                    after 1967666 set ::meta_macros_are_missing 0
                    addLogEntry "Subject: ${subject}; Body: $msg" mail
               }
               return -code error $msg
          }
          foreach file $files {
               lappend metalist [ file tail $file ]
          }
          regexp $::cmd_rx $cmd -> procname usropts
          if { [ catch {
               mgr::duplicateOpts $usropts
          } err ] } {
               set reason duplicate_option
               return -code error $err
          }
          if { [ catch {
               mgr::commandSanityChecks $usropts
          } err ] } {
               set reason invalid_command
               return -code error $err
          }
          if { [ lsearch -exact $metalist ${procname}.meta ] > -1 } {
               set fname [ file join $::LDASMACROS meta ${procname}.meta ]
               set fid [ open $fname r ]
               set options "[ gets $fid ]\n"
               ;## PR 3087 abort job if it requires an inactive api
               set rest [ read -nonewline $fid ]
               ;## always do this fresh as the list may change
               set apipat [ join $::API_LIST \| ]
               set allapipat [ list manager diskcache frame metadata ligolw datacond mpi eventmon cntlmon ]
               set allapipat [ join $allapipat \| ]
               ::close $fid
          } else {
               set msg    "The command entered: '$cmd'\n"
               append msg "is not a valid user command or is missing\n"
               append msg "required options. Please see\n"
               append msg "the documentation for the LDAS system\n"
               append msg "user interface for information about valid\n"
               append msg "commands and command syntax."
               set reason invalid_command
               return $msg
          }
          array set opts [ lindex $options 2 ]
          array set useroptions $usropts
          foreach item [ array names useroptions  ] {
               set possible {}
               set i 0
               foreach opt [ array names opts ] {
                    regsub -- "--" $item "-" item
                    if { [ regexp -- $item $opt ] } {
                         lappend possible $opt
                         incr i
                    }
               }
               if { ! $i } {
                    set msg    "User option: '$item'\n"
                    append msg "is not a valid option to $procname.\n"
                    append msg "Please see the documentation for\n"
                    append msg "$procname before resubmitting your\n"
                    append msg "request."
                    set reason invalid_option
                    return $msg
               }
               if { $i >= 2 } {
                    set msg    "User option: '$item'\n"
                    append msg "is ambiguous in the context of $procname\n"
                    append msg "since it matches all of these options:\n"
                    append msg "$possible\n"
                    append msg "Please make sure your option list is\n"
                    append msg "unambiguous in the context of $procname\n"
                    append msg "before resubmitting your request."
                    set reason ambiguous_option
                    return $msg
               }
          }
          ;## check if job requires an inactive API
          foreach line [ split $rest \n ] {
               if { [ regexp {^\#|^[\s\t\n]*$} $line ] } {
                    continue
               }
               if { [ string length $line ] > 8 } {
                    continue
               }
               set line [ string trim $line ]
               if { [ regexp $apipat $line ] } {
                    continue
               } elseif { [ regexp $allapipat $line ] } {
                    set reason "inactive_api $line"
                    error "This job requests an inactive api $line"
               }
          }
     } err ] } {
          if { [ string length $reason ] } {
               after 0 \
                   [ list mgr::logRejectedJob $user $procname $reason ]
          }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::localiseInputURLs

Description:
Given a user command option which can contain input side URL's, try and localise http and ftp URL's and verify that all data is in fact received and on disk before allowing the job to proceed further.
The assistant manager will process al outgoing code to replace the URL with the local filename.
The local filenames will be returned immediately, but the assistant manager will not begin processing until all of the files are found to be ready.

Parameters: Usage:

Comments:

proc mgr::localiseInputURLs { jobid options } {
     if { [ catch {
          set url       [ list ]
          set localfile [ list ]
          set errs      [ list ]
          set localised [ list ]
          set files     [ list ]
          set out_rx {(return|algorithm|output)}
          set url_rx {(?:http|gridftp|ftp|file)://?[^/][^ \,\}\]\)]+}
          foreach [ list option value ] $options {
               if { [ regexp -nocase -- $out_rx $option ] } {
                    continue
               }
               set urls [ regexp -nocase -inline -all $url_rx $value ]
               ;## PR #2705 tarball handling!
               set limit $::MAXIMUM_NUMBER_OF_REMOTE_URLS
               if { [ llength $urls ] > $limit } {
                    if { ! [ info exists ::${jobid}::tarball ] } {
                         set error "Too many remote ftp and http URL's in "
                         append error "your request. Please use the -tarball "
                         append error "option q.v."
                         return -code error $error
                    }
               }
               if { [ llength $urls ] } {
                    foreach url $urls {
                         if { [ catch {
                              set localfile [ getUrl $jobid $url ]
                              lappend localised $url $localfile
                         } err ] } {
                              lappend errs "$err (url: '$url')"
                         }
                    }
               }
          }
          if { [ llength $errs ] } {
               return -code error $errs
          }
          ;## and actually do the substitutions
          foreach [ list url localfile ] $localised {
               lappend files $localfile
               regsub $url $options $localfile options
          }
          if { [ llength $localised ] } {
               debugPuts "URL's substituted: '$options'"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return [ list $files $options ]
}

§   §   §

Name: mgr::delocaliseOutputURLs

Description:
Given a flat list of alternating files and URL's, returns a 'corrected' list representing the actual locations determined by the manager.

Parameters: Usage:

Comments:

proc mgr::delocaliseOutputURLs { jobid URLS } {
     if { [ catch {
          set errs    [ list ]
          set remotes [ list ]
          foreach [ list file url ] [ join $URLS ] {
               if { [ catch {
                    lappend remotes [ putUrl $jobid $file $url ]
               } err ] } {
                    lappend errs "$err (file: '$file' url: '$url')"
               }
          }
          if { [ llength $errs ] } {
               return -code error $errs
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $remotes
}

§   §   §

Name: mgr::duplicateOpts

Description:
Helper function for mgr::validateopts tries to return a inteligible report of the duplicated options values.

Parameters: Usage:

Comments:

proc mgr::duplicateOpts { usropts } {
     if { [ catch {
          set dupes [ list ]
          foreach { opt val } $usropts {
               if { [ info exists opts($opt) ] } {
                    lappend dupes [ list $opt $opts($opt) $val ]
                    set opts($opt) [ join $opts($opt) $val ]
               } else {
                    set opts($opt) $val
               }
          }
          if { [ llength $dupes ] } {
               return -code error $dupes
          }
     } err ] } {
          set msg "[ myName ]: option(s) duplicated: $err"
          return -code error $msg
     }
}

§   §   §

Name: mgr::apiStatusPage

Description:

Parameters: Usage:

Comments:

proc mgr::apiStatusPage { { api all } } {
     if { [ catch {
          set body [ list ]
          set time    [ gpsTime ]
          set t_utc   [ utcTime $time ]
          set t_local [ clock format $t_utc -format "%x-%X %Z" ]
          set t_gmt   [ clock format $t_utc -format "%x-%X %Z" -gmt 1 ]
          set caller [ uplevel myName ]
          set dom $::LDAS_SYSTEM
          set head    "<html><head>\n"
          append head "<!-- reload this page every 5 minutes -->\n"
          append head "<META HTTP-EQUIV=\"REFRESH\" CONTENT=300>\n"
          append head "<title>LDAS API Status Monitor</title>\n"
          append head "</head>\n"
          append head "<BODY BGCOLOR='#DDDDDD' TEXT='#000000'>\n"
          append head "<h2>LDAS API Status at $dom</h2>\n"
          append head "<h3><font color='red'>$time</font>\n"
          append head "<font color='green'>$t_local</font>\n"
          append head "<font color='brown'>$t_gmt</font>\n"
          append head "</h3>\n"
          foreach api $::API_LIST {
               if { [ catch {
                    set data [ mgr::apiStatus $api ]
                    foreach [ list properties channels state color ] \
                        $data { break }
               } err ] } {
                    set color red
                    ;## at shutdown, connection may succeed before the api
                    ;## has completely expired.
                    if { [ regexp {broken pipe} $err ] } {
                         set state "\$host is up, but $api API is not running\n"
                    } else {
                         set state "\$host unreachable\n"
                    }
               }
               foreach { name host port pid pcpu pmem vsz rss uptime } \
                   $properties { break }
               foreach { socks files threads events objects total } \
                   $channels { break }
               ;## short circuit on error getting proc info
               if { ! [ info exists port ] } {
                    set color red
                    set state "runtime data for $api API on \$host "
                    append state "temporarily unavailable"
               }
               set gif "<img src=\"ball_$color.gif\">\n"
               if { [ string equal $color red ] } {
                    if { ! [ string equal ok $state ] } {
                         set host [ set ::[ string toupper $api]_API_HOST ]
                         set state [ subst -nocommands $state ]
                         append body $gif
                         append body "<b><a href=LDAS$api.log.html>$api API</a>\n"
                         append body $state
                         append body "</b><p>\n"
                    }
               } else {
                    ;## if the operator ports for API's are undefined at
                    ;## the manager's startup - set the baseport
                    ;## in the state array to a value reflecting that
                    ;## state so that it cannot be mistaken for a valid
                    ;## port assignment.
                    if { [ regexp {^\d+$} $port ] } {
                         set port ::BASEPORT+[ expr $port % $::BASEPORT ]
                    } else {
                         set port ::BASEPORT+???
                    }
                    append body $gif
                    append body "<b><a href=LDAS$api.log.html>$api API</a>\n"
                    append body "is running on <i>$host</i> port $port</b>\n"
                    append body "<ul><tt>\n"
                    append body "<li><font color='red'>uptime:\n"
                    append body "<b>$uptime</b></font>\n"
                    append body "<li>maximum virtual memory allocated by this API:\n"
                    append body "<font color='red'>$vsz Kb</font>\n"
                    append body "<ul>\n"
                    append body "(current resident set size: $rss Kb)\n"
                    ;## if we have a previous vsz value, calculate the
                    ;## integrated rate of change of memory usage.
                    if { [ info exists ::mgr::${api}_vsz ] } {
                         set nowmem [ set ::mgr::${api}_vsz ]
                         if { [ regexp {^[\.\d]+$} $nowmem ] } {
                              if { [ catch {
                                   set dmem [ expr { $vsz - $nowmem } ]
                              } err ] } {
                                   # weird bug occurred!!
                                   set dmem 0
                              }
                              set dt [ expr { double($::LOGINTERVAL) } ]
                              set dmdt [ format "%3.2f" [ expr { $dmem / $dt } ] ]
                              append body "<br>\n"
                              append body "(heap size change rate: $dmdt Kb/sec.)\n"
                         }
                    }
                    set ::mgr::${api}_vsz $vsz
                    append body "</ul>\n"
                    ;## character 37 is the percent symbol
                    append body "<li>current cpu usage ${pcpu}%\n"
                    append body "<ul>\n"
                    append body "<li>number of open sockets: $socks\n"
                    append body "<li>number of open files: $files\n"
                    append body "<li>number of running threads: $threads\n"
                    append body "<li>number of pending tcl events: $events\n"
                    append body "<li>number of objects registered: $objects\n"
                    append body "<li>total: $total\n"
                    append body "</ul>\n"
                    append body "<a href=../${api}API/${api}.packages>"
                    append body "Package Version Information</a>\n"
                    append body "</tt></ul>\n\n"
               }
          }
          ;## new activity summary as of 04/09
          set url1 $::PUBDIR/activejobs.html
          set url2 [ mgr::runStatusSummary all link ]
          set gif     "<img src=\"ball_purple.gif\">\n"
          set tail    "<font color=brown>\n"
          append tail $gif
          append tail "<a href=$url1>\n"
          append tail "<b>Running Job Status Summary at $dom</b></a>\n"
          append tail "<p>\n"
          append tail $gif
          append tail "<a href=$url2>\n"
          append tail "<b>Per API Activity Summary at $dom</b></a>\n"
          if { [ string equal $caller mgr::sHuTdOwN ] } {
               append tail "<p><font color='red'><b>\n"
               append tail "LDAS system at $dom has been shut down.\n"
               append tail "</b><p></font>\n"
          }
          set end "</body></html>"
          set fname [ file join $::LDASLOG APIstatus.html ]
          set fid [ open $fname w 0644 ]
          puts $fid "$head$body$tail$end"
          ::close $fid
     } err ] } {
          if { [ string length $err ] } {
               set subject "$::LDAS_SYSTEM ${::API}API error!"
               set body "[ myName ] error: $err"
               puts stderr $body
               addLogEntry "Subject: ${subject}; Body: $body" mail
          }
     }
}

§   §   §

Name: mgr::apiStatus

Description:
Collect the data about an individual API required to prepare a status report.

Parameters: Usage:

Comments:

proc mgr::apiStatus { api } {
     if { [ catch {
          set retval [ list ]
          ;## set color and state
          set color green
          set state ok
          set mgrhost $::manager(host)
          set host [ set ::${api}(host) ]
          set port [ set ::${api}(operator) ]
          set port [ lindex $port 0 ]
          set status [ mgr::apiSocketStatus $host ]
          foreach { flag msg } $status { break }
          if { $flag } {
               set color red
               set state $msg
               set retval [ list {} {} $state $color ]
               return {}
          }
          set props [ list {} $host $port {} {} {} {} {} {} ]
          set chans [ list {} {} {} {} {} ]
          ;## set props
          if { [ catch {
               ;## don't let the manager announce it is running after
               ;## shutdown!!
               if { [ string equal manager $api ] && \
                        [ string equal mgr::sHuTdOwN [ uplevel 2 myName ] ] } {
                    shutdown
               }
               if { ! [ string equal manager $api ] } {
                    set sid [ sock::open $api emergency ]
                    #set sid [ mgr::connect2API $api emergency ]
               }
          } err ] } {
               set color red
               set state "$host is up, but $api API is not running"
               set retval [ list $props $chans $state $color ]
               return {}
          } else {
               catch { ::close $sid }
          }
          ;## get the asynchornously generated API state file
          set filename [ file join $::LDASLOG ${api}.status ]
          if { [ file exists $filename ] && [ file size $filename ] } {
               foreach [ list props chans ] [ dumpFile $filename ] { break }
          }
          set props [ linsert $props 1 $host $port ]
          set dummy [ list {} {} {} {} {} {} {} {} {} ]
          set size [ llength $props ]
          eval lappend props [ lrange $dummy $size end ]
          set dummy [ list {} {} {} {} {} ]
          set size [ llength $chans ]
          eval lappend chans [ lrange $dummy $size end ]
          foreach { socks files threads events objects } $chans {
               break
          }
          set chans     [ llength $socks ]
          lappend chans [ llength $files ]
          lappend chans [ expr [ llength $threads ] / 3 ]
          lappend chans [ llength $events ]
          lappend chans [ llength $objects ]
          lappend chans [ expr [ join [ split $chans ] + ] ]
          set ::api_status_data($api) \
              [ list $props $chans $state $color ]
     } err ] } {
          catch { ::close $sid }
          if { ! [ string length $err ] } {
               return $retval
          }
          return -code error "[ myName ]: $err"
     }
     return [ list $props $chans $state $color ]
}

§   §   §

Name: mgr::apiSocketStatus

Description:
Minimise time required to determine API status by avoiding repeated calls to sock::diagnostic.
Should make status page generation very fast.

Parameters: Usage:

Comments:

proc mgr::apiSocketStatus { host } {
     if { [ catch {
          set now [ clock seconds ]
          if { [ info exists ::apisocketstatus($host) ] } {
               foreach [ list last status ] \
                   $::apisocketstatus($host) { break }
               if { ($now - $last) > 60 } {
                    set status [ sock::diagnostic $host ]
                    set ::apisocketstatus($host) [ list $now $status ]
               }
          } else {
               set status [ sock::diagnostic $host ]
               set ::apisocketstatus($host) [ list $now $status ]
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $status
}

§   §   §

Name: mgr::removeAPI

Description:
Shutdown an API and remove it from the manager's active list. The change will propagate to all remaining API's and all references to the removed API and it's services will be removed.

Parameters: Usage:

Comments:

The name of the API as given may be apinameAPI or apiname.
proc mgr::removeAPI { api } {
     regexp {(.+)API} $api -> api
     if { [ catch {
          set i [ lsearch $::API_LIST $api ]
          if { $i < 0 } {
               return -code error "not a registered API: '$api'"
          }
          set host [ set ::${api}(host) ]
          mgr::shutdownAPI $api
          mgr::bootstrapPause $api
          set ::API_LIST [ lreplace $::API_LIST $i $i ]
          unset ::${::LDAS_SYSTEM}($api)
          mgr::killPing $api
          set ::removedapis($api) [ array get ::$api ]
          lappend ::removedapis($api) $i
          unset ::$api
          foreach _api $::API_LIST {
               if { [ string equal $_api manager ] } {
                    continue
               }
               set sid [ sock::open $_api emergency ]
               fconfigure $sid -blocking off
               puts $sid "$::MGRKEY \"set ::API_LIST \[ list $::API_LIST \];
unset ::${::LDAS_SYSTEM}($api)\""
               after 10 [ list cmd::receive $sid ${_api}_$sid mgr::logReply ]
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     set dom $::LDAS_SYSTEM
     set subject "$dom ${api}API removed from '$host'"
     set body    "unregistered '$api' API from '$host' at $dom"
     addLogEntry "Subject: ${subject}; Body: $body" email
}

§   §   §

Name: mgr::addAPI

Description:
Register a previously unregistered API into the LDAS system.

Parameters: Usage:

Comments:

If we are adding an API that has not been run during the current incarnation of the manager, we need to assign ports for the new API and the API needs to come up knowing about and able to open it's services. We do this by editing (possibly creating) an API specific resource file in the cwd which the API will use at startup.
also see mgr::removeAPI
proc mgr::addAPI { api host } {
     regexp {(.+)API} $api -> api
     if { [ catch {
          set i [ lsearch $::API_LIST $api ]
          if { $i != -1 } {
               return -code error "could not add already registered API: $api"
          }
          ;## for an API which the system has not seen before,
          ;## ports need to be assigned and all API's need to
          ;## be made aware of them.
          if { [ catch {
               if { [ info exists ::removedapis($api) ] } {
                    set i [ lindex $::removedapis($api) end ]
                    array set ::$api [ lrange $::removedapis($api) 0 end-1 ]
                    unset ::removedapis($api)
                    set ::${api}(host) $host
               } else {
                    set i end
               }
               if { ! [ array exists ::$api ] } {
                    set port [ mgr::lastUsedPort ]
                    foreach service [ list operator emergency data ] {
                         set ::${api}($service) [ incr port ]
                         set ::${api}(host) $host
                    }
               }
               set dir $::env(RUNDIR)/${api}API
               if { ! [ file isdirectory $dir ] } {
                    set dir [ mgr::makeApiAtHome $api ]
               }
               set ::API_LIST [ linsert $::API_LIST $i $api ]
               set ::${::LDAS_SYSTEM}($api) $host
               ;## temporarily modify resource file
               if { ! [ file exists $dir/LDAS$api.rsc ] } {
                    file copy -force \
                        $::LDAS/lib/${api}API/LDAS$api.rsc \
                        $dir/LDAS$api.rsc \
                   } else {
                        file copy -force $dir/LDAS$api.rsc $dir/LDAS$api.tmp
                   }
               set fid [ open $dir/LDAS$api.rsc a+ ]
               foreach service [ list operator emergency data ] {
                    puts $fid "set ::${api}($service) [ set ::${api}($service) ]"
               }
               puts $fid "set ::API_LIST [ list $::API_LIST ]"
               ::close $fid
          } err ] } {
               catch { ::close $fid }
               return -code err "problem updating LDAS$api.rsc"
          }
          foreach _api $::API_LIST {
               if { [ regexp "(manager|$api)" $_api ] } {
                    continue
               }
               set sid [ sock::open $_api emergency ]
               fconfigure $sid -blocking off
               puts $sid "$::MGRKEY \"set ::API_LIST [ list $::API_LIST ];
array set ::$api \{[ array get ::$api ]\};
set ::${::LDAS_SYSTEM}($api) $host\""
               after 10 [ list cmd::receive $sid ${_api}_$sid mgr::logReply ]
          }
          addLogEntry "bootstrapAPI $api" purple
          mgr::bootstrapAPI $api
          ;## clean up modified resource file
          file delete -force $dir/LDAS$api.rsc
          if { [ file exists $dir/LDAS$api.tmp ] } {
               file rename -force $dir/LDAS$api.tmp $dir/LDAS$api.rsc
          }
     } err ] } {
          catch { ::close $sid }
          return -code error "[ myName ]: $err"
     }
     set dom $::LDAS_SYSTEM
     set subject "$dom ${api}API started on '$host'"
     set body    "registered new '$api' API on '$host' at $dom"
     addLogEntry "Subject: ${subject}; Body: $body" email
}

§   §   §

Name: mgr::lastUsedPort

Description:
Return the last port up from ::BASEPORT that the manager has assigned to an API.

Parameters: Usage:

Comments:

proc mgr::lastUsedPort { } {
     if { [ catch {
          set last 0
          ;## get the current highest port number
          foreach api $::API_LIST {
               foreach port { operator emergency data } {
                    if { [ set ::${api}($port) ] > $last } {
                         set last [ set ::${api}($port) ]
                    }
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     set last
}

§   §   §

Name: mgr::updateUserInfo

Description:
Change a user's password and/or email on-the-fly!

Parameters: Usage:

Comments:

proc mgr::updateUserInfo { name password email fullname phone expires { delete 0 } } {
     if { [ catch {
          set valid 0
          set panic $::QUEUE(USERS)
          if { ! $delete } {
               ;## otherwise expires would be changed to integer seconds
               ;## via an uplevel call from mgr::validateUserOptions
               set temp $expires
               mgr::validateUserOptions
               set expires $temp
               ::unset temp
          }
          if { ! [ regexp {^[01]$} $delete ] } {
               set msg "invalid delete flag, must be 0 or 1"
               return -code error msg
          }
          foreach user $::QUEUE(USERS) {
               if { ! [ regexp -- "-name\\s+$name\\s+" $user ] } {
                    lappend tmp $user
               } else {
                    set valid 1
                    if { [ regexp -- "-password\\s+(\S+)\\s+" $user -> hash ] } {
                         if { [ string equal $password [ key::md5 $hash ] ] } {
                              set password $hash
                         }
                    }
               }
          }
          if { $valid } {
               ;## we need to be careful not to expire a user after they
               ;## have received an extension within the last 86400
               ;## seconds of their lifetime. PR#1943
               if { [ info exists ::user_scheduled_to_expire($name) ] } {
                    set afterid $::user_scheduled_to_expire($name)
                    ::unset ::user_scheduled_to_expire($name)
                    after cancel $afterid
               }
               set ::QUEUE(USERS) $tmp
               if { ! $delete } {
                    mgr::addUser $name $password $email $fullname $phone $expires
               } else {
                    mgr::removeUser $name $password $email $fullname $phone $expires
               }
          } else {
               set msg "user '$name' does not exist, use addUser instead!"
               addLogEntry $msg red
               return -code error $msg
          }
     } err ] } {
          set ::QUEUE(USERS) $panic
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::updateOrAddUser

Description:
Iterates over a flat list of {name password email} triplets and updates as possible, returning all errors in a list.

Parameters: Usage:

Comments:

proc mgr::updateOrAddUser { args } {
     if { [ catch {
          set errs [ list ]
          if { [ llength $args ] == 1 } {
               set args [ lindex $args 0 ]
          }
          if { [ llength [ lindex $args 0 ] ] == 6 } {
               set args [ join $args ]
          }
          foreach [ list name password email fullname phone expires ] $args {
               if { [ catch {
                    mgr::updateUserInfo $name $password $email $fullname $phone $expires
               } err ] } {
                    set err1 $err
                    if { [ catch {
                         mgr::addUser $name $password $email $fullname $phone $expires
                    } err ] } {
                         lappend errs $err1 $err
                    }
               }
          }
          if { [ llength $errs ] } {
               return -code error $errs
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::abortJob

Description:
Cause a job to abort when it returns from the API which is currently processing it.
A job can only be stopped by the user who started it.
if the Control and Monitor API wants su priveleges, it can run this command through the emergency port by specifying the jobid to kill TWICE.

Parameters: Usage:

Comments:

proc mgr::abortJob { jobid1 jobid2 } {
     if { [ catch {
          set name [ list ]
          regexp {\d+} $jobid1 jobid1
          regexp {\d+} $jobid2 jobid2
          set jobid1 ${::RUNCODE}$jobid1
          set jobid2 ${::RUNCODE}$jobid2
          set data $::QUEUE(USERS)
          foreach user $data {
               set jobs [ lindex $user end ]
               if { [ regexp -- $jobid1 $jobs ] && \
                        [ regexp -- $jobid2 $jobs ] } {
                    set name [ lindex $user 1 ]
                    if { [ llength [ namespace children :: ::$jobid1 ] ] } {
                         set msg "$jobid1 aborted at user '$name' request"
                         set subject "$jobid1 [ set ::${jobid1}::userSubject ] error!"
                         ::${jobid1}::predelete $subject $msg
                         if { [ llength [ namespace children :: ::$jobid1 ] ] } {
                              ::${jobid1}::seq
                         }
                    } else {
                         set msg "$jobid1 has already been aborted or has finished"
                         addLogEntry $msg blue
                    }
                    break
               }
          }
          if { ! [ string length $name ] } {
               set msg "permission denied"
               addLogEntry $msg red
          }
     } err ] } {
          addLogEntry $err red
     }
     return $msg
}

§   §   §

Name: mgr::hideDirectories

Description:
Creates index.html files with the single entity "<html>" in subdirectories of directories specified in the global variable ::MGR_HIDE_DIRECTORIES. All files and subdirectories directly under the subdirectories with the index.html files written to them will have their world bits set to "0".
The list ::MGR_NOHIDE_DIRECTORIES should be built in the LDASmanager.rsc file.

Parameters: Usage:

Comments:

Note that this DOES NOT modify the directory named as topdir, only the directories and files directly under it.
proc mgr::hideDirectories { } {
     set errs [ list ]
     if { [ catch {
          file attributes $::PUBDIR -permissions 0775
          if { [ info exists ::MGR_NOHIDE_DIRECTORIES ] } {
               foreach { topdir dont_hide } $::MGR_NOHIDE_DIRECTORIES {
                    set dirs [ glob -nocomplain $topdir/* ]
                    foreach dir $dirs {
                         if { [ file isdirectory $dir ] \
                                  && [ lsearch $dont_hide [ file tail $dir ] ] == -1 } {
                              set fname [ file join $dir index.html ]
                              if { ! [ file exists $fname ] } {
                                   if { [ catch {
                                        set fid [ open $fname w 0644 ]
                                        puts $fid "<html>"
                                        ::close $fid
                                        debugPuts "wrote $fname"
                                   } err ] } {
                                        lappend errs $err
                                   }
                              }
                              if { [ catch {
                                   foreach item [ glob -nocomplain $dir/* ] {
                                        if { [ string equal $item $fname ] } {
                                             continue
                                        }
                                        set perms [ file attributes $item -permissions ]
                                        set world [ string range $perms end end ]
                                        if { $world } {
                                             file attributes $item -permissions o-rwx
                                             debugPuts "attributes of $item modified"
                                        }
                                   }
                                   file attributes $fname -permissions 0444
                              } err ] } {
                                   lappend errs $err
                              }
                         }
                    }
               }
          }
          if { [ catch {
               mgr::forcePerms
          } err ] } {
               lappend errs $err
          }
          if { [ llength $errs ] } {
               return -code error $errs
          }
     } err ] } {
          set jobid IDLE
          addLogEntry $err yellow
     }
}

§   §   §

Name: mgr::forcePerms

Description:

Parameters: Usage:

Comments:

proc mgr::forcePerms { } {
     if { [ catch {
          ;## PR 3047 - diskcache API should push hashfile name
          if { [ info exists ::DISKCACHE_HASHFILE_NAME_BINARY ] } {
               set diskcache  $::DISKCACHE_HASHFILE_NAME_BINARY
          } else {
               set diskcache .frame.cache
          }
          if { [ info exists ::DISKCACHE_HASHFILE_NAME_ASCII ] } {
               set adiskcache  $::DISKCACHE_HASHFILE_NAME_ASCII
          } else {
               set adiskcache frame_cache_dump
          }
          set i 0
          foreach fname [ glob -nocomplain $::env(RUNDIR)/*.rsc ] {
               catch { file attributes $fname -permissions 0640 } err$i
          }
          set extradirs [ list savedCores tmp test developers ]
          foreach dir $extradirs {
               if { [ file exists $::env(RUNDIR)/$dir ] } {
                    catch {
                         file attributes $::env(RUNDIR)/$dir -permissions 0700
                    } err[ incr i ]
               }
          }
          foreach fname [ glob -nocomplain $::env(RUNDIR)/* ] {
               set filetype [ fileType $fname ]
               if { [ lsearch $filetype script     ] != -1 || \
                        [ lsearch $filetype executable ] != -1 } {
                    catch {
                         file attributes $fname -permissions 0600
                    } err[ incr i ]
               }
          }
          catch {
               set fid [ open $::env(RUNDIR)/index.html w 0644 ]
               puts $fid "<html>"
               ::close $fid
          } err[ incr i ]
          foreach api $::API_LIST {
               set dir $::env(RUNDIR)/${api}API
               catch {
                    file attributes $dir -permissions 0755
               } err[ incr i ]
               foreach pattern [ list * .??* ] {
                    foreach fname [ glob -nocomplain $dir/$pattern ] {
                         catch {
                              file attributes $fname -permissions 0640
                         } err[ incr i ]
                    }
               }
               set expected [ list ${api}.packages .htaccess index.html \
                                  queuestats bootschema.lam ]
               foreach file $expected {
                    foreach fname [ glob -nocomplain $dir/$file ] {
                         catch {
                              file attributes $fname -permissions 0644
                         } err[ incr i ]
                    }
               }
               if { [ string equal $api diskcache ] } {
                    ;##----------------------------------------------------------
                    ;## Ensure attributes of the frame filename caches
                    ;##----------------------------------------------------------
                    foreach fname [ glob -nocomplain $dir/$diskcache ] {
                         catch {
                              file attributes $fname -permissions 0644
                         } err[ incr i ]
                    }
                    foreach fname [ glob -nocomplain $dir/$adiskcache ] {
                         catch {
                              file attributes $fname -permissions 0644
                         } err[ incr i ]
                    }
               }
          }
          set errors [ list ]
          foreach var [ info vars err* ] {
               if { [ string length [ set $var ] ] } {
                    lappend errors [ set $var ]
               }
          }
          if { [ llength $errors ] } {
               set subject "$::LDAS_SYSTEM LDAS SYSTEM file permission error!"
               set errors [ join $errors "\n" ]
               addLogEntry "Subject: ${subject}; Body: $errors" email
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::addUser

Description:

Parameters: Usage:

Comments:

proc mgr::addUser { name password email fullname phone expires } {
     if { [ catch {
          if { [ regexp -- "-name\\s+$name\\s+" $::QUEUE(USERS) ] } {
               return -code error "user '$name' already exists!"
          }
          set caller [ uplevel myName ]
          #if { [ string equal emergency_callback $caller ] } {
          #   mgr::validateUserOptions
          #}
          set item [ list -name  $name  -password $password \
                         -email $email -fullname $fullname \
                         -phone $phone -expires  $expires {} ]
          mgr::cryptCheck
          ;## write new user info into users.queue
          if { [ file exists $qname ] } {
               if { [ file writable $qname ] } {
                    set data [ dumpFile $qname ]
                    if { $crypt } {
                         set data [ decrypt $data $::MGRKEY ]
                    }
                    bak $qname
                    set fid [ open $qname w ]
                    foreach user [ split $data "\n" ] {
                         if { ! [ regexp -- "-name\\s+$name\\s+" $user ] } {
                              puts $fid $user
                         }
                    }
                    puts $fid $item
                    ::close $fid
                    if { $crypt } {
                         cryptFileInPlace $qname $::MGRKEY
                    }
                    file attributes $qname -permissions 0600
                    addLogEntry "new user added to $qname: '$item'"
               } else {
                    addLogEntry "$qname not writable. User '$name' not added."
               }
          } else {
               return -code error "$qname not found!"
          }
          ;## update manager's ::QUEUE(USERS) list in place
          lappend ::QUEUE(USERS) $item
          addLogEntry "new user added to ::QUEUE(USERS): '$item'"
     } err ] } {
          catch { ::close $fid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::removeUser

Description:

Parameters: Usage:

Comments:

proc mgr::removeUser { name password email fullname phone expires } {
     if { [ catch {
          set caller [ uplevel myName ]
          #if { [ string equal emergency_callback $caller ] } {
          #   mgr::validateUserOptions
          #}
          set item [ list \
                         -name $name \
                         -password $password \
                         -email $email \
                         -fullname $fullname \
                         -phone $phone \
                         -expires $expires \
                         {} ]
          mgr::cryptCheck
          ;## write new user info into users.queue
          if { [ file exists $qname ] } {
               if { [ file writable $qname ] } {
                    set data [ dumpFile $qname ]
                    if { $crypt } {
                         set data [ decrypt $data $::MGRKEY ]
                    }
                    set fid [ open $qname w ]
                    foreach user [ split $data "\n" ] {
                         if { ! [ regexp -- "-name\\s+$name\\s+" $user ] } {
                              puts $fid $user
                         }
                    }
                    ::close $fid
                    if { $crypt } {
                         cryptFileInPlace $qname $::MGRKEY
                    }
                    file attributes $qname -permissions 0600
                    addLogEntry "user '$name' removed from '$qname'"
               } else {
                    addLogEntry "$qname not writable. User '$name' not removed."
               }
          } else {
               return -code error "$qname not found!"
          }
     } err ] } {
          catch { ::close $fid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::validateUserOptions

Description:

Parameters: Usage:

Comments:

proc mgr::validateUserOptions { } {
     if { [ catch {
          set now [ clock seconds ]
          uplevel set now $now
          set name     [ uplevel set name     ]
          set password [ uplevel set password ]
          set email    [ uplevel set email    ]
          set fullname [ uplevel set fullname ]
          set phone    [ uplevel set phone    ]
          set expires  [ uplevel set expires  ]
          if { ! [ string length $name     ] || \
                   ! [ string length $password ] || \
                   ! [ string length $email    ] || \
                   ! [ string length $fullname ] || \
                   ! [ string length $phone    ] || \
                   ! [ string length $expires  ] } {
               set msg    "all fields must be provided when changing "
               append msg "user info (name password email full_name"
               append msg " phone expiration_date)"
               return -code error $msg
          }
          if { [ regexp {\s+} $name$password$email ] } {
               return -code error "embedded spaces forbidden!"
          }
          if { ! [ regexp {.+@.+\..+} $email ] } {
               return -code error "invalid e-mail: '$email'"
          }
          if { ! [ regexp -nocase {[0-9a-f]{32}} $password ] } {
               set password [ key::md5 $password ]
               uplevel set password $password
          }
          if { ! [ regexp {^\d+$} $expires ] } {
               set expires [ clock scan $expires ]
               uplevel set expires $expires
          }
          if { $expires <= $now } {
               set msg "account expiration given has already passed"
               return -code error $msg
          }
          if { ! [ regexp {\d{3}} $phone ] } {
               set msg "invalid phone number: '$phone'"
               return -code error $msg
          }
          if { [ string length $fullname ] < 4 || \
                   [ llength $fullname ] < 2 } {
               set msg "invalid full user name (full name required):"
               append msg " '$fullname'"
               return -code error $msg
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::cryptCheck

Description:

Parameters: Usage:

Comments:

proc mgr::cryptCheck { args } {
     if { [ catch {
          ;## name of the file containing the user info
          if { [ info exists ::ENCRYPTED_USERS_QUEUE ] } {
               if { [ string equal 1 $::ENCRYPTED_USERS_QUEUE ] } {
                    set qname users.queue.crypt
                    set crypt 1
               } else {
                    set qname users.queue
                    set crypt 0
               }
          } else {
               set qname users.queue
               set crypt 0
          }
          if { $crypt && \
                   [ file exists users.queue ] && \
                   ! [ file exists users.queue.crypt ] } {
               file copy -force users.queue users.queue.crypt
               cryptFileInPlace users.queue.crypt $::MGRKEY
          }
          if { ! [ file exist $qname ] } {
               file copy \
                   [ file join $::LDAS lib managerAPI $qname ] .
               file attributes $qname -permissions 0600
          }
          uplevel set qname $qname
          uplevel set crypt $crypt
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::killJob

Description:
Empty placeholder which garbageCollect can walk over.

Parameters: Usage:

Comments:

proc mgr::killJob { args } {
}

§   §   §

Name: kIlLjOb

Description:
Empty placeholder which garbageCollect can walk over.

Parameters: Usage:

Comments:

proc kIlLjOb { args } {
}

§   §   §

Name: mgr::garbageCollect

Description:
After a request to abort a job there may be objects orphaned in databuckets. This procedure will iterate through the API's and empty the databucket for the job, kill mpi jobs, cancel data conditioning threads, etc.
Relies on the existence of the emptyDataBucket and/or api specific killJob command to work.

Parameters: Usage:

Comments:

proc mgr::garbageCollect { jobid } {
     if { [ catch {
          if { [ info exists ::SYSTEM_SHUTDOWN_IN_PROGRESS ] } {
               set worry 0
          } else {
               set worry 1
          }
          set bootstraps [ list ]
          set errs [ list ]
          foreach _api $::API_LIST {
               set errmsg ""
               if { [ string equal manager $_api ] } { continue }
               ;## try to empty databucket
               if { [ catch {
                    #set sid [ mgr::connect2API $_api emergency $::API_SOCKET_TIMEOUT_MS ]
                    set sid [ sock::open $_api emergency ]
                    fconfigure $sid -blocking off
                    puts $sid "$::MGRKEY kIlLjOb $jobid"
                    ::close $sid
               } err ] } {
                    addLogEntry $err red
                    if { $worry } {
                         lappend bootstraps $_api
                    }
                    lappend errs $err
               }
          }
          ;## we were having a weird problem with errs being "{}"
          if { [ regexp {[a-zA-Z]+} $errs ] } {
               return -code error $errs
          }
          foreach _api $bootstraps {
               ;## don't do it if it's already locked
               if { ! [ info exists ::__bootstraplock_$_api ] } {
                    addLogEntry "bootstrapAPI $_api" purple
                    mgr::bootstrapAPI $_api
               }
          }
     } err ] } {
          catch { ::close $sid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::makeFtpDirectory

Description:
Create a directory under ::PUBDIR for users to place data files, scripts, etc. into that will all be owner/group ldas.

Parameters: Usage:

Comments:

proc mgr::makeFtpDirectory { jobid dir } {
     if { [ catch {
          regexp {\d+} $jobid job
          set jobid $::RUNCODE$job
          set dir [ file join $::PUBDIR $dir ]
          if { [ file isdirectory $dir ] } {
               return -code error "'$dir' already exists!"
          }
          if { [ file exists $dir ] } {
               return -code error "a FILE named '$dir' exists!"
          }
          file mkdir $dir
          file attributes $dir -permissions 0777
     } err ] } {
          set err "failed to create directory '$dir': $err"
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::getURLsForJob

Description:

Parameters: Usage:

Comments:

proc mgr::getURLsForJob { jobid { files "" } { i 0 } } {
     if { ! [ llength [ namespace children :: $jobid ] ] } {
          return {}
     }
     if { [ catch {
          if { ! [ string length [ join $files ] ] } {
               set cmd $::QUEUE($jobid,cmd)
               ;## substitute URL's on the input side with
               ;## localised filenames visible to all API's
               set idx  [ lsearch -exact $cmd metaOpts ]
               set opts [ lindex $cmd [ incr idx ] ]
               foreach [ list files opts ] \
                   [ mgr::localiseInputURLs $jobid $opts ] { break }
               set cmd  [ lreplace $cmd $idx $idx $opts ]
               set ::QUEUE($jobid,cmd) $cmd
          }
          set done 1
          ;## we have seen a number of cases where the file
          ;## existed but had zero length due to network
          ;## latency problems.
          foreach file $files {
               if { [ file exists ${file}.lock ] } {
                    set done 0
                    ;## PR #2705 handle the -tarball option!!
               } elseif { [ regexp {\.tar\.(gz|bz2)$} $file ] } {
                    set dir [ file dirname $file ]
                    set unpacked [ unpackTarball $file ]
                    foreach item $unpacked {
                         file delete -force ${dir}/${item}.lock
                    }
               }
          }
          ;## wait for up to 1 hour for data to arrive
          ;## the one second delay code addresses PR# 2415
          if { $done == 0 && $i <= 3600 } {
               if { $i <= 4 } {
                    incr i 1
                    set delay 1000
               } else {
                    incr i 5
                    set delay 5000
               }
               after $delay [ list ::mgr::getURLsForJob $jobid $files $i ]
          } elseif { $done == 0 && $i > 3600 } {
               set msg    "timed out waiting for remote files to "
               append msg "be transferred.  expected '$files' to "
               append msg "be available, but they are not all here."
               return -code error $msg
          } elseif { $done == 1 } {
               foreach file $files {
                    set filetype [ fileType $file ]
                    ;## test for .gwf files that are NOT frames!
                    if { [ string equal .gwf [ file extension $file ] ] && \
                             [ lsearch $filetype frame ] < 0 } {
                         set err "expected file '$file' to be a gravity "
                         append err "wave frame file, but it is '$filetype'"
                         lappend errors $err
                    }
                    ;## test for 404 pages from http retrievals
                    if { [ regexp {html\s+404} $filetype ] } {
                         lappend errors [ dumpFile $file ]
                    }
               }
               ;## if there were 404's, abort
               ;## and return 404 messages to user
               if { [ info exists errors ] } {
                    return -code error $errors
               }
               ;## ready to re-enter the job processing sequence
               after 1 ::${jobid}::seq
          }
     } err ] } {
          set ::${jobid}::errorapi manager
          set subject "$jobid error! URL not found."
          set msg "user specified URL not found:\n'$err'"
          ::${jobid}::predelete $subject $msg
          if { [ llength [ namespace children :: ::$jobid ] ] } {
               ::${jobid}::seq
          }
     }
}

§   §   §

Name: mgr::userQuota

Description:
Using the LDASmanager.rsc variables ::SAME_USER_REQUEST_RATE and ::SAME_USER_REQUESTS_PER_HOUR, throttle requests from indicidual users to a sane level.

Parameters: Usage:

Comments:

proc mgr::userQuota { cmd } {
     if { [ catch {
          set reason [ list ]
          set user   [ list ]
          set now [ clock seconds ]
          ;## parse the important parts from the command
          regexp -nocase -- {-name\s+\{?\s*(\S+)} [ lindex $cmd 1 ] -> user
          set user [ string trim $user "{} " ]
          set cmdname [ lindex [ lindex $cmd 2 ] 0 ]
          set rate   $::SAME_USER_REQUEST_RATE
          set hourly $::SAME_USER_REQUESTS_PER_HOUR
          set qf     $::MANAGER_QUEUE_SIZE_FACTOR
          set queuesize [ llength $::assistant_queue ]
          set maxsize [ expr { $::NUMBER_OF_ASSISTANT_MANAGERS * $qf } ]
          if { $queuesize >= $maxsize } {
               set msg    "The $::LDAS_SYSTEM LDAS system has a limited "
               append msg "queue size of $maxsize jobs.  There are "
               append msg "currently $queuesize jobs in the queue. "
               append msg "Please retry your job in a few minutes."
               set reason queuesize_exceeded
               return $msg
          }
          foreach entry $::QUEUE(USERS) {
               set name [ lindex $entry 1 ]
               if { ! [ string equal $user $name ] } {
                    continue
               }
               set jobs [ lindex $entry end ]
               set last_time [ lindex [ join $jobs ] 2 ]
               if { ! [ regexp {\d+} $last_time ] } {
                    break
               }
               ;## calculate the retry dt
               set dt  [ expr { $now - $last_time } ]
               set rdt [ expr { $rate - $dt } ]
               if { [ clock seconds ] < ($last_time + $rate) } {
                    set msg    "You must wait at least $rate seconds "
                    append msg "between LDAS system requests.  Try "
                    append msg "again in $rdt seconds.  Request Aborted."
                    set reason submission_rate
                    return $msg
               }
               set old [ lindex [ lindex $jobs end-$hourly ] end ]
               if { ! [ regexp {\d+} $old ] } {
                    break
               }
               set somany [ expr { $now - $old } ]
               if { $now < ($old + 3600) } {
                    set msg    "You have exceeded the hourly per-user "
                    append msg "job quota of $hourly jobs/hr.  You are"
                    append msg "forbidden from running another job on "
                    append msg "the $::LDAS_SYSTEM LDAS system for "
                    append msg "$somany seconds.  Request Aborted."
                    set reason hourly_rate
                    return $msg
               }
          }
     } err ] } {
          if { [ string length $reason ] } {
               after 0 \
                   [ list mgr::logRejectedJob $user $cmdname $reason ]
          }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::logRejectedJob

Description:
Writes data about rejected jobs into rejected_jobs.log
Parameters: Usage:

Comments:

proc mgr::logRejectedJob { user command reason } {
     if { [ catch {
          set time [ gpsTime ]
          set active $::LDASLOG/rejected_jobs.log
          set archive $::LDASARC/jobstats_archive/rejected_jobs.log.$time
          set dir $::LDASARC/jobstats_archive
          if { ! [ file isdirectory $dir ] } {
               file mkdir $dir
               file attributes $dir -permissions 0755
               gifBalls $dir
          }
          set mb [ expr { 1024 * 1024 } ]
          if { [ file exists $active ] } {
               if { [ file size $active ] >= $mb } {
                    file rename $active $archive
               }
          }
          set fid [ ::open $active a+ 0644 ]
          ;## make sure whatever rubbish reaches the cntlmon API
          ;## is really a list! PR#1944
          ::puts $fid [ list $time [ list $command ] $user $reason ]
          ::close $fid
     } err ] } {
          catch { ::close $fid }
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::detectBlockage

Description:
Detects system blockage when all jobs are moribund in a single API, OR more than half are moribund in a single API and the rest have dwelt for 2 times the limit longer in any other API, OR no all jobs have been running at least one hour.

Parameters: Usage:

Comments:

proc mgr::detectBlockage { { test 0 } } {
     if { [ catch {
          set data [ list ]
          set sys $::LDAS_SYSTEM
          ;## force strict boolean evaluation
          if { ! [ regexp {^(1|2)$} $test ] } {
               set test 0
          }
          if { [ info exists ::blockage_reported_at ] } {
               set last $::blockage_reported_at
          } else {
               set last 0
          }
          ;## added at Stuart's request so that ALL stuck jobs
          ;## would unstick even if system was not busy.
          if { [ info exists ::ALWAYS_KILL_BLOCKED_JOBS ] } {
               set always_kill $::ALWAYS_KILL_BLOCKED_JOBS
               if { [ regexp -nocase {^(true|1)$} $always_kill ] } {
                    set always_kill 1
               } else {
                    set always_kill 0
               }
          } else {
               set always_kill 0
          }
          ;## these will be returned with leading "::"
          set running [ namespace children :: ${::RUNCODE}* ]
          set running [ lsort $running ]
          set now [ clock clicks -milliseconds ]
          set N [ llength $running ]
          set limit $::NUMBER_OF_ASSISTANT_MANAGERS
          if { [ info exists ::MGR_IS_BLOCKED_TIMEOUT ] } {
               set timeout $::MGR_IS_BLOCKED_TIMEOUT
          } else {
               ;## default panic timeout is 10 minutes
               set timeout 600
          }
          ;## $limit jobs are possible.
          ;## note that if allways_kill is not set to 1 that
          ;## no jobs will actually be killed unless the system
          ;## is running saturated (all assistant managers assigned).
          if { $N >= $limit || $test || $always_kill } {
               set shortest 100000.0
               foreach job $running {
                    set user [ mgr::job2name $job ]
                    ;## job not actually running right now...
                    if { ! [ info exists ${job}::api ] } { continue }
                    set api  [ set ${job}::api ]
                    lappend score($api) $user
                    set start [ set ${job}::starttime(job) ]
                    ;## possibly less than a millisecond has elapsed and
                    ;## we are at in-between states in the array
                    if { [ catch {
                         set apit0 [ set ${job}::starttime($api) ]
                    } err ] } {
                         set apit0 $now
                    }
                    set job [ string trim $job : ]
                    set runtime [ expr { ($now - $start) / 1000.0 } ]
                    set runtime [ format "%.2f" $runtime ]
                    set apitime [ expr { ($now - $apit0) / 1000.0 } ]
                    set apitime [ format "%.2f" $apitime ]
                    if { $apitime < $shortest } { set shortest $apitime }
                    set command [ set ${job}::usrcmdname ]
                    lappend data [ list $job $command $api $user $apitime ]
                    ;## *** real kill occurs here ***
                    mgr::abortBlockedJob $job $api $apitime
               }
               ;## do not report more often than once per hour
               set now [ clock seconds ]
               if { $shortest >= $timeout && $N > 1 || $test } {
                    set apis [ array names score ]
                    ;## all currently running jobs are stuck in one API
                    if { [ llength $apis ] == 1 || $test == 1 } {
                         set api [ lindex $apis 0 ]
                         set n [ llength $score($api) ]
                         set subject "${sys}: $n job(s) stalled in ${api}API"
                         set body [ mgr::activeJobStatusSummary text ]
                         set msg "Subject: ${subject}; Body: $body"
                         if { $last < ($now - 3600) || $test } {
                              addLogEntry $msg mail
                              set ::blockage_reported_at $now
                         }
                    } elseif { $shortest >= (2 * $timeout) || $test == 2 } {
                         foreach api $apis {
                              set n [ llength $score($api) ]
                              if { $N > 1 && $n >= ($N / 2) || $test } {
                                   set subject "${sys}: $n job(s) stalled in ${api}API"
                                   set body [ mgr::activeJobStatusSummary text ]
                                   set msg "Subject: ${subject}; Body: $body"
                                   if { $last < ($now - 3600) || $test } {
                                        addLogEntry $msg mail
                                        set ::blockage_reported_at $now
                                   }
                                   break
                              }
                         }
                    }
                    ;## end of shortest/timeout comparison
               } else {
                    mgr::reportOversizeQueue
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::abortBlockedJob

Description:
Determine whether a job has exceeded an absolute limit for dwell time in a single API, and panic, aborting the job if it has.
Optionally, an email message can be sent after 1/2 the timeout period has elapsed, warning of pending timeouts and given an administrator advance warning so that the failure mode can be analyzed before the job is cleaned up.

Parameters: Usage:
if ::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API is defined
then the default  timeout  period  for all API's is that
value.

if it is NOT defined, the default timeout period for all API's defaults to 15000 seconds (4 hours).
additionally, if a variable is defined according to the pattern:
::MANAGER_ABORT_AFTER_N_SECONDS_IN_${API}_API NOTE THAT THE API NAME MUST BE ALL CAPS!!
then the timeout period for the API named in the next to the last underscore seperated field in that variable name will be equal to the value of that variable, in seconds.
Comments:
proc mgr::abortBlockedJob { jobid api runtime } {
     if { [ catch {
          set Api [ string toupper $api ]
          if { [ info exists ::MANAGER_ABORT_AFTER_N_SECONDS_IN_${Api}_API ] } {
               set control ::MANAGER_ABORT_AFTER_N_SECONDS_IN_${Api}_API
               set timeout [ set ::MANAGER_ABORT_AFTER_N_SECONDS_IN_${Api}_API ]
          } elseif { [ info exists ::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API ] } {
               set control ::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API
               set timeout $::MANAGER_ABORT_AFTER_N_SECONDS_IN_ONE_API
          } else {
               set timeout 15000
          }
          ;## if the user has provided the -usertimeout option for this
          ;## job, make use of it.
          if { [ info exists ::${jobid}::usertimeout ] } {
               set timeout [ set ::${jobid}::usertimeout ]
          }
          if { [ catch {
               set timeout [ expr int($timeout) ]
          } err ] } {
               set err "non-numerical value given for ${control}: '$timeout'\n"
               append err "this value can be defined in the LDASapi.rsc OR\n"
               append err "the LDASmanager.rsc, with the manager's resource\n"
               append err "file overriding the system resource file if there\n"
               append err "is a conflict."
               return -code error $err
          }
          if { [ info exists ::MANAGER_SHOULD_ABORT_STUCK_JOBS ] } {
               set test $::MANAGER_SHOULD_ABORT_STUCK_JOBS
               if { [ regexp -nocase {(T|TRUE)} $test ] } {
                    set ::MANAGER_SHOULD_ABORT_STUCK_JOBS 1
               } elseif { [ regexp -nocase {(F|FALSE)} $test ] } {
                    set ::MANAGER_SHOULD_ABORT_STUCK_JOBS 0
               } elseif { ! [ string equal 1 $test ] } {
                    set ::MANAGER_SHOULD_ABORT_STUCK_JOBS 0
               }
          } else {
               set ::MANAGER_SHOULD_ABORT_STUCK_JOBS 0
          }
          if { [ info exists ::WARN_ABOUT_POSSIBLY_STUCK_JOBS ] } {
               set test $::WARN_ABOUT_POSSIBLY_STUCK_JOBS
               if { [ regexp -nocase {(T|TRUE)} $test ] } {
                    set ::WARN_ABOUT_POSSIBLY_STUCK_JOBS 1
               } elseif { [ regexp -nocase {(F|FALSE)} $test ] } {
                    set ::WARN_ABOUT_POSSIBLY_STUCK_JOBS 0
               } elseif { ! [ string equal 1 $test ] } {
                    set ::WARN_ABOUT_POSSIBLY_STUCK_JOBS 0
               }
          } else {
               set ::WARN_ABOUT_POSSIBLY_STUCK_JOBS 0
          }
          if { ! [ info exists ::stuckjobwarnings ] } {
               set ::stuckjobwarnings [ list ]
          }
          if { [ info exist ::${jobid}::userSubject ] } {
               set userSubject [ set ::${jobid}::userSubject ]
          } else {
               set userSubject ""
          }
          if { [ string equal 1 $::MANAGER_SHOULD_ABORT_STUCK_JOBS ] &&
               ( $timeout > 0 ) } {
               if { $runtime > $timeout } {
                    set subject "$jobid $userSubject runtime error!"
                    set body    "$jobid error!\naborted after $runtime "
                    append body "seconds in ${api}API.\n"
                    append body "You have encountered an LDAS system bug!\n"
                    append body "This condition will be automatically\n"
                    append body "reported to the developer who is\n"
                    append body "responsible for maintaining the $api API.\n"
                    append body "User command $jobid aborting!"
               } elseif { $::WARN_ABOUT_POSSIBLY_STUCK_JOBS } {
                    if { [ info exists ::stuckjobwarnings ] && \
                             [ lsearch $::stuckjobwarnings $jobid ] == -1 && \
                             $runtime > $timeout / 2 } {
                         set now \
                             [ clock format [ clock seconds ] -format "%D-%T %Z" ]
                         set subj "$jobid $userSubject running in ${api}API for "
                         append subj "$runtime secs."
                         set body    "At $now $jobid had been running in the\n"
                         append body "${api}API for $runtime seconds.\n"
                         append body "If this job is still in the $api API "
                         append body "after $timeout seconds,\nit will be consid"
                         append body "ered moribund and will be aborted by the"
                         append body " system.\nThis will be the only warning "
                         append body "sent before the job is aborted."
                         addLogEntry "Subject: ${subj}; Body: $body" email
                         lappend ::stuckjobwarnings $jobid
                         set N $::NUMBER_OF_ASSISTANT_MANAGERS
                         set ::stuckjobwarnings \
                             [ lrange $::stuckjobwarnings end-$N end ]
                    }
               }
          }
          ;## The full, correct abort sequence
          if { [ info exists subject ] } {
               if { [ llength [ namespace children :: ::$jobid ] ] } {
                    set ::${jobid}::errorapi $api
                    ::${jobid}::predelete $subject $body email
                    if { [ llength [ namespace children :: ::$jobid ] ] } {
                         ::${jobid}::seq
                    }
               }
          }
     } err ] } {
          if { [ string length $err ] } {
               set subject "$::LDAS_SYSTEM managerAPI error!"
               set body "[ myName ]: $err"
               addLogEntry "Subject: ${subject}; Body: $body" email
          }
     }
}

§   §   §

Name: mgr::reportOversizeQueue

Description:

Parameters: Usage:

Comments:

proc mgr::reportOversizeQueue { { test 0 } } {
     if { [ catch {
          set now [ clock seconds ]
          set qf $::MANAGER_QUEUE_SIZE_FACTOR
          ;## force strict boolean evaluation
          if { ! [ string equal 1 $test ] } {
               set test 0
          }
          if { [ info exists ::oversized_queue_reported_at ] } {
               set last $::oversized_queue_reported_at
          } else {
               set last 0
          }
          if { $last < ($now - 3600) || $test } {
               set size [ llength $::assistant_queue ]
               set max [ expr { $::NUMBER_OF_ASSISTANT_MANAGERS * $qf } ]
               if { $size >= $max || $test } {
                    set sys $::LDAS_SYSTEM
                    set subject "${sys}: $size jobs are currently queued"
                    set body [ mgr::activeJobStatusSummary text ]
                    set msg "Subject: ${subject}; Body: $body"
                    set ::oversized_queue_reported_at $now
                    addLogEntry $msg mail
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::recryptWithNewKey

Description:

Parameters: Usage:

Comments:

proc mgr::recryptUsersWithNewKey { old new } {
     if { [ catch {
          set data [ dumpFile users.queue.crypt ]
          set data [ decrypt $data $old ]
          set data [ encrypt $data $new ]
          set fid  [ open users.queue.crypt.NEW w ]
          puts $fid $data
          ::close $fid
     } err ] } {
          catch { ::close $fid }
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::removeExpiredUsers

Description:

Parameters: Usage:

Comments:

proc mgr::findExpiredUsers {  } {
     if { [ catch {
          set errs [ list ]
          set system $::LDAS_SYSTEM
          set now  [ clock seconds ]
          set oneday [ expr { 24 * 3600 } ]
          set tomorrow [ expr { $now + $oneday } ]
          set midnight [ clock format $tomorrow -format %D ]
          set midnight [ clock scan $midnight ]
          set nextweek [ expr { $now + ( 7 * $oneday ) } ]
          set nextweek [ clock format $nextweek -format %D ]
          set nextweek [ clock scan $nextweek ]
          if { ! [ info exists ::LDAS_ACCOUNT_APPLICATION_URL ] || \
                   ! [ string length $::LDAS_ACCOUNT_APPLICATION_URL ] } {
               set ::LDAS_ACCOUNT_APPLICATION_URL \
                   https://www.ldas-sw.ligo.caltech.edu/passwordRequest/ldaspassword.cgi
          }
          foreach user $::QUEUE(USERS) {
               set msg [ list ]
               set subject [ list ]
               set body [ list ]
               set expires [ lindex $user 11 ]
               set email   [ lindex $user 5  ]
               set name    [ lindex $user 7  ]
               set user    [ lindex $user 1  ]
               if { [ string equal NULL $user ] } { continue }
               if { [ catch {
                    if { ! [ regexp {^\d+$} $expires ] } {
                         set expires [ clock scan $expires ]
                    }
               } err ] } {
                    lappend errs "user: '$user' has bogus expiration: '$expires'"
                    continue
               }
               if { [ catch {
                    if { $expires <= $midnight } {
                         foreach [ list subject body msg ] \
                             [ mgr::expiresTomorrow $user $name $email $expires $now ] \
                             { break }
                    } elseif { $expires <= $nextweek } {
                         foreach [ list subject body ] \
                             [ mgr::expiresNextWeek $user $expires ] { break }
                    }
               } err ] } {
                    lappend errs "user: '$user' error: $err"
               }
               if { [ string length $msg ] } {
                    addLogEntry "Subject: ${subject}; Body: $msg" email
               }
               if { [ string length $body ] } {
                    if { [ catch {
                         mailTo $email $subject $body
                    } err ] } {
                         lappend errs "user: '$user' error: $err"
                    }
               }
          }
          if { [ string length [ join $errs ] ] } {
               return -code error $errs
          }
     } err ] } {
          addLogEntry $err email
     }
}

§   §   §

Name: mgr::expiresNextWeek

Description:

Parameters: Usage:

Comments:

proc mgr::expiresNextWeek { user expires } {
     if { [ catch {
          set system $::LDAS_SYSTEM
          set subject "LDAS $system user account '$user' expires soon!"
          set body    "Your LDAS $system user account: '$user'\n"
          append body "will expire at [ clock format $expires ].\n\n"
          append body "You need to apply for an extension using the\n"
          append body "web form at $::LDAS_ACCOUNT_APPLICATION_URL"
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return [ list $subject $body ]
}

§   §   §

Name: mgr::expiresTomorrow

Description:

Parameters: Usage:

Comments:

proc mgr::expiresTomorrow { user name email expires now } {
     if { [ catch {
          set system $::LDAS_SYSTEM
          set subject "LDAS $system user account '$user' about to expire!"
          set body    "Your LDAS $system user account: '$user'\n"
          append body "expires at [ clock format $expires ].\n\n"
          append body "You need to apply for an extension using the\n"
          append body "web form at $::LDAS_ACCOUNT_APPLICATION_URL"
          set delay [ expr { $expires - $now } ]
          if { $delay > 60 } {
               set delay [ expr { $delay * 1000 } ]
          } else {
               set delay 0
          }
          if { $delay } {
               set ::user_scheduled_to_expire($user) \
                   [ after $delay \
                         [ list mgr::updateUserInfo $user 1 1 1 1 1 1 ] ]
          } else {
               mgr::updateUserInfo $user 1 1 1 1 1 1
          }
          set msg    "$system user account: '$user' expires "
          append msg "at [ clock format $expires ].\n"
          append msg "account belongs to '$name'.\n"
          append msg "Last known email address was: '$email'\n\n"
          append msg "user has been notified via email of\n"
          append msg "account status."
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return [ list $subject $body $msg ]
}

§   §   §

Name: mgr::throttlePipelines

Description:
Uses resource variable ::THROTTLE_AT_N_PIPELINES to stop spawning assistants if simultaneous number of pipelines active in a particular API exceeds a value.

Parameters: Usage:

Comments:

proc mgr::throttlePipelines { cmd } {
     set F 0
     set M 0
     set D 0
     set W 0
     set L 0
     if { [ info exists ::STOP_PROCESSING_JOBS_NO_DISK_SPACE ] } {
          if { $::STOP_PROCESSING_JOBS_NO_DISK_SPACE == 1 } {
               set msg "disk is full, job processing has been suspended."
               addLogEntry $msg red
               return 1
          }
     }
     foreach [ list PASS Throttles ] \
         [ mgr::throttlePipelinesAtApi ] { break }
     ;## short circuit if no throttles are defined
     if { $PASS == [ llength $::API_LIST ] } { return 0 }
     array set throttle $Throttles
     if { [ catch {
          set assistants [ namespace children :: ${::RUNCODE}* ]
          ;## iterate over RUNNING user commands
          foreach assistant $assistants {
               set command [ info vars ${assistant}::usrcmdname ]
               if { ! [ string length $command ] } { continue }
               set command [ set $command ]
               set api [ set ${assistant}::api ]
               if { [ regexp -nocase {(diskcache|frame)} $api ] } {
                    incr F
               } elseif { [ regexp -nocase {(eventmon|metadata)} $api ] } {
                    incr M
               } elseif { [ regexp -nocase {(datacond)} $api ] } {
                    incr D
               } elseif { [ regexp -nocase {(mpi)} $api ] } {
                    incr W
               } elseif { [ regexp -nocase {(ligolw)} $api ] } {
                    incr L
               }
          }
          if { [ info exists ::DEBUG_PIPELINE_THROTTLE ] && \
                   [ string equal 1 $::DEBUG_PIPELINE_THROTTLE ] } {
               if { [ info exist throttle(frame) ] && $F >= $throttle(frame) } {
                    set msg "$F user commands currently in diskcache/frame."
                    addLogEntry $msg purple
               }
               if { [ info exist throttle(metadata) ] && $M >= $throttle(metadata) } {
                    set msg "$M user commands currently in metadata/eventmon."
                    addLogEntry $msg purple
               }
               if { [ info exist throttle(datacond) ] && $D >= $throttle(datacond) } {
                    set msg "$D user commands currently in datacond."
                    addLogEntry $msg purple
               }
               if { [ info exist throttle(mpi) ] && $W >= $throttle(mpi) } {
                    set msg "$W user commands currently in mpi/wrapper."
                    addLogEntry $msg purple
               }
               if { [ info exist throttle(ligolw) ] && $L >= $throttle(ligolw) } {
                    set msg "$L user commands currently in ligolw."
                    addLogEntry $msg purple
               }
          }
          if { ([ info exist throttle(frame) ] && $F >= $throttle(frame) )    || \
                   ([ info exist throttle(metadata) ] && $M >= $throttle(metadata) )   || \
                   ([ info exist throttle(mpi) ] && $W >= $throttle(mpi) )     || \
                   ([ info exist throttle(ligolw) ] && $L >= $throttle(ligolw) ) } {
               set flag 1
          } elseif { ([ info exist throttle(datacond) ]) && $D >= $throttle(datacond) } {
               set rx {(conditionData|dataStandAlone|dataPipeline)}
               if { [ regexp -nocase $rx $cmd ] } {
                    set flag 1
               } else {
                    set flag 0
               }
          } else {
               set flag 0
          }
          ;## and be a little looser...
          ;## if frame is busy, and metadata is not
          if { ([ info exist throttle(metadata) ] && $M < $throttle(metadata)) && \
                   ([ info exist throttle(frame) ] && $F >= $throttle(frame)) } {
               ;## when it's a database action
               if { [ regexp {MetaData} $cmd ] } {
                    set flag 0
               }
               ;## when it's definitely gone on ahead of the frame
               if { [ regexp {(mpi|datacond|eventmon|metadata|ligolw)} $api ] } {
                    set flag 0
               }
               ;## if it's the metadata that's jammed, maybe a frame job
               ;## can get processed
          } elseif { ([ info exist throttle(frame) ] && $F < $throttle(frame)) && \
                         ([ info exist throttle(metadata) ] && $M >= $throttle(metadata)) } {
               if { [ regexp {(diskcache|frame)} $api ] } {
                    set flag 0
               }
          }
          if { [ regexp {(getFrameCache|cacheGetFile)} $cmd ] } {
               set flag 0
          }
     } err ] } {
          if { [ string length $err ] } {
               addLogEntry $err red
          }
          set flag 0
     }
     return $flag
}

§   §   §

Name: mgr::throttlePipelinesAtApi

Description:

Parameters: Usage:
If a variable is defined in the manager API resource
file which matches the pattern:

::THROTTLE_PIPELINES_AT_XXX_API Where 'XXX' is replaced by the name of an LDAS API with the name rendered entirely in capital letters, i.e. 'FRAME', then the the value of the variable will be used to limit the number of simultaneously running jobs in that API.
Comments:
proc mgr::throttlePipelinesAtApi { args } {
     if { [ catch {
          set apis $::API_LIST
          foreach api $apis {
               set Api [ string toupper $api ]
               set PASS 0
               if { [ string equal DATACOND $Api ] && \
                        [ info exists ::DATACOND_MEMORY_LOCK ] && \
                        $::DATACOND_MEMORY_LOCK == 1 } {
                    set throttle($api) 0
                    continue
               }
               if { [ info exists ::THROTTLE_PIPELINES_AT_${Api}_API ] } {
                    set throttle($api) \
                        [ set ::THROTTLE_PIPELINES_AT_${Api}_API ]
                    if { ! [ regexp {^\d+$} $throttle($api) ] || \
                             $throttle($api) >= $::NUMBER_OF_ASSISTANT_MANAGERS } {
                         set throttle($api) $::NUMBER_OF_ASSISTANT_MANAGERS
                         incr PASS
                    }
               } else {
                    set throttle($api) $::NUMBER_OF_ASSISTANT_MANAGERS
                    incr PASS
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return [ list $PASS [ array get throttle ] ]
}

§   §   §

Name: mgr::promoteJobs

Description:
First pass at this just promotes one job, effectively letting subsequent jobs pass the job at the top of the queue until the top job can be run.

Parameters: Usage:

Comments:

What this needs to do is find the next job with a higher i.d. than the one in position 0 and promote that one.
proc mgr::promoteJobs { pipelinethrottle } {
     if { [ catch {
          if { $pipelinethrottle } {
               if { [ llength $::assistant_queue ] > 1 } {
                    set promote [ lindex $::assistant_queue 1 ]
                    set ::assistant_queue \
                        [ lreplace $::assistant_queue 1 1 ]
                    set ::assistant_queue \
                        [ linsert $::assistant_queue 0 $promote ]
               }
          }
     } err ] } {
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::memUsage

Description:
Manage memory usage of all API's. Try to determine when an API needs to be restarted based on excessive memory or cpu usage.

Parameters: Usage:

Comments:

proc mgr::memUsage { args } {
}

§   §   §

Name: mgr::scanNFSErrors

Description:
Manager proc for driving the remote API's to examine the system logs for indications of NFS problems.

Parameters: Usage:
Parses the output from:

dmesg |grep -i nfs - on Solaris grep -i nfs /var/log/messages - on Linux To detect NFS errors which may be adversely affecting the LDAS system.
Comments:
proc mgr::scanNFSErrors { args } {
     if { [ catch {
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::logReply

Description:

Parameters: Usage:

Comments:

proc mgr::logReply { args } {
     if { [ catch {
          set var [ lindex $args 0 ]
          regexp {[^:_]+}  $var api
          regexp {sock\d+} $var cid
          catch { ::close $cid }
          set retval [ set $var ]
          catch { ::unset $var }
          if { [ string length $retval ] } {
               addLogEntry "${api}API: '$retval'" blue
          }
     } err ] } {
          catch { ::close $cid }
          catch { ::unset $var }
          addLogEntry $err email
     }
}

§   §   §

Name: mgr::validateUserPassword

Description:
Called via the emergency socket by the cntlmon API.
Returns "1" if the user info validates, "0" if not.

Parameters: Usage:
arguments are the normal userinfo items:

-name bob_dobbs -password $combined_hash ...
Comments:
Returns an error if the queue is corrupt or if the users MOU has expired.
proc mgr::validateUserPassword { args } {
     if { [ llength $args ] == 1 } {
          set args [ lindex $args 0 ]
     }
     if { [ catch {
          set bool 0
          ;## this will set salt and digest via uplevel
          regsub {md5Protocol} $args md5protocol args
          ;## this returns -password **** -name **** -md5salt ****
          ;## only name is used here
          set cmd [ cmd::md5Unpack $args ]
          array set input $cmd
          foreach user $::QUEUE(USERS) {
               if { ! [ llength $user ] } { continue }
               if { [ llength $user ] != 13 } {
                    set msg "QUEUE(USERS) has been corrupted!"
                    addLogEntry $msg 2
                    return -code error "[ myName ]: $msg"
               }
               array set q [ lrange $user 0 end-1 ]
               if { [ string equal $q(-name) $input(-name) ] } {
                    set test [ key::md5 $q(-password)$salt ]
                    if { [ string equal $digest $test ] } {
                         set bool 1
                    }
                    if { ! [ regexp {^\d+$} $q(-expires) ] } {
                         set q(expires) [ clock scan $q(-expires) ]
                    }
                    if { [ clock seconds ] > $q(-expires) } {
                         set edate [ clock format $q(-expires) -format %D ]
                         set msg "MOU for user $q(-name) expired on ${edate}. "
                         append msg "Please contact LDAS admin to apply for "
                         append msg "an extension."
                         return -code error "[ myName ]: $msg"
                    }
               }
          } ;## end of foreach
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $bool
}

§   §   §

Name: mgr::getDiskUsageInfo

Description:

Parameters: Usage:

Comments:

proc mgr::getDiskUsageInfo { args } {
     if { [ catch {
          set data [ exec df -m [ pwd ] ]
          set data [ lrange [ join [ split $data ] ] end-5 end ]
          foreach [ list dev size used avail percent mount ] $data { break }
          regexp {\d+} $percent percent
          set data [ list $dev $size $used $avail $percent $mount ]
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     return $data
}

§   §   §

Name: mgr::throttleOnDiskUsage

Description:
Should be called hourly. Keeps three days of disk usage statistics.

Parameters: Usage:

Comments:

proc mgr::throttleOnDiskUsage { args } {
     if { [ catch {
          set report   0
          set stopjobs 0
          set api $::API
          if { ! [ info exists ::REPORT_DISK_GETTING_FULL_USAGE_PERCENT ] } {
               set ::REPORT_DISK_GETTING_FULL_USAGE_PERCENT 90
          }
          if { ! [ info exists ::STOP_ACCEPTING_JOBS_DISK_USAGE_PERCENT ] } {
               set ::STOP_ACCEPTING_JOBS_DISK_USAGE_PERCENT 97
          }
          set reportpercent $::REPORT_DISK_GETTING_FULL_USAGE_PERCENT
          set nojobspercent $::STOP_ACCEPTING_JOBS_DISK_USAGE_PERCENT
          set now [ gpsTime ]
          set ::diskusage($now) [ mgr::getDiskUsageInfo ]
          set times [ lsort -decreasing -integer [ array names ::diskusage ] ]
          foreach time [ lrange $times 72 end ] {
               unset ::diskusage($time)
          }
          set times  [ lrange $times 0 71 ]
          set fid [ open ${::PUBDIR}/disk.usage w 0644 ]
          foreach time $times {
               puts $fid "$time $::diskusage($time)"
          }
          ::close $fid
          set points [ llength $times ]
          foreach [ list dev size used avail percent mount ] \
              $::diskusage($now) { break }
          if { $percent >= $nojobspercent } {
               set report 1
               set stopjobs 1
          } elseif { $percent >= $reportpercent } {
               set report 1
          } elseif { $percent < $reportpercent } {
               ;## if we were previously in an out-of-space condition
               if { [ info exists ::STOP_PROCESSING_JOBS_NO_DISK_SPACE ] && \
                        $::STOP_PROCESSING_JOBS_NO_DISK_SPACE == 1 } {
                    set report   1
                    set points  -1
                    set stopjobs 0
               }
          }
          if { $report } {
               set subject "$::LDAS_SYSTEM disk usage at ${percent}%!"
               if { $points > 1 } {
                    set j [ expr { $points - 1 } ]
                    set old [ set ::diskusage([ lindex $times $j ]) ]
                    set old [ lindex $old 2 ]
                    set dMb [ expr { ($used - $old) / double($j) } ]
                    set dMb [ format %.1f $dMb ]
                    if { $dMb > 0 } {
                         set dead [ expr { int($avail / $dMb) } ]
                         set mod  [ expr { $dead % 24 } ]
                         set dead [ expr { $dead / 24 } ]
                         set report "Disk usage by LDAS has reached ${percent}%.\n"
                         append report "Disk usage has been increasing at an\n"
                         append report "average rate of $dMb Mb per hour for "
                         append report "$points hours.\nAt this rate $dev will be"
                         append report " full in\n$dead days $mod hours!\n"
                         append report "When disk usage reaches "
                         append report "${nojobspercent}%, the manager\n"
                         append report "will stop accepting jobs until disk space"
                         append report "\nis cleared!"
                    } elseif { $dMb == 0 } {
                         set report "Disk usage by LDAS has reached ${percent}%.\n"
                         append report "When disk usage "
                         append report "reaches ${nojobspercent}%, the manager\n"
                         append report "will stop accepting jobs until disk space"
                         append report "\nis cleared!"
                    } elseif { $dMb < 0 } {
                         set report "Disk usage by LDAS has reached ${percent}%.\n"
                         append report "Disk space is being freed at an "
                         append report "average\nrate of $dMb Mb per hour for "
                         append report "$points hours."
                    }
               } elseif { $points == 0 } {
                    # just started, no history data available!!
                    set report "you should shutdown your LDAS system and clear\n"
                    append report "some disk space before trying to run LDAS!\n"
                    append report "perhaps you should archive some of the job\n"
                    append report "directories under ${::PUBDIR}.  the manager\n"
                    append report "will stop accepting jobs when disk usage "
                    append report "reaches ${nojobspercent}%, and will not\n"
                    append report "resume processing until disk space is freed."
               } else {
                    set report "Disk usage has fallen to ${percent}%.\n"
                    append report "Job processing will resume NOW."
               }
               if { $stopjobs } {
                    set report    "manager API cannot process any more jobs\n"
                    append report "until some disk space is freed on ${dev}\n"
                    append report "AND *either* the disk usage falls below\n"
                    append report "the value defined by the flag\n"
                    append report "::REPORT_DISK_GETTING_FULL_USAGE_PERCENT\n"
                    append report "(currently set to ${reportpercent}%) *or*\n"
                    append report "the status flag "
                    append report "::STOP_PROCESSING_JOBS_NO_DISK_SPACE\n"
                    append report "is set to '0' in the manager API by an\n"
                    append report "admin user via the control and monitor API."
                    set ::STOP_PROCESSING_JOBS_NO_DISK_SPACE 1
                    after 60000 mgr::activelyMonitorDiskUsage
               } elseif { [ string equal manager $api ] && ! $stopjobs } {
                    if { [ info exists ::STOP_PROCESSING_JOBS_NO_DISK_SPACE ] } {
                         set ::STOP_PROCESSING_JOBS_NO_DISK_SPACE 0
                    }
               }
               addLogEntry "Subject: ${subject}; Body: $report" email
          }
     } err ] } {
          addLogEntry $err red
     }
}

§   §   §

Name: mgr::activelyMonitorDiskUsage

Description:
Monitor that checks disk usage during cleanup phase and re-enables manager processing when disk usage falls below reporting level.

Parameters: Usage:

Comments:

proc mgr::activelyMonitorDiskUsage { args } {
     if { [ catch {
          if { $::STOP_PROCESSING_JOBS_NO_DISK_SPACE == 1 } {
               foreach [ list dev size used avail percent mount ] \
                   [ mgr::getDiskUsageInfo ] { break }
               if { $percent <= $::REPORT_DISK_GETTING_FULL_USAGE_PERCENT } {
                    if { [ array exists ::diskusage ] } {
                         ::unset ::diskusage
                    }
                    set ::STOP_PROCESSING_JOBS_NO_DISK_SPACE 0
               } else {
                    after 15000 mgr::activelyMonitorDiskUsage
               }
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
}

§   §   §

Name: mgr::liveJobListPush

Description:

Parameters: Usage:

Comments:

proc mgr::liveJobListPush { { apis frame } args } {
     if { [ info exists ::jobid ] && \
              [ string equal SHUTDOWN $::jobid ] } {
          return
     }
     set data [ namespace children :: ::${::RUNCODE}* ]
     set data [ lsort -dictionary $data ]
     if { [ info exists ::LIVE_JOB_PUSH_API_LIST ] } {
          set apis $::LIVE_JOB_PUSH_API_LIST
     }
     foreach api $apis {
          if { ! [ regexp $api $::API_LIST ] || ! [ info exist ::[ string toupper $api]_API_HOST ] } {
               continue
          }
          if { [ catch {
               set sid [ sock::open $api emergency ]
               fconfigure $sid -blocking off
               puts $sid \
                   "$::MGRKEY set ::SORTED_LIVE_JOB_LIST_REPORT [ list $data ]"
               ::close $sid
          } err ] } {
               catch { ::close $sid }
               addLogEntry "api: $api error: $err" red
          }
     }
     return $data
}

§   §   §

Name: mgr::reportWrongUser

Description:

Parameters: Usage:

Comments:

proc mgr::reportWrongUser { args } {
     if { [ info exists ::AUTHORIZED_LDAS_USER ] && \
              [ string length $::AUTHORIZED_LDAS_USER ] } {
          set authorized $::AUTHORIZED_LDAS_USER
          set user $::env(USER)
          if { ! [ string equal $authorized $user ] } {
               set subject "Attempt to start $::LDAS_SYSTEM by "
               append subject "unauthorised user!"
               set report "User '$user' is attempting to start LDAS,\n"
               append report "but the LDASmanager.rsc file declares\n"
               append report "the variable ::AUTHORIZED_LDAS_USER to be\n"
               append report "'${::AUTHORIZED_LDAS_USER}.'  Please log in\n"
               append report "as the correct user or change the value\n"
               append report "of ::AUTHORIZED_LDAS_USER to be the name\n"
               append report "of the user that is permitted to run LDAS."
               catch {
                    addLogEntry "Subject: ${subject}; Body: $report" email
               }
               puts stderr $report
               exit
          }
     }
}

§   §   §

Name: mgr::reportWrongHost

Description:

Parameters: Usage:

Comments:

proc mgr::reportWrongHost { args } {
     set ip $::MY_IP
     set user $::env(USER)
     set resource $::MANAGER_API_HOST
     set running $resource
     set interfaces [ list 127.0.0.1 $running ]
     catch { set interfaces [ ifConfig ] }
     ;## try to figure out where we are really running
     set mars_rx {^(10\.|192\.168\.|127\.0\.0\.)}
     foreach interface $interfaces {
          foreach [ list if ip running ] $interface { break }
          if { [ regexp -nocase -- $resource $running ] } {
               break
          }
     }
     puts stderr "manager host per ::MANAGER_API_HOST: '$resource'"
     puts stderr "manager host per system info: '$running'"
     if { ! [ regexp -nocase -- $resource $running ] } {
          set subject "$::LDAS_SYSTEM $::API API running on wrong host!"
          set report "The Manager API on $::LDAS_SYSTEM seems to\n"
          append report "be running on '$interfaces', but the\n"
          append report "LDASapi.rsc file says it should be running\n"
          append report "on '$resource'.  If these are not actually\n"
          append report "the same host machine, there may be a\n"
          append report "problem!  This manager API was started by\n"
          append report "user: '$user'\n"
          if { [ info exists ::ABORT_MANAGER_IF_WRONG_HOST ] && \
                   [ string equal 1 $::ABORT_MANAGER_IF_WRONG_HOST ] } {
               append report "\nYou can prevent the manager from aborting\n"
               append report "when this condition is detected by setting\n"
               append report "::ABORT_MANAGER_IF_WRONG_HOST in the\n"
               append report "LDASmanager.rsc file to '0'."
          }
          catch {
               addLogEntry "Subject: ${subject}; Body: $report" email
          }
          puts stderr $report
          if { [ info exists ::ABORT_MANAGER_IF_WRONG_HOST ] && \
                   [ string equal 1 $::ABORT_MANAGER_IF_WRONG_HOST ] } {
               ::exit
          }
     }
}

§   §   §

Name: mgr::chopQueueTo

Description:
Does a controlled chopping off of the queue if it is larger than $size, notifying the user that their job has been aborted.
Will chop 4 jobs per second until done.

Parameters: Usage:

Comments:

proc mgr::chopQueueTo { size } {
     if { ! [ info exists ::assistant_queue ] } { return }
     if { [ llength $::assistant_queue ] <= $size } { return }
     if { [ catch {
          set caller [ uplevel myName ]
          set chopped [ lindex $::assistant_queue end ]
          set ::assistant_queue [ lrange $::assistant_queue 0 end-1 ]
          foreach [ list name cmdname cmd ] $chopped { break }
          set jobid $name
          if    { [ info exist ::${jobid}::userSubject ] } {
                set userSubject [ set ::${jobid}::userSubject ]
          } else {
                set userSubject ""
          }
          set subject "$::LDAS_SYSTEM error! Job $name $userSubject aborted"
          set body "$::LDAS_SYSTEM job $name aborted "
          append body "by LDAS adiministrator\nas part of "
          append body "an unplanned system maintenance emergency.\n\n"
          append body "You will need to resubmit this job:\n\n"
          append body "$cmd"
          if { [ catch {
               set user  [ ::mgr::job2name  $jobid ]
               set email [ ::mgr::job2email $jobid ]
               mailTo $email $subject $body
               set drop [ jobDirectory ]/email.$name
               set fid [ open $drop w ]
               puts $fid "<html>\n<pre>\n$user ${email}\n${subject}\n$body"
               ::close $fid
          } err ] } {
               catch { ::close $fid }
               addLogEntry $err red
          }
          mgr::deleteJobidFromUsersQueue $name
          addLogEntry "job removed from queue by LDAS administrator" red
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     if { [ string equal mgr::sHuTdOwN $caller ] } {
          mgr::chopQueueTo $size
     } else {
          after 250 [ list mgr::chopQueueTo $size ]
     }
}

§   §   §

Name: mgr::rmJobFiles

Description:
Remove selected files from job directories that were created for the user making the removal request.

Parameters: Usage:

Comments:

All commands have '-userid $username' added.
Will only work if run via a user command macro.
proc mgr::rmJobFiles { thisjob jobids { extensions all } } {
     if { [ catch {
          set deleted  [ list ]
          set notowner [ list ]
	      set user [ set ::${thisjob}(-userid) ]
	
          ;## rationalise job i.d.'s
          set preprepro $jobids
          regsub -all $::RUNCODE $jobids {} jobids
          if { [ regexp -nocase {[a-z]} $jobids ] } {
               error "invalid runcode string in jobid list: '$preprepro'"
          }
          ;## sanity check. make sure user intends to hose system
          ;## if he's going to be allowed to do it!!
          ;## forces user to break long jobid range into mutliple
          ;## requests of 1000 each, i.e. 1000-2000 2001-3000...
          if { [ regexp {^(\d+)-(\d+)$} $jobids -> begin end ] } {
               if { $end - $begin > 1000 } {
                    set msg "Request exceeds limit of 1000 jobid's:"
                    append msg " '$preprepro'"
                    return -code error $msg
               }
          }
          set retval "$::PUBDIR "
          set joblist [ numRange $jobids ]
          foreach jobid $joblist {
               regexp {\d+} $jobid job
               set jobid $::RUNCODE$job
               set myriad [ expr { $job / 10000 } ]
               set myrdir ${::RUNCODE}_$myriad
               set jobdir ${::RUNCODE}$job
               set subdir [ file join $myrdir $jobdir ]
               set jobdir [ file join $::PUBDIR $subdir ]
               if { [ string equal -nocase all $extensions ] } {
                    set extensions [ list gwf ilwd xml ]
               }
               if { ! [ file isdirectory $jobdir ] } {
                    set err "invalid job directory: '$jobdir'"
                    lappend notowner $err
                    continue
               }
               set usermsg $jobdir/email.$jobid
               if { ! [ file readable $usermsg ] } {
                    set err "could not read file: '$usermsg'"
                    lappend notowner $err
                    continue
               }
               set usermsg [ split [ dumpFile $usermsg ] "\n" ]
               set USER [ lindex [ lindex $usermsg 2 ] 0 ]
               if { ! [ string equal $user $USER ] } {
                    set err "jobid $jobid does not belong to user: '$user'"
                    append err " (belongs to user '$USER')"
                    lappend notowner $err
		            addLogEntry $err red
                    continue
               }
               foreach ext $extensions {
                    ;## strip leading dots
                    regsub {^\.} $ext {} ext
                    if { ! [ regexp {^(gwf|ilwd|xml)$} $ext ] } {
                         set msg "Removal of .$ext files is not supported."
                         addLogEntry $msg red
                         continue
                    }
                    foreach file [ glob -nocomplain $jobdir/*.$ext ] {
                         file delete $file
                         lappend deleted [ file tail $file ]
                    }
               }
               if   { [ string length $retval ] < $::MAX_REPLY_BYTES } {
                    if   { [ llength $deleted ] } {
                        append retval "$subdir files removed: [ join $deleted ]\n"
                        set deleted [ list ]
                    } else {
                        append retval "$subdir: no files deleted!\n"
                    }
               }
          } ;## end of foreach on jobid's
          if { [ llength $notowner ] } {
               append retval "\n$notowner"
          }
     } err ] } {
          return -code error "[ myName ]: $err"
     }
     ;## suppress very long responses
     set len [ string length $retval ]
     if { $len > $::MAX_REPLY_BYTES } {
        set retval "rmJobFiles completed for $joblist '$retval ...',  ($len bytes) exceeded $::MAX_REPLY_BYTES limit."
     }
     addLogEntry "response has $len bytes" purple
     return [ string trim $retval \n ]
}

§   §   §

Name: mgr::blockUser

Description:
Method for temporarily blocking user commands from some user(s).

Parameters: Usage:

Comments:

proc mgr::blockUser { user { option block } } {
     if { [ string equal sTaRtUp $user ] } {
          if { [ file exists blocked.users ] } {
               set ::mgr::blocked_users [ dumpFile blocked.users ]
          } else {
               set ::mgr::blocked_users [ list ]
          }
          return {}
     }
     if { [ catch {
          set update 0
          if { [ info exists ::mgr::blocked_users ] } {
          } elseif { [ file exists blocked.users ] } {
               set ::mgr::blocked_users [ dumpFile blocked.users ]
          }
          set i [ lsearch $::mgr::blocked_users $user ]
          switch -exact $option {
               block {
                    if { $i == -1 } {
                         set update 1
                         lappend ::mgr::blocked_users $user
                         set msg "User: '$user' blocked temporarily."
                    } else {
                         set msg "User: '$user' already blocked."
                    }
               }
               unblock {
                    if { $i == -1 } {
                         set msg "User: '$user' was not blocked."
                    } else {
                         set update 1
                         set ::mgr::blocked_users [ lreplace $::mgr::blocked_users $i $i ]
                         set msg "User: '$user' existing block removed."
                    }
               }
               default {
                    set err "bad option: '$option'"
                    return -code error $err
               }
          }
          ;## only write file if it will be updated
          if { $update == 1 } {
               set fid [ open blocked.users w 0644 ]
               puts $fid $::mgr::blocked_users
               ::close $fid
          }
     } err ] } {
          catch { ::close $fid }
          return -code error "[ myName ]: $err"
     }
     return $msg
}

§   §   §

Name: mgr::blockDso

Description:
Method for temporarily blocking dataPipeline user commands based on dso.

Parameters: Usage:

Comments:

proc mgr::blockDso { dso { option block } } {
     if { [ string equal sTaRtUp $dso ] } {