#!/bin/sh

# Helper script to flag done LoadLeveler jobs.
# The script is called periodically by the grid-manager.
#

# ARC1 passes the configuration file first, as "--config <file>".
# ARC_CONFIG is not referenced again in this script.
# NOTE(review): presumably it is consumed by the sourced configure-ll-env.sh
# - confirm.
if [ "$1" = "--config" ]; then shift; ARC_CONFIG=$1; shift; fi

# Absolute path of the directory containing this script.
# All expansions are quoted so that an installation path containing blanks
# or glob characters is handled correctly.
# The output of cd is discarded so that only pwd's result is captured.
basedir=$(dirname "$0")
basedir=$(cd "$basedir" > /dev/null && pwd) || exit $?

# Absolute path of the ARC library directory, located relative to the script.
pkglibdir="$basedir/../../lib/arc"
pkglibdir=$(cd "$pkglibdir" > /dev/null && pwd) || exit $?

# Assume that gm-kick is installed in the same directory
GMKICK=${basedir}/gm-kick

# The first remaining argument is the control directory; give up if it
# does not exist.
control_dir="$1"
test -d "$control_dir" || exit 1

# Get LoadLeveler environment
. "${pkglibdir}/configure-ll-env.sh" || exit $?

# Numeric user id this script runs as (compared against 0 further down to
# decide whether su is used).
my_id=$(id -u)

# printowner FILE
#
# Writes the login name of the user owning FILE to stdout.
# Perl is used because it is more portable than the stat command.
# Returns non-zero when FILE cannot be stat'ed (a "FILE: reason" message
# goes to stderr) or when the owning uid has no passwd entry (exit status 1,
# nothing printed).
printowner () {
  /usr/bin/perl -w \
    -e '$f=$ARGV[0];if(@s=stat $f){@p=getpwuid $s[4];if(@p){print $p[0]}else{exit 1}}else{die "$f: $!\n"}' \
    "$1"
}

# ll_shell_quote STRING
#
# Private helper: prints STRING wrapped in single quotes, with every embedded
# single quote rewritten as '\'' so the result can be pasted into a command
# line for a POSIX shell and is read back as exactly one word.
ll_shell_quote () {
  printf '%s\n' "$1" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/'/"
}

# save_commentfile USERNAME COMMENTFILE ERRORSFILE
#
# Append .comment (containing STDOUT & STDERR of the job wrapper) to .errors,
# framed by a header and a footer line.
#
#   USERNAME     if non-empty, the append is performed via "su USERNAME -c";
#                if empty, it is performed by the current process
#   COMMENTFILE  file whose contents are appended; if it cannot be read,
#                cat's error is discarded and only header and footer are
#                written
#   ERRORSFILE   file that is appended to
#
# The positional parameters are used directly, so no variables of the caller
# are overwritten. No path is ever passed through eval: a path containing
# quotes, '$' or backticks is treated as plain data in both branches.
save_commentfile () {
  if [ -z "$1" ] ; then
    {
      echo '---------- Contents of output stream forwarded by LL ------------'
      cat "$2" 2> /dev/null
      echo '------------------------- End of output -------------------------'
    } >> "$3"
  else
    # su -c takes a single command string, so the paths have to be embedded
    # in it; quote them so that the target user's shell sees each as one word.
    sc_comment_q=$(ll_shell_quote "$2")
    sc_errors_q=$(ll_shell_quote "$3")
    su "$1" -c "
      { echo '---------- Contents of output stream forwarded by LL ------------'
        cat ${sc_comment_q} 2> /dev/null
        echo '------------------------- End of output -------------------------'
      } >> ${sc_errors_q}
    "
  fi
}

# Examine every job.*.local file located directly in the control directory.
# A job is finalised (job.<id>.lrms_done written, gm-kick run on its status
# file) when
#   - it has no .lrms_done file yet,
#   - its .status file contains INLRMS or CANCELING,
#   - its .local file provides a localid and a sessiondir, and
#   - llq either shows no "Status" line for it or shows "Completed".
#
# "IFS= read -r" keeps leading blanks and backslashes in file names intact.
find "$control_dir" -maxdepth 1 -type f -name 'job*.local' | while IFS= read -r i
do
  # Skip entries that are not (or no longer) regular files
  test -f "$i" || continue

  jobid=$(basename "$i" .local | sed 's/^job.//')
  donefile="${control_dir}/job.${jobid}.lrms_done"
  statusfile="${control_dir}/job.${jobid}.status"
  jobfile="${control_dir}/job.${jobid}.local"
  errorsfile="${control_dir}/job.${jobid}.errors"

  # Nothing to do if the job is already flagged as done
  test -f "$donefile" && continue

  # Only jobs whose grid-manager state is INLRMS or CANCELING are handled
  test -f "$statusfile" || continue
  gmstatus=$(cat "$statusfile")
  case "$gmstatus" in
    INLRMS|CANCELING) ;;
    *) continue ;;
  esac

  # Get local LRMS id of job by evaluating the first "localid=..." line;
  # the eval turns that line into an assignment to $localid itself.
  # NOTE(review): this executes text read from the job's .local file as
  # shell code. sessiondir below is extracted with sed instead; consider
  # doing the same here once it is confirmed that the value is never
  # shell-quoted in the file.
  localid=$(grep '^localid=' "$i" | head -1)
  eval "$localid"

  # Did we get a local id?
  test -n "$localid" || continue

  # Ask LoadLeveler for the job. If llq prints no Status line the job is
  # treated as gone from the queue and is finalised below.
  if status=$("$LL_BIN_PATH/llq" -l "$localid" | grep '^ *Status')
  then
    # $status is left unquoted on purpose: word splitting removes the
    # leading blanks and any trailing whitespace before sed sees the line.
    status=$(echo $status | sed 's/^ *Status: //')
    case "$status" in
      Completed) ;;      # finished: go on and record the result
      *) continue ;;     # Canceled or any other state: check again next run
    esac
  fi

  # Get session directory of this job; without it the exit code cannot be
  # looked up, so the job is skipped.
  session=$(sed -n 's/^sessiondir=//p' "$jobfile")
  test -n "$session" || continue
  diagfile="${session}.diag"
  commentfile="${session}.comment"

  # When running as root, access the session files as the owner of the
  # job's .local file; otherwise access them directly.
  if [ "$my_id" = '0' ] ; then
    username=$(printowner "${jobfile}")
  else
    username=
  fi

  # Try to obtain the exit code recorded in the .diag file
  if [ -z "$username" ] ; then
    exitcode=$(grep '^exitcode=' "$diagfile" 2>/dev/null | sed 's/^exitcode=//')
  else
    # su -c takes one command string; single-quote the path (escaping any
    # embedded single quote) so it reaches grep as exactly one argument.
    diagfile_q=$(printf '%s\n' "$diagfile" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/'/")
    exitcode=$(su "${username}" -c "grep '^exitcode=' ${diagfile_q}" 2>/dev/null | sed 's/^exitcode=//')
  fi

  # Work out what to record in the .lrms_done file
  case "$exitcode" in
    '')
      exitcode=-1
      donemsg="$exitcode Job finished with unknown exit code"
      ;;
    152|24)
      # Both codes are recorded as 24 with a time-limit message
      exitcode="24"
      donemsg="$exitcode Job exceeded time limit."
      ;;
    *)
      # job finished and exit code is known
      donemsg="$exitcode Executable finished with exit code $exitcode"
      ;;
  esac

  # The done file is known not to exist at this point (checked above), so
  # appending creates it.
  save_commentfile "$username" "$commentfile" "$errorsfile"
  printf '%s\n' "$donemsg" >> "$donefile"
  "${GMKICK}" "$statusfile"
done

# NOTE(review): presumably this pause throttles how soon the grid-manager
# can start the next scan - confirm before changing.
sleep 60
exit 0
