User Remote

From oldwiki.scinet.utoronto.ca
Revision as of 09:51, 12 February 2018 by Ashwin (talk | contribs) (→‎Usage)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search


Step 0: Some SSH Aliases to Make Life Easier

# Open an SSH connection to the P7 node p7n01-ib0. Every argument is
# handed to ssh unchanged, so `p7 some command` runs that command remotely
# and a bare `p7` starts an interactive session.
function p7() {
    local -r node=p7n01-ib0
    ssh "$node" "$@"
}


# Open an SSH connection to the GPC node gpc01. Every argument is handed
# to ssh unchanged, so `gpc some command` runs that command remotely and a
# bare `gpc` starts an interactive session.
function gpc() {
    local -r node=gpc01
    ssh "$node" "$@"
}

Submitting jobs

The GPC and P7 job submission commands (qsub and llsubmit, respectively) do not accept multiple job files in one call, so batch submission means for-looping over the files by hand. The bash function below automates that loop and can be added to your shell environment.

# Submit one or more job files to the scheduler of the current cluster.
#
# The submission command is chosen from the hostname prefix: qsub on GPC
# nodes (gpc*) and llsubmit on P7 nodes (p7*). Login nodes (scinet*) and
# unrecognised hosts are refused up front; without that guard no submitter
# would be set and each job file would itself be executed as a command.
#
# Arguments: $@ - paths of the job files to submit
# Outputs:   whatever the submitter prints; refusals go to stderr
# Returns:   0 if every submission succeeded, 1 otherwise
function submit() {
    local node submitter fpath rc=0
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "Can't submit from a login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            submitter=qsub
            ;;
        p7*)  # P7
            submitter=llsubmit
            ;;
        *)
            echo "submit: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for fpath in "$@"; do
        # Keep going after a failure, but remember it for the exit status.
        "$submitter" "$fpath" || rc=1
    done
    return "$rc"
}

Usage

ashwin@gpc-f101n084-ib0 $ ls
job1.sh job2.sh job3.sh

ashwin@gpc-f101n084-ib0 $ submit job*.sh
# submits all three job files using qsub
ashwin@p7n01 $ ls
job1.ll job2.ll job3.ll

ashwin@p7n01 $ submit job*.ll
# submits all three job files using llsubmit

Canceling jobs

The GPC and P7 job cancellation commands (canceljob and llcancel, respectively) do not allow several jobs to be cancelled in one call, leaving the user to for-loop through the job IDs when batch cancellation is required. The bash function below automates that loop and can be added to your shell environment.

# Cancel one or more jobs on the scheduler of the current cluster.
#
# The cancel command is chosen from the hostname prefix: canceljob on GPC
# nodes (gpc*) and llcancel on P7 nodes (p7*). Login nodes (scinet*) and
# unrecognised hosts are refused up front; without that guard no canceler
# would be set and each job id would itself be executed as a command.
#
# Arguments: $@ - ids of the jobs to cancel
# Outputs:   whatever the cancel command prints; refusals go to stderr
# Returns:   0 if every cancellation succeeded, 1 otherwise
function cancel() {
    local node canceler jid rc=0
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "Cannot cancel from Login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            canceler=canceljob
            ;;
        p7*)  # P7
            canceler=llcancel
            ;;
        *)
            echo "cancel: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for jid in "$@"; do
        # Keep going after a failure, but remember it for the exit status.
        "$canceler" "$jid" || rc=1
    done
    return "$rc"
}

Remote/Batch query running jobs

The GPC and P7 queues may be queried for job status (with showq and llq, respectively). However, this shows only the job ID and not much else. The following bash functions list the running and queued jobs in much more detail.

Querying multiple queues

# Show the current user's jobs in the queue(s).
#
# Behaviour depends on the hostname prefix:
#   scinet*  runs `que` remotely through the p7 and gpc helpers and prints
#            both results, separated by a ruler line
#   gpc*     runs showq for $USER
#   p7*      runs llq for $USER
# Any other host is reported on stderr instead of silently doing nothing.
#
# Returns: status of the last queue command run, or 1 on an unrecognised
#          host.
function que() {
    local node
    node=$(hostname)

    case "$node" in
        scinet*)
            p7 que
            echo "========================================"
            gpc que
            ;;
        gpc*)  # GPC
            showq -u "$USER"
            ;;
        p7*)  # P7
            llq -u "$USER"
            ;;
        *)
            echo "que: unrecognised host '$node'" >&2
            return 1
            ;;
    esac
}

Getting detailed job information from queues

# Print the entries of the `lines` array as an aligned table.
#
# The array is not passed in: the function reads the variable named
# `lines` that is visible in the caller's scope. Any positional argument
# is accepted and ignored. Elements are emitted in index order whatever
# their indices are, so 0-based, 1-based and sparse arrays all work.
#
# Outputs: the entries, one per row, formatted by `column -t`
function columnize() {
    # Nothing to format; avoids feeding column a lone blank line.
    (( ${#lines[@]} > 0 )) || return 0

    # printf rather than echo so a row such as "-n" is printed literally.
    printf '%s\n' "${lines[@]}" | column -t
}


# Move the terminal cursor up by a number of lines.
#
# Emits the escape sequence ESC[1A once per line. Nothing is erased; the
# caller is expected to overwrite the lines it moved over.
#
# Arguments: $1 - number of lines to move up; missing or empty means 0
# Returns:   0 on success, 1 if $1 is not a non-negative integer
function clearLines() {
    local count=${1:-0} i

    [[ $count =~ ^[0-9]+$ ]] || return 1
    # Force base 10 so a zero-padded count is not parsed as octal.
    count=$((10#$count))

    for ((i = 0; i < count; i++)); do
        printf '\033[1A'
    done
}

# List the current user's jobs with more detail than the plain queue view.
#
# Behaviour depends on the hostname prefix:
#   scinet*  runs `whatare` remotely through the p7 and gpc helpers and
#            prints both reports under a header each
#   gpc*     for each of the showq listings -r, -i and -b (printed under
#            the RUNNING, WAITING and BLOCKED headers) builds one row per
#            job from the job id, the AName field reported by whatis and
#            the WallTime line of checkjob; the table is redrawn in place
#            after every job using clearLines and columnize
#   p7*      takes the job rows of `que`, and prints the "Job Name" line
#            reported by whatis for every job whose fifth column is R
#            (RUNNING header) and then I (WAITING header)
# Any other host is reported on stderr.
#
# Relies on the sibling helpers p7, gpc, que, whatis, clearLines and
# columnize. columnize reads the `lines` array declared here.
#
# Returns: 1 on an unrecognised host; otherwise the status of the last
#          command run.
function whatare() {
    local node section flag state header jobid name wtime
    local joblines jobline job info
    local -a lines=()
    local -a fields=()
    node=$(hostname)

    case "$node" in
        scinet*)
            echo "============ P7 ============"
            p7 whatare
            echo ''
            echo ''
            echo "============GPC ============"
            gpc whatare
            return
            ;;

        gpc*)  # GPC
            for section in RUNNING:-r WAITING:-i BLOCKED:-b; do
                flag=${section#*:}
                header="-----------${section%%:*}-----------"
                echo "$header"
                # The first header is printed before the table exists and
                # stays above it. Later headers land inside the region that
                # gets redrawn, so they must also be rows of the table.
                if [[ $section != RUNNING:* ]]; then
                    lines+=("$header")
                fi

                # Word splitting of the id list is intentional here.
                for jobid in $(showq "$flag" -u "$USER" \
                    | grep -F -- "$USER" \
                    | cut -d' ' -f1 \
                    | sort -n); do
                    # tr joins multi-line matches so every job stays on a
                    # single row and the redraw line count remains exact.
                    name=$(whatis "$jobid" | grep AName | cut -d':' -f2 | tr -d '\n')
                    wtime=$(checkjob "$jobid" | head | grep "WallTime" | tr -d '\n')

                    # Step back over the rows already on screen, then
                    # print the whole table again with the new row added.
                    clearLines "${#lines[@]}"
                    lines+=("$jobid $name $wtime")
                    columnize
                done
            done

            echo "============================="
            return
            ;;

        p7*)  # P7
            # Drop the first two and last two lines of the que output,
            # then sort numerically on the second dot-separated field.
            joblines=$(que | tail -n +3 | head -n -2 | sort -t'.' -k2 -n)

            for section in RUNNING:R WAITING:I; do
                state=${section#*:}
                echo "---------- ${section%%:*} ----------"

                while IFS= read -r jobline; do
                    read -r -a fields <<< "$jobline"
                    [[ ${fields[4]:-} == "$state" ]] || continue

                    # Second dot-separated field of the first column.
                    job=${fields[0]#*.}
                    job=${job%%.*}

                    # stdin is detached so nothing run by whatis can eat
                    # the remaining rows of the loop input. Whitespace is
                    # collapsed without exposing the text to globbing.
                    info=$(whatis "$job" < /dev/null \
                        | grep "Job Name" \
                        | tr -s '[:space:]' ' ')
                    info=${info# }
                    info=${info% }
                    printf '%s\n' "$info"
                done <<< "$joblines"
            done

            echo "============================="
            return
            ;;

        *)
            echo "whatare: unrecognised host '$node'" >&2
            return 1
            ;;
    esac
}

# Print the name line of each given job, prefixed with the job id.
#
# The query command and the label searched for depend on the hostname
# prefix: `checkjob` and "AName" on GPC nodes (gpc*), `llq -l` and
# "Job Name" on P7 nodes (p7*). Login nodes (scinet*) and unrecognised
# hosts are refused up front; without that guard no checker would be set
# and each job id would itself be executed as a command.
#
# Arguments: $@ - job ids to look up
# Outputs:   per job, "<id> " followed by the matching line(s); when
#            nothing matches, the line is ended right after the id
# Returns:   1 on a login node or unrecognised host, 0 otherwise
function whatis() {
    local node label j
    local -a checker=()
    node=$(hostname)

    case "$node" in
        scinet*)
            echo "Cannot check from a login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            checker=(checkjob)
            label=AName
            ;;
        p7*)  # P7
            # An array keeps the command and its option as separate words
            # without depending on word splitting.
            checker=(llq -l)
            label="Job Name"
            ;;
        *)
            echo "whatis: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for j in "$@"; do
        printf '%s ' "$j"
        # End the line ourselves when grep printed nothing, so the next
        # job id does not run on from this one.
        "${checker[@]}" "$j" | grep -- "$label" || printf '\n'
    done
    return 0
}

Usage

ashwin@scinet01 $ whatare
============ P7 ============
---------- RUNNING ----------
# job information about running jobs
---------- WAITING ----------
# job information about waiting jobs
=============================


============GPC ============
-----------RUNNING-----------
# job information about running jobs
-----------WAITING-----------
# job information about waiting jobs
-----------BLOCKED-----------
# job information about blocked jobs
=============================
ashwin@gpc-f101n084-ib0 $ whatare
-----------RUNNING-----------
# job information about running jobs
-----------WAITING-----------
# job information about waiting jobs
-----------BLOCKED-----------
# job information about blocked jobs
=============================