Difference between revisions of "User Remote"

From oldwiki.scinet.utoronto.ca
Jump to navigation Jump to search
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
__FORCETOC__
 
__FORCETOC__
  
==Remote submitting jobs==
+
==Step 0: Some SSH Aliases to Make Life Easier==
  
==Remote cancel jobs==
+
<pre>
 +
function p7() {
 +
    ssh p7n01-ib0 "$@"
 +
}
  
==Remote query running jobs==
+
 
 +
function gpc() {
 +
    ssh gpc01 "$@"
 +
}
 +
</pre>
 +
 
 +
==Submitting jobs==
 +
The GPC and P7 job submission commands (<tt>qsub</tt> and <tt>llsubmit</tt>, respectively) do not allow for the submission of multiple job files, leaving the user to have to for-loop through multiple files when batch submission is required. The below bash function automates such looping and can be added to the shell environment
 +
 
 +
<pre>
 +
function submit() {
 +
    host=`hostname | awk '/^scinet/ {print 0} /^gpc/ {print 1} /^p7/ {print 2}'`
 +
 
 +
    if [ $host -eq 0 ]; then  # Login node
 +
        echo "Can't submit from a login node"
 +
        return
 +
    fi 
 +
 
 +
    if [ $host -eq 1 ]; then  # GPC
 +
        submitter=qsub
 +
    elif [ $host -eq 2 ]; then  # P7
 +
        submitter=llsubmit
 +
    fi 
 +
 
 +
    for fpath in "$@"; do
 +
        $submitter "$fpath"
 +
    done
 +
}
 +
 
 +
</pre>
 +
 
 +
===Usage===
 +
 
 +
<pre>
 +
ashwin@gpc-f101n084-ib0 $ ls
 +
job1.sh job2.sh job3.sh
 +
 
 +
ashwin@gpc-f101n084-ib0 $ submit job*.sh
 +
# submits all three job files using qsub
 +
</pre>
 +
 
 +
<pre>
 +
ashwin@p7n01 $ ls
 +
job1.ll job2.ll job3.ll
 +
 
 +
ashwin@p7n01 $ submit job*.ll
 +
# submits all three job files using llsubmit
 +
</pre>
 +
 
 +
==Canceling jobs==
 +
 
 +
The GPC and P7 job cancellation commands (<tt>canceljob</tt> and <tt>llcancel</tt>, respectively) do not allow for the cancellation of multiple jobs at once, leaving the user to for-loop over multiple job IDs when batch cancellation is required. The bash function below automates such looping and can be added to the shell environment.
 +
 
 +
<pre>
 +
function cancel() {
 +
    host=`hostname | awk '/^scinet/ {print 0} /^gpc/ {print 1} /^p7/ {print 2}'`
 +
   
 +
    if [ $host -eq 0 ]; then  # Login node
 +
        echo "Cannot cancel from Login node"
 +
        return
 +
    fi
 +
 
 +
    if [ $host -eq 1 ]; then  # GPC
 +
        canceler=canceljob
 +
    elif [ $host -eq 2 ]; then  # P7
 +
        canceler=llcancel
 +
    fi
 +
 
 +
    for jid in "$@"; do
 +
        $canceler "$jid"
 +
    done
 +
 
 +
}
 +
 
 +
</pre>
 +
 
 +
==Remote/Batch query running jobs==
 +
The GPC and P7 queues may be queried for job status (respectively, with <tt>showq</tt> and <tt>llq</tt>). However, this shows only the job ID and not much else. The following bash functions list the running and queued jobs in much more detail
 +
 
 +
===Querying multiple queues===
 +
<pre>
 +
function que() {
 +
    host=`hostname | awk '/^scinet/ {print 0} /^gpc/ {print 1} /^p7/ {print 2}'`
 +
 
 +
    if [ $host -eq 0 ]; then
 +
        p7 que
 +
        echo "========================================"
 +
        gpc que
 +
    fi
 +
 
 +
    if [ $host -eq 1 ]; then  #GPC
 +
        showq -u "$USER"
 +
    elif [ $host -eq 2 ]; then  # P7
 +
        llq -u "$USER"
 +
    fi
 +
}
 +
</pre>
 +
 
 +
===Getting detailed job information from queues===
 +
<pre>
 +
function columnize() {
 +
    array=$1
 +
    (for lineno in `seq 0 ${#lines[@]}`; do
 +
        line=${lines[lineno]}
 +
        echo "$line"
 +
    done) | column -t
 +
}
 +
 
 +
 
 +
function clearLines() {
 +
    for _ in `seq 1 $1`; do
 +
        echo -ne "\e[1A"
 +
    done
 +
}
 +
 
 +
function whatare() {
 +
    host=`hostname | awk '/^scinet/ {print 0} /^gpc/ {print 1} /^p7/ {print 2}'`
 +
    if [ $host -eq 0 ]; then
 +
        echo "============ P7 ============"
 +
        p7 whatare
 +
        echo ''
 +
        echo ''
 +
        echo "============GPC ============"
 +
        gpc whatare
 +
    fi
 +
 
 +
    lines=()
 +
    if [ $host -eq 1 ]; then  #GPC
 +
        echo "-----------RUNNING-----------"
 +
        for jobid in `showq -r -u $USER | grep $USER | cut -d' ' -f1 | sort -n`; do
 +
            name=`whatis "$jobid" | grep AName | cut -d":" -f2 | tr -d '\n'`
 +
            wtime=`checkjob "$jobid" | head | grep "WallTime" | tr -d '\n'`
 +
 
 +
            len=${#lines[@]}
 +
            clearLines $len
 +
 
 +
            appendlen=$((len + 1))
 +
            line="$jobid $name $wtime"
 +
            lines[$appendlen]="$line"
 +
            columnize $lines
 +
        done
 +
 
 +
        echo "-----------WAITING-----------"
 +
        len=${#lines[@]}
 +
        appendlen=$((len + 1))
 +
        lines[$appendlen]="-----------WAITING-----------"
 +
 
 +
        for jobid in `showq -i -u $USER | grep $USER | cut -d' ' -f1 | sort -n`; do
 +
            name=`whatis "$jobid" | grep AName | cut -d":" -f2`
 +
            wtime=`checkjob "$jobid" | head | grep "WallTime"`
 +
 
 +
            len=${#lines[@]}
 +
            clearLines $len
 +
 
 +
            appendlen=$((len + 1))
 +
            line="$jobid $name $wtime"
 +
            lines[$appendlen]="$line"
 +
            columnize $lines
 +
        done
 +
 
 +
        echo "-----------BLOCKED-----------"
 +
        len=${#lines[@]}
 +
        appendlen=$((len + 1))
 +
        lines[$appendlen]="-----------BLOCKED-----------"
 +
        for jobid in `showq -b -u $USER | grep $USER | cut -d' ' -f1 | sort -n`; do
 +
            name=`whatis "$jobid" | grep AName | cut -d":" -f2`
 +
            wtime=`checkjob "$jobid" | head | grep "WallTime"`
 +
 
 +
            len=${#lines[@]}
 +
            clearLines $len
 +
 
 +
            appendlen=$((len + 1))
 +
            line="$jobid  $name $wtime"
 +
            lines[$appendlen]="$line"
 +
            columnize $lines
 +
        done
 +
 
 +
        echo "============================="
 +
        return
 +
    fi
 +
 
 +
 
 +
    if [ $host -eq 2 ]; then  # P7
 +
        joblines=`que | tail -n +3 | head -n -2 | sort -t'.' -k2 -n`
 +
 
 +
        echo "---------- RUNNING ----------"
 +
        echo "$joblines" | while read jobline; do
 +
            status=`echo "$jobline" | awk '$1=$1' | cut -d' ' -f5`
 +
 
 +
            if [[ $status != R ]] ; then
 +
                continue
 +
            fi
 +
 
 +
            job=`echo $jobline | cut -d' ' -f1 | cut -d'.' -f2`
 +
            echo -n `whatis "$job" | grep "Job Name"` && echo
 +
        done
 +
 
 +
        echo "---------- WAITING ----------"
 +
 
 +
        echo "$joblines" | while read jobline; do
 +
            status=`echo "$jobline" | awk '$1=$1' | cut -d' ' -f5`
 +
 
 +
            if [[ $status != I ]] ; then
 +
                continue
 +
            fi
 +
 
 +
            job=`echo $jobline | cut -d' ' -f1 | cut -d'.' -f2`
 +
            echo -n `whatis "$job" | grep "Job Name"` && echo
 +
        done
 +
        echo "============================="
 +
        return
 +
    fi
 +
}
 +
 
 +
function whatis() {
 +
    host=`hostname | awk '/^scinet/ {print 0} /^gpc/ {print 1} /^p7/ {print 2}'`
 +
    if [ $host -eq 0 ]; then
 +
        echo "Cannot check from a login node"
 +
        #return
 +
    fi
 +
 
 +
    if [ $host -eq 1 ]; then  #GPC
 +
        checker=checkjob
 +
        label=AName
 +
 
 +
    elif [ $host -eq 2 ]; then  # P7
 +
        checker="llq -l"
 +
        label="Job Name"
 +
    fi
 +
 
 +
    for j in "$@"; do
 +
        echo -n "$j " && $checker "$j" | grep "$label"
 +
    done
 +
}
 +
 
 +
</pre>
 +
 
 +
===Usage===
 +
<pre>
 +
ashwin@gpc-f101n084-ib0 $ whatare
 +
============ P7 ============
 +
---------- RUNNING ----------
 +
# job information about running jobs
 +
---------- WAITING ----------
 +
# job information about waiting jobs
 +
=============================
 +
 
 +
-----------RUNNING-----------
 +
# job information about running jobs
 +
-----------WAITING-----------
 +
# job information about waiting jobs
 +
-----------BLOCKED-----------
 +
# job information about blocked jobs
 +
=============================
 +
</pre>
 +
 
 +
<pre>
 +
ashwin@gpc-f101n084-ib0 $ whatare
 +
-----------RUNNING-----------
 +
# job information about running jobs
 +
-----------WAITING-----------
 +
# job information about waiting jobs
 +
-----------BLOCKED-----------
 +
# job information about blocked jobs
 +
=============================
 +
</pre>

Latest revision as of 10:51, 12 February 2018


Step 0: Some SSH Aliases to Make Life Easier

# Forward every argument unchanged to ssh, targeting host p7n01-ib0.
p7() { ssh p7n01-ib0 "$@"; }


# Forward every argument unchanged to ssh, targeting host gpc01.
gpc() { ssh gpc01 "$@"; }

Submitting jobs

The GPC and P7 job submission commands (qsub and llsubmit, respectively) do not accept multiple job files in a single invocation, so the user has to for-loop over the files whenever a batch of jobs must be submitted. The bash function below automates that loop and can be added to the shell environment.

#######################################
# Submit each given job file with the scheduler of the current cluster.
# The cluster is picked from the hostname prefix: gpc* uses qsub and
# p7* uses llsubmit. Login nodes (scinet*) and unrecognised hosts are
# refused, so a job file is never executed as a local command because
# no submitter could be determined.
# Arguments: job file paths; one submission is made per path
# Outputs:   submitter output on stdout, diagnostics on stderr
# Returns:   0 if every submission succeeded, 1 otherwise
#######################################
submit() {
    local node submitter fpath rc=0
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "Can't submit from a login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            submitter=qsub
            ;;
        p7*)  # P7
            submitter=llsubmit
            ;;
        *)
            echo "submit: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for fpath in "$@"; do
        # Keep going after a failure, but remember that one happened.
        "$submitter" "$fpath" || rc=1
    done
    return "$rc"
}

Usage

ashwin@gpc-f101n084-ib0 $ ls
job1.sh job2.sh job3.sh

ashwin@gpc-f101n084-ib0 $ submit job*.sh
# submits all three job files using qsub
ashwin@p7n01 $ ls
job1.ll job2.ll job3.ll

ashwin@p7n01 $ submit job*.ll
# submits all three job files using llsubmit

Canceling jobs

The GPC and P7 job cancellation commands (canceljob and llcancel, respectively) do not allow for the cancellation of multiple jobs at once, leaving the user to for-loop over multiple job IDs when batch cancellation is required. The bash function below automates such looping and can be added to the shell environment.

#######################################
# Cancel each given job id with the scheduler of the current cluster.
# The cluster is picked from the hostname prefix: gpc* uses canceljob
# and p7* uses llcancel. Login nodes (scinet*) and unrecognised hosts
# are refused, so a job id is never executed as a local command because
# no canceller could be determined.
# Arguments: job ids; one cancellation is issued per id
# Outputs:   canceller output on stdout, diagnostics on stderr
# Returns:   0 if every cancellation succeeded, 1 otherwise
#######################################
cancel() {
    local node canceler jid rc=0
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "Cannot cancel from Login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            canceler=canceljob
            ;;
        p7*)  # P7
            canceler=llcancel
            ;;
        *)
            echo "cancel: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for jid in "$@"; do
        # Keep going after a failure, but remember that one happened.
        "$canceler" "$jid" || rc=1
    done
    return "$rc"
}

Remote/Batch query running jobs

The GPC and P7 queues may be queried for job status (with showq and llq, respectively). However, this shows only the job ID and not much else. The following bash functions list the running and queued jobs in much more detail.

Querying multiple queues

#######################################
# Show the current user's queue for whichever cluster this host is on.
# On a login node (scinet*) the query is run on both clusters through
# the p7 and gpc helpers, separated by a ruler line. On gpc* it runs
# showq and on p7* it runs llq, both restricted to $USER.
# Globals:   USER (read)
# Outputs:   queue listing on stdout, diagnostics on stderr
# Returns:   status of the last query; 1 on an unrecognised host
#######################################
que() {
    local node
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node: ask both clusters
            p7 que
            echo "========================================"
            gpc que
            ;;
        gpc*)  # GPC
            showq -u "$USER"
            ;;
        p7*)  # P7
            llq -u "$USER"
            ;;
        *)
            echo "que: unrecognised host '$node'" >&2
            return 1
            ;;
    esac
}

Getting detailed job information from queues

#######################################
# Print the rows held in the caller's `lines` array as an aligned table.
# Every element that is actually set is printed, whatever its index, so
# the result does not depend on how the caller numbered the array.
# Globals:   lines (read) - array of whitespace-separated rows
# Arguments: accepted for backward compatibility but ignored
# Outputs:   the rows piped through `column -t` on stdout
#######################################
columnize() {
    printf '%s\n' "${lines[@]}" | column -t
}


#######################################
# Emit the "cursor up one line" escape sequence (ESC [ 1 A) N times.
# Arguments: $1 - number of lines to move up; a missing or empty value
#                 counts as 0, so the cursor is left where it is
# Outputs:   the escape sequences on stdout, nothing else
# Returns:   0 on success, 1 if $1 is not a non-negative integer
#######################################
clearLines() {
    local count=${1:-0} i

    if ! [[ $count =~ ^[0-9]+$ ]]; then
        echo "clearLines: line count must be a non-negative integer" >&2
        return 1
    fi

    # 10# forces base 10 so a value such as "08" is not read as octal.
    for ((i = 0; i < 10#$count; i++)); do
        printf '\e[1A'
    done
}

#######################################
# Helper for whatare on GPC: append one detail row per job of a showq
# category to the caller's `lines` array, redrawing the table after
# every job so the columns stay aligned while results trickle in.
# Globals:   USER (read); `lines` array of the caller (appended to)
# Arguments: $1 - showq selector flag (-r, -i or -b)
#######################################
_whatare_gpc_section() {
    local selector=$1
    local jobid name wtime

    # Unquoted on purpose: the pipeline yields one job id per word.
    for jobid in $(showq "$selector" -u "$USER" | grep -- "$USER" | cut -d' ' -f1 | sort -n); do
        # Newlines are stripped so each job always occupies exactly one
        # row; the cursor arithmetic below depends on that.
        name=$(whatis "$jobid" | grep AName | cut -d":" -f2 | tr -d '\n')
        wtime=$(checkjob "$jobid" | head | grep "WallTime" | tr -d '\n')

        # Move back over the rows printed so far, then reprint them all
        # together with the new row.
        clearLines "${#lines[@]}"
        lines+=("$jobid $name $wtime")
        columnize
    done
}

#######################################
# Helper for whatare on P7: print the "Job Name" details of every job
# in the given listing whose status column equals the wanted status.
# Arguments: $1 - status to keep (compared with the 5th column)
#            $2 - job listing, one job per line
#######################################
_whatare_p7_section() {
    local wanted=$1 joblines=$2
    local jobid status job info

    while read -r jobid _ _ _ status _; do
        [[ $status == "$wanted" ]] || continue

        # The id passed to whatis is the second dot-separated field.
        job=$(cut -d'.' -f2 <<<"$jobid")

        # Collapse all whitespace runs (including newlines) to single
        # spaces without exposing the text to pathname expansion.
        info=$(whatis "$job" | grep "Job Name" | tr -s '[:space:]' ' ')
        info=${info# }
        info=${info% }
        printf '%s\n' "$info"
    done <<<"$joblines"
}

#######################################
# List the current user's jobs with more detail than the plain queue
# commands give. Behaviour depends on the hostname prefix:
#   scinet*  run whatare on both clusters via the p7 and gpc helpers
#   gpc*     RUNNING / WAITING / BLOCKED sections built from showq,
#            whatis and checkjob, drawn as one growing table
#   p7*      RUNNING / WAITING sections built from que and whatis
# Globals:   USER (read)
# Outputs:   the report on stdout, diagnostics on stderr
# Returns:   1 on an unrecognised host, otherwise the status of the
#            last command run
#######################################
whatare() {
    local node joblines
    # Rows of the GPC table; visible to the helpers and to columnize
    # through bash's dynamic scoping.
    local -a lines=()
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "============ P7 ============"
            p7 whatare
            echo ''
            echo ''
            echo "============GPC ============"
            gpc whatare
            ;;
        gpc*)  # GPC
            echo "-----------RUNNING-----------"
            _whatare_gpc_section -r

            # The later headers are also stored as rows so that they
            # are reprinted whenever the table is redrawn.
            echo "-----------WAITING-----------"
            lines+=("-----------WAITING-----------")
            _whatare_gpc_section -i

            echo "-----------BLOCKED-----------"
            lines+=("-----------BLOCKED-----------")
            _whatare_gpc_section -b

            echo "============================="
            ;;
        p7*)  # P7
            # Drop the first two and last two lines of the listing and
            # order the rest numerically from the second dot-field on.
            joblines=$(que | tail -n +3 | head -n -2 | sort -t'.' -k2 -n)

            echo "---------- RUNNING ----------"
            _whatare_p7_section R "$joblines"

            echo "---------- WAITING ----------"
            _whatare_p7_section I "$joblines"

            echo "============================="
            ;;
        *)
            echo "whatare: unrecognised host '$node'" >&2
            return 1
            ;;
    esac
}

#######################################
# Print the job-name line of each given job, prefixed with its id.
# The query command is picked from the hostname prefix: gpc* uses
# checkjob and keeps lines containing "AName"; p7* uses llq -l and
# keeps lines containing "Job Name". Login nodes (scinet*) and
# unrecognised hosts are refused, so a job id is never executed as a
# local command because no checker could be determined.
# Arguments: job ids
# Outputs:   "<id> <matching line(s)>" per job on stdout;
#            diagnostics on stderr
# Returns:   1 if the host is refused, otherwise the status of the
#            last grep
#######################################
whatis() {
    local node label j
    # Kept as an array so "llq -l" is passed as two words without
    # relying on word splitting.
    local -a checker
    node=$(hostname)

    case "$node" in
        scinet*)  # Login node
            echo "Cannot check from a login node" >&2
            return 1
            ;;
        gpc*)  # GPC
            checker=(checkjob)
            label=AName
            ;;
        p7*)  # P7
            checker=(llq -l)
            label="Job Name"
            ;;
        *)
            echo "whatis: unrecognised host '$node'" >&2
            return 1
            ;;
    esac

    for j in "$@"; do
        printf '%s ' "$j"
        "${checker[@]}" "$j" | grep -- "$label"
    done
}

Usage

ashwin@scinet01 $ whatare
============ P7 ============
---------- RUNNING ----------
# job information about running jobs
---------- WAITING ----------
# job information about waiting jobs
=============================


============GPC ============
-----------RUNNING-----------
# job information about running jobs
-----------WAITING-----------
# job information about waiting jobs
-----------BLOCKED-----------
# job information about blocked jobs
=============================
ashwin@gpc-f101n084-ib0 $ whatare
-----------RUNNING-----------
# job information about running jobs
-----------WAITING-----------
# job information about waiting jobs
-----------BLOCKED-----------
# job information about blocked jobs
=============================