#!/bin/bash
# This is the login shell program for the oar user, used by oarsub/oarexec and
# oarsh. It adds its own process to the OAR cgroups (defined by OAR_CPUSET) and
# runs the shell or the script of the user defined by OAR_JOB_USER.
#
# If OAR_USER_CPUSET and/or OAR_USER_GPUDEVICE is set by the user calling oarsh
# (not possible with oarsub), it creates a cgroup subdirectory restricted to
# the requested subset of the allocated cpusets and/or GPU devices.

# Abort on any unhandled command failure, including the failure of any stage
# of a pipeline.
set -e
set -o pipefail
# Unmatched globs expand to nothing, so the /dev/* device patterns scanned in
# add_process_to_cpuset yield no entry when no such device exists.
shopt -s nullglob

# Built-in defaults. They are assigned before /etc/oar/oar.conf is sourced
# below. NOTE(review): presumably the configuration file may override them -
# confirm against oar.conf.
DEFAULT_SHELL=/bin/bash
XAUTH_LOCATION="/usr/bin/xauth"
OARDIR="/usr/lib/oar"
OAR_RUNTIME_DIRECTORY="/tmp/oar_runtime"

# Make the bare "oardodo" command used in add_process_to_cpuset resolvable.
export PATH=$OARDIR/oardodo:$PATH

# Load the OAR configuration into this shell.
# shellcheck disable=SC1091
. /etc/oar/oar.conf

# File generated by job resource manager on all job nodes.
# It is named after the last path component of OAR_CPUSET and is sourced
# later on, if readable, before the final shell/command is executed.
JOBENVFILE="$OAR_RUNTIME_DIRECTORY/${OAR_CPUSET##*/}.env"

# Remember the umask we were started with (it is restored right before each
# final exec) and use 0022 while this script does its own work.
OLDUMASK=$(umask)
umask 0022

# $1 = Name of the systemd unit
# Wait until systemd no longer lists a queued job for the unit.
# The ListJobs reply is captured before it is inspected: piping it into
# "grep -q" under "set -o pipefail" can end the wait too early, because grep
# exits on its first match and busctl may then die from SIGPIPE, which makes
# the whole pipeline fail. The unit name is matched as a literal substring, so
# the dots it contains are not treated as regex wildcards.
# The wait also ends as soon as the ListJobs call itself fails.
_oarsh_wait_systemd_job() {
    local unit=$1
    local pending
    while pending=$(oardodo busctl call org.freedesktop.systemd1 /org/freedesktop/systemd1 org.freedesktop.systemd1.Manager ListJobs) &&
        [[ "$pending" == *"$unit"* ]]; do
        sleep 0.1
    done
}

# $1 = Name of the cpuset ("undef" makes the function a no-op)
# $2,$3,... = PIDs to add
# Add PIDS into the job cgroups
#
# The PIDs are placed in a new transient systemd scope created below the
# job's slice. When OAR_USER_CPUSET and/or OAR_USER_GPUDEVICE is set, an
# intermediate sub-slice is started first and restricted to the requested
# CPUs (AllowedCPUs property) and/or GPU devices (via oarcgdev).
#
# Globals read:    OAR_CPUSET, OAR_USER_CPUSET, OAR_USER_GPUDEVICE
# Globals written: the CGROUP_*, SYSTEMD_*, JOB_* variables below (kept
#                  global, as before, in case later code reads them);
#                  OAR_USER_CPUSET is emptied when it cannot be applied;
#                  OAR_USER_GPUDEVICE is rewritten as a space separated list.
# Returns: 0 on success, 1 if the scope holding the PIDs could not be created.
# Exits:   61 if the job's cgroup does not exist on this host.
#
# Callers invoke this function as "add_process_to_cpuset ... || exit 62",
# which disables "set -e" for the whole function body: any failure that must
# be fatal has to be turned into an explicit non-zero return here.
add_process_to_cpuset() {
    if [ "$1" = "undef" ]; then
        return 0
    fi

    # Mount point of the unified (v2) cgroup hierarchy
    CGROUP_MOUNT_POINT=$(sed -ne 's/^[^ ]\+ \([^ ]\+\) cgroup2 .*/\1/p' /proc/mounts)
    CGROUP_OAR_PATH=$CGROUP_MOUNT_POINT${OAR_CPUSET%/*}.slice
    # The last path component of OAR_CPUSET is "<user>_<job id>"
    JOB_ID=${OAR_CPUSET##*_}
    JOB_USER=${OAR_CPUSET##*/}
    JOB_USER=${JOB_USER%_*}
    JOB_USER_ID=$(id -u "$JOB_USER")
    # Unit names are nested: <oar>.slice > <oar>-u<uid>.slice >
    # <oar>-u<uid>-j<job>.slice > <...>-p<random>.scope
    SYSTEMD_OAR_SLICE=${CGROUP_OAR_PATH##*/}
    SYSTEMD_USER_SLICE=${SYSTEMD_OAR_SLICE%.slice}-u$JOB_USER_ID.slice
    SYSTEMD_JOB_SLICE=${SYSTEMD_USER_SLICE%.slice}-j$JOB_ID.slice
    SYSTEMD_PROC_SCOPE=${SYSTEMD_JOB_SLICE%.slice}-p$RANDOM.scope
    CGROUP_USER_PATH=$CGROUP_OAR_PATH/$SYSTEMD_USER_SLICE
    CGROUP_JOB_PATH=$CGROUP_USER_PATH/$SYSTEMD_JOB_SLICE

    if [ ! -e "$CGROUP_JOB_PATH/cgroup.procs" ]; then
        echo "oarsh: Cannot find the job's cgroup ($CGROUP_JOB_PATH), is the target host part of the job?" 1>&2
        exit 61
    fi
    # From here on "$@" only holds the PIDs
    shift

    # Add all processes to the OAR job cgroups
    if [ -n "$OAR_USER_CPUSET" ] || [ -n "$OAR_USER_GPUDEVICE" ]; then
        # A restriction was requested: the scope goes into a dedicated
        # sub-slice of the job slice instead of the job slice itself.
        SYSTEMD_SUBJOB_SLICE=${SYSTEMD_JOB_SLICE%.slice}-t$RANDOM.slice
        CGROUP_SUBJOB_PATH=$CGROUP_JOB_PATH/$SYSTEMD_SUBJOB_SLICE
        SYSTEMD_PROC_SCOPE=${SYSTEMD_SUBJOB_SLICE%.slice}-p$RANDOM.scope
        oardodo busctl call -q org.freedesktop.systemd1 /org/freedesktop/systemd1 org.freedesktop.systemd1.Manager StartUnit ss "$SYSTEMD_SUBJOB_SLICE" fail
        _oarsh_wait_systemd_job "$SYSTEMD_SUBJOB_SLICE"

        if [ -n "$OAR_USER_CPUSET" ]; then
            # Digits separated by whitespace, "," or "+", ending with a digit
            local cpuset_re='^[[:digit:][:space:],+]*[[:digit:]]$'
            # Reply expected from hwloc-calc: a D-Bus byte array ("ay")
            local allowed_cpus_re='^ay 0x[[:xdigit:]]{4}( 0x[[:xdigit:]]{2})+$'
            if [[ "$OAR_USER_CPUSET" =~ $cpuset_re ]]; then
                # Turn every accepted separator (including tabs and newlines)
                # into a space, so that the single line read below carries
                # the complete list.
                IFS=" " read -r -a OAR_USER_CPUSET_ARRAY <<< "${OAR_USER_CPUSET//[+,[:space:]]/ }"
                SYSTEMD_ALLOWED_CPUS_STR=$(hwloc-calc --cof systemd-dbus-api --pi "${OAR_USER_CPUSET_ARRAY[@]/#/pu:}" 2>/dev/null)
                if [[ "$SYSTEMD_ALLOWED_CPUS_STR" =~ $allowed_cpus_re ]]; then
                    # The validated string is split on purpose into the
                    # separate busctl arguments of the "ay" variant.
                    # shellcheck disable=SC2086
                    oardodo busctl call org.freedesktop.systemd1 /org/freedesktop/systemd1 org.freedesktop.systemd1.Manager SetUnitProperties 'sba(sv)' "$SYSTEMD_SUBJOB_SLICE" 1 1 AllowedCPUs $SYSTEMD_ALLOWED_CPUS_STR
                else
                    # OAR_USER_CPUSET is well formed but hwloc-calc could not
                    # translate it
                    echo "Warning: Failed to set OAR_USER_CPUSET=$OAR_USER_CPUSET, use the job's cpuset" 1>&2
                    OAR_USER_CPUSET=""
                fi
            else
                # OAR_USER_CPUSET is defined but syntax is incorrect
                echo "Warning: Bad syntax for OAR_USER_CPUSET=$OAR_USER_CPUSET, use the job's cpuset" 1>&2
                OAR_USER_CPUSET=""
            fi
        fi

        if [ -n "$OAR_USER_GPUDEVICE" ]; then
            OAR_USER_GPUDEVICE=${OAR_USER_GPUDEVICE//+/ }
            OAR_USER_GPUDEVICE=${OAR_USER_GPUDEVICE//,/ }
            # Every GPU related device file that was NOT requested is denied
            local -a DEV_DENY=()
            for f in /dev/nvidia[0-9]* /dev/dri/card[0-9]* /dev/dri/renderD[0-9]* /dev/nvidia-caps/*; do
                if [[ " $OAR_USER_GPUDEVICE " != *" $f "* ]]; then
                    DEV_DENY+=( "$f" )
                fi
            done
            oardodo /usr/lib/oar/oarcgdev "$CGROUP_SUBJOB_PATH" "${DEV_DENY[@]}"
        fi
    fi

    # Create the scope holding the PIDs, in the slice whose name is the scope
    # name without its "-p<random>.scope" suffix. A failure must be reported
    # to the caller: otherwise the processes silently stay outside of the
    # job's cgroup.
    if ! oardodo busctl call -q org.freedesktop.systemd1 /org/freedesktop/systemd1 org.freedesktop.systemd1.Manager StartTransientUnit 'ssa(sv)a(sa(sv))' "${SYSTEMD_PROC_SCOPE}" fail 3 Delegate b 1 PIDs au $# "$@" Slice s "${SYSTEMD_PROC_SCOPE%-p*}".slice 0; then
        echo "oarsh: Cannot create the systemd scope $SYSTEMD_PROC_SCOPE for the job's processes" 1>&2
        return 1
    fi
    _oarsh_wait_systemd_job "$SYSTEMD_PROC_SCOPE"
}

# Main program.
#
# Two modes, selected by OAR_JOB_USER:
#  - empty: the connection is for the oar user itself. When coming through
#    ssh (SSH_CLIENT set), OAR_KEY must be "1". The process is attached to
#    the job cgroup only if OAR_CPUSET is set, then $DEFAULT_SHELL is run.
#  - set: OAR_CPUSET is mandatory. The process is attached to the job cgroup,
#    the X authority and the tty are handed over to the job user, then
#    oardodo is executed with OARDO_BECOME_USER set to that user, either
#    without argument or with the requested command.
#
# Exit codes set here: 62 (cannot join the job cgroup), 63 (OAR_CPUSET
# missing), 65 (OAR_KEY missing), 66 (exec failed), 67 (should never happen).

# Without "execfail" a non-interactive bash terminates as soon as an exec
# fails, so the "exec failed" handlers below would never run.
shopt -s execfail

if [ "$OAR_JOB_USER" = "" ]; then
    if [ "$SSH_CLIENT" != ""  ] && [ "$OAR_KEY" != "1" ]; then
        echo "oarsh: The OAR_KEY environment variable is not defined and this seems to be a oar user connection." 1>&2
        exit 65
    fi
    # It must be oar
    if [ "$OAR_CPUSET" != "" ]; then
        add_process_to_cpuset "$OAR_CPUSET" "$$" "$PPID" || exit 62
        # shellcheck disable=SC1090
        [ -r "$JOBENVFILE" ] && . "$JOBENVFILE"
    fi
    # Best effort: a failed renice must not abort the login under "set -e"
    # (its output is discarded, so the abort would even be silent).
    $OARDIR/oardodo/oardodo renice 0 $$ $PPID > /dev/null 2>&1 || true
    export SHELL=$DEFAULT_SHELL

    umask "$OLDUMASK"
    # DEFAULT_SHELL is left unquoted, as before, so that it is word split
    exec $DEFAULT_SHELL "$@" || {
        echo "oarsh: exec failed" 1>&2
        exit 66
    }
else
    if [ "$OAR_CPUSET" = "" ]; then
        echo "oarsh: OAR_CPUSET variable is empty; Is your sshd right configured with 'AcceptEnv OAR_CPUSET OAR_JOB_USER' on all computing nodes?" 1>&2
        exit 63
    fi
    add_process_to_cpuset "$OAR_CPUSET" "$$" "$PPID" || exit 62

    #Manage display
    if [ -n "$DISPLAY" ]; then
        if [ -x "$XAUTH_LOCATION" ]; then
            # Copy the X authority entry of the display to the job user
            "$XAUTH_LOCATION" -q extract - "${DISPLAY#localhost}" | OARDO_BECOME_USER="${OAR_JOB_USER}" $OARDIR/oardodo/oardodo "$XAUTH_LOCATION" merge -
            # NOTE(review): both operands of this test are OAR_JOB_USER, so
            # the chmod below can never run. The left operand was presumably
            # meant to be another user name - confirm before changing it.
            # shellcheck disable=SC2016
            [ "${OAR_JOB_USER}" != "$OAR_JOB_USER" ] && OARDO_BECOME_USER="${OAR_JOB_USER}" $OARDIR/oardodo/oardodo bash --noprofile --norc -c 'chmod 660 $HOME/.Xauthority'
        fi
    fi
    #Change tty owner
    # Skipped when there is no tty: a failure before the last command of an
    # "&&" list does not trigger "set -e".
    TTY=$(tty) && test -e "$TTY" && $OARDIR/oardodo/oardodo chown "$OAR_JOB_USER":oar "$TTY" && $OARDIR/oardodo/oardodo chmod 660 "$TTY"
    # Best effort: a failed renice must not abort the login under "set -e"
    # (its output is discarded, so the abort would even be silent).
    $OARDIR/oardodo/oardodo renice 0 $$ $PPID > /dev/null 2>&1 || true
    # shellcheck disable=SC1090
    [ -r "$JOBENVFILE" ] && . "$JOBENVFILE"
    if [ "$1" = "" ]; then
        # Simulate initial login
        export OARDO_BECOME_USER=$OAR_JOB_USER
        umask "$OLDUMASK"
        #exec oardodo su - $OAR_JOB_USER
        exec $OARDIR/oardodo/oardodo || {
            echo "oarsh: exec failed" 1>&2
            exit 66
        }
    else
        # Run the requested command as the job user
        export OARDO_BECOME_USER=$OAR_JOB_USER
        export OARDO_USE_USER_SHELL=1
        umask "$OLDUMASK"
        exec $OARDIR/oardodo/oardodo "$@" || {
            echo "oarsh: exec failed" 1>&2
            exit 66
        }
    fi
fi

# Safety net: every branch above either execs or exits
echo "oarsh: Really bad error" 1>&2
exit 67
