#!/bin/bash
set -e
#
# HTCondor for Linux Quick Install script
#
# For the installation steps, including how to verify the contents of
# this file, see:
#
#   https://htcondor.readthedocs.io/en/latest/getting-htcondor/
#
# This script is meant for quick & easy install via:
#
#   $ sudo curl -fsSL https://get.htcondor.org | /bin/bash -s -- --no-dry-run
#

# Print the command-line help text on stdout, then terminate the script.
# Never returns to the caller: it always exits with status 0.
usage() {
    cat <<-EOF
Specify only one of the following:
--dist      : Display detected operating system and exit
-h, --help  : Display this message and exit
--download  : Download the tarball for a user-level installation and exit.
--dry-run   : Do not install, just print commands [default]
--no-dry-run: Issue all the commands needed to install HTCondor

Installation options:
--channel NAME: Specify version channel to install; NAME can be
       feature: Most recent release with new features [default]
       lts    : Most recent release with only bug-fixes
--password: Set the password used to secure the pool.  Use only if
            nobody else can log into the machine.  Otherwise specify
            in the environment (GET_HTCONDOR_PASSWORD) or interactively.
--shared-filesystem-domain NAME: Specifies that machines installed with
                                 the same NAME share a filesystem.

Installation options, specify only one:
--minicondor: Install a complete stand-alone HTCondor.  [default]
--cm NAME   : Install as the central manager (CM) role.
--ap NAME   : Install as the access point (AP) role.
--ep NAME   : Install as the execution point (EP) role.
     The NAME of central manager's machine must be a DNS name resolvable
     on all the machines in the pool, or an IP address.
EOF
    exit 0
}

# Succeed (status 0) when `command -v` can resolve the given name(s).
# Everything the lookup prints, on stdout or stderr, is discarded.
command_exists() {
    command -v "$@" &> /dev/null
}

# True (status 0) when DOWNLOAD holds a non-empty value.
is_download() {
    [[ -n ${DOWNLOAD} ]]
}

# True (status 0) when DRY_RUN holds a non-empty value.
is_dry_run() {
    [[ -n ${DRY_RUN} ]]
}

# True (status 0) when DIST holds a non-empty value.
is_display_dist() {
    [[ -n ${DIST} ]]
}

# True (status 0) when `uname -s` prints a name containing "darwin" or
# "Darwin"; false (status 1) otherwise.
is_darwin() {
    [[ "$(uname -s)" == *[dD]arwin* ]]
}


# Map a forked Linux distribution onto the distribution it derives from,
# then honor --dist by printing the detected values and exiting.
#
# Globals read:    ID, UBUNTU_CODENAME, VERSION_ID, VERSION_CODENAME
# Globals written: ID, VERSION_CODENAME and (for known codenames) VERSION_ID,
#                  only when ID is 'linuxmint'
# Exits with status 0 after printing when is_display_dist is true.
check_forked() {
    # Linux Mint is an Ubuntu fork: describe it as the Ubuntu release
    # named by UBUNTU_CODENAME.
    if [[ $ID == 'linuxmint' ]]; then
        ID='ubuntu'
        VERSION_CODENAME=$UBUNTU_CODENAME
        # Codenames not listed here leave VERSION_ID untouched.
        case $VERSION_CODENAME in
            focal) VERSION_ID=20.04 ;;
            jammy) VERSION_ID=22.04 ;;
            noble) VERSION_ID=24.04 ;;
        esac
    fi

    # Only report (and stop) when the distribution display was requested.
    is_display_dist || return 0

    printf '%s\n' "You're using '$ID' version '$VERSION_ID' codename '$VERSION_CODENAME'."
    exit 0
}


# Detect the operating system and record it in global variables.
#
# Globals read:    DISTRIBUTION, SHOW_DISTRIBUTION
# Globals written: ID, VERSION_ID (major version only), VERSION_CODENAME,
#                  ARCH, repo_arch, plus whatever /etc/os-release defines
# Exits with status 0 after printing the detected tuple when
# SHOW_DISTRIBUTION is set (check_forked may also exit for --dist).
get_distribution() {
    if [[ $DISTRIBUTION ]]; then
        #
        # For testing purposes.  DISTRIBUTION is a space-separated tuple:
        #      ${ID} ${VERSION_ID} ${VERSION_CODENAME}
        # where $VERSION_CODENAME is only set on Debian and Ubuntu.
        # Because the list is space-separated, it must be quoted (or escaped)
        # on the command-line.  The values generated by this script for the
        # supported platforms follow.
        #
        #     debian 12 bookworm
        #     debian 13 trixie
        #     ubuntu 22 jammy
        #     ubuntu 24 noble
        #     almalinux 8
        #     almalinux 9
        #     rocky 8
        #     rocky 9
        #     amzn 2023
        #     opensuse-leap 15
        #

        read -r ID VERSION_ID VERSION_CODENAME <<< "$DISTRIBUTION"

        # The detection below is skipped on this path, but the RPM repo URL
        # and the tarball name are still built from ARCH and repo_arch.
        # Fill them in so they are never empty, without overriding values
        # that are already set.
        : "${ARCH:=$(arch)}"
        : "${repo_arch:=noarch}"
        return
    fi

    # Every Linux system that we officially support has /etc/os-release
    if [ -f /etc/os-release ]; then
        . /etc/os-release
    fi

    ARCH=$(arch)
    repo_arch='noarch'

    # AlmaLinux also ships an x86_64_v2 build; recognize it from the
    # architecture suffix of the package that owns /bin/sh.
    if [ "$ID" = 'almalinux' ]; then
        if rpm -qf /bin/sh | grep -q 'x86_64_v2'; then
            ARCH='x86_64_v2'
            repo_arch='x86_64_v2'
        fi
    fi

    check_forked

    # Keep only the major version, e.g. "22.04" becomes "22".
    VERSION_ID=${VERSION_ID%%.*}

    if [[ $SHOW_DISTRIBUTION ]]; then
        echo "${ID} ${VERSION_ID} ${VERSION_CODENAME}"
        exit 0
    fi
}

# Open port 9618/tcp in the "public" firewalld zone and reload the firewall.
# Does nothing unless firewall-cmd is installed and `firewall-cmd --state`
# succeeds.  Commands go through $sh_c, so a dry run only prints them.
do_configure_firewall() {
    # No firewall-cmd, or the firewall is not running: nothing to configure.
    command_exists firewall-cmd || return 0
    firewall-cmd --state &> /dev/null || return 0

    echo -e "\n# Open port 9618 for use by HTCondor"
    (
        # Trace the real commands; a dry run already prints them.
        is_dry_run || set -x
        $sh_c "firewall-cmd --zone=public --add-port=9618/tcp --permanent"
        $sh_c "firewall-cmd --reload"
    )
}

# Start HTCondor: through systemd when PID 1 is 'systemd' and systemctl is
# available, otherwise by launching condor_master directly.  Commands go
# through $sh_c, so a dry run only prints them.
do_start_service() {
    if [[ "$(ps --pid 1 -o comm -h)" == 'systemd' ]] && command_exists systemctl; then
        echo -e "\n# Start the HTCondor service via systemd"
        (
            # Trace the real commands; a dry run already prints them.
            is_dry_run || set -x
            $sh_c "systemctl enable condor"
            $sh_c "systemctl start condor"
        )
    else
        echo -e "\n# Start the HTCondor service in the background (without systemd)"
        (
            is_dry_run || set -x
            $sh_c "condor_master"
        )
    fi
}

# Set up password and token security for a role-based installation:
# remove the default 9.0 security config, store the pool password, and
# issue this machine a token for condor@${CONDOR_HOST}.
#
# Globals read: sh_c, PASSWORD, CONDOR_HOST (and DRY_RUN via is_dry_run)
do_token_security() {
    # Remove the default 9.0 security configuration.  Arguably, the
    # get_htcondor metaknobs should instead check for SECURITY_MODEL == 9.0
    # and adjust accordingly, which might make microversion upgrades easier.
    $sh_c "rm -f /etc/condor/config.d/00-htcondor-9.0.config"

    # Ensure that the password directory exists, since the condor_master has
    # not yet been run.
    $sh_c "umask 0077; mkdir -p \$(condor_config_val SEC_PASSWORD_DIRECTORY)"

    # 'printf' is a shell built-in, which means no command-line to show
    # up in 'ps' and leak the password.  You MUST emit the password before
    # the '$sh_c', because the '$sh_c' itself will show up in 'ps'.
    # But if we're a dry run, then we need the $sh_c first.
    if is_dry_run; then
        $sh_c "echo -n \"${PASSWORD}\" | condor_store_cred add -c -i -"
    else
        # printf '%s' rather than 'echo -n': echo would treat a password
        # such as "-e" or "-n" as one of its own options and send nothing.
        printf '%s' "${PASSWORD}" | $sh_c "condor_store_cred add -c -i -"
    fi

    # Ensure that the tokens directory exists, since the condor_master has
    # not yet been run.
    $sh_c "umask 0077; mkdir -p \$(condor_config_val SEC_TOKEN_SYSTEM_DIRECTORY)"

    # Now issue myself a token.
    # NOTE(review): the token is written to the hard-coded /etc/condor/tokens.d
    # rather than to the SEC_TOKEN_SYSTEM_DIRECTORY queried above.
    $sh_c "umask 0077; condor_token_create -identity condor@${CONDOR_HOST} > /etc/condor/tokens.d/condor@${CONDOR_HOST}"
}

# Install and configure HTCondor for the detected distribution.
#
# In dry-run mode every system-changing command is printed (sh_c is "echo")
# instead of being executed.  Otherwise commands run through $sh_c, which
# becomes "sudo -E sh -c" or "su -c" for non-root users and keeps the value
# it already had when the user is root.
#
# Globals read:    ROLE, PASSWORD, CONDOR_HOST, SHARED_FS_DOMAIN, REPO_SUFFIX,
#                  YUM_REPO, DOWNLOAD_URL, CHANNEL_DIR, PRETTY_NAME, NAME, and
#                  ID, VERSION_ID, VERSION_CODENAME, ARCH, repo_arch as set
#                  by get_distribution
# Globals written: user, sh_c, repo_cmd, PACKAGE, PASSWORD, DOMAIN_CONFIG,
#                  CONFIG_FILE
# Exits with status 1 when root access is unavailable, when a previous
# installation is found, when no password is supplied for a role, or when
# the operating system is unsupported.
do_install() {
    # Figure out the distribution and version we are running on.
    # This will set $ID and $VERSION_ID.
    get_distribution

    user=$(id -un 2>/dev/null || true)

    if is_dry_run; then
        sh_c="echo"
    elif [ "$user" != 'root' ]; then
        if command_exists sudo; then
            sh_c='sudo -E sh -c'
        elif command_exists su; then
            sh_c='su -c'
        else
            cat >&2 <<-'EOF'
Error: this installer needs the ability to run commands as root.
We are unable to find either "sudo" or "su" available to make this happen.
EOF
            exit 1
        fi
    fi

    case $ID in
        centos|rhel|almalinux|rocky|amzn|suse|opensuse-leap)
            repo_cmd="dnf --assumeyes"
            case $ID in
                suse|opensuse-leap)
                    repo_cmd="zypper --non-interactive"
                    ;;
            esac
    esac
    #
    # Check for a previous installation.  The instructions below remove
    # the HTCondor package(s) and whatever packages they depended on but
    # which no other package(s) did.  However, neither set of instructions
    # removes HTCondor's repositories, which will prevent downgrades.  I
    # think that's OK for now.
    #
    if [ -f "/etc/condor/condor_config" ]; then
        cat >&2 <<-'EOF'
Error: HTCondor appears to have been installed previously on this system.

You may update the existing install, or remove and then re-install.
EOF
        case $ID in
        ubuntu|debian)
            PACKAGE="condor"
            if [[ ! $ROLE ]]; then
                PACKAGE="minicondor"
            fi

            cat >&2 <<-EOF
To update: $sh_c "apt-get update && apt-get upgrade ${PACKAGE}"
To remove: $sh_c "apt-get -y remove --purge ${PACKAGE} && apt-get -y autoremove --purge && rm -fr /etc/condor"
EOF
            ;;
        centos|rhel|almalinux|rocky|amzn|suse|opensuse-leap)
            PACKAGE="condor"
            if [[ ! $ROLE ]]; then
                PACKAGE="minicondor"
            fi

            cat >&2 <<-EOF
To update: $sh_c "${repo_cmd} update ${PACKAGE}"
To remove: $sh_c "${repo_cmd} remove ${PACKAGE} && rm -fr /etc/condor"
EOF
            ;;
        esac
        exit 1
    fi


    #
    # Check for the pool password / default signing key before installing.
    #
    if [[ $ROLE ]]; then
        if [[ ! $PASSWORD ]]; then
            if ! is_dry_run; then
                echo -n "Enter a pool password: "
                read -r PASSWORD
            else
                PASSWORD="NONE"
                echo "SKIPPING PASSWORD PROMPT DURING DRY RUN"
            fi
        fi

        if [[ ! $PASSWORD ]]; then
            echo
            echo "You must set a password!  Did you replace \$htcondor_password with a password?"
            echo
            exit 1
        fi
    fi

    if [[ ! $ROLE ]]; then
        echo -e "\n# Installing mini HTCondor for $PRETTY_NAME"
    else
        echo -e "\n# Installing HTCondor as ${ROLE} for $PRETTY_NAME"
    fi

    # Run install binaries for each distro accordingly
    case $ID in
        ubuntu|debian)
            (
                if ! is_dry_run; then
                    set -x
                fi

                echo -e "\n# Adding our repository"
                apt_get='DEBIAN_FRONTEND=noninteractive apt-get'

                # Install our repository key.
                $sh_c "${apt_get} update"
                $sh_c "${apt_get} install -y curl gnupg"
                $sh_c "mkdir -p /etc/apt/keyrings"
                $sh_c "curl -fsSL ${DOWNLOAD_URL}/repo/keys/HTCondor-${CHANNEL_DIR}-Key -o /etc/apt/keyrings/htcondor.asc"

                # Our repository redirects to https even if you try to
                # avoid it, and Debian 9 doesn't come with this by default.
                # Pass -y like every other install here, so apt-get never
                # stops to ask for confirmation.
                $sh_c "${apt_get} install -y apt-transport-https"

                # Add our repository.
                HTCONDOR_LIST=/etc/apt/sources.list.d/htcondor.list
                $sh_c "curl -fsSL ${DOWNLOAD_URL}/repo/${ID}/htcondor-${CHANNEL_DIR}-${VERSION_CODENAME}.list -o ${HTCONDOR_LIST}"
                if [ "${REPO_SUFFIX}" ]; then
                    # Uncomment the entries for the requested repo suffix.
                    # This must go through $sh_c like everything else, so a
                    # dry run only prints it and a real run has root access.
                    $sh_c "sed -i '/${REPO_SUFFIX}/s/^#//' ${HTCONDOR_LIST}"
                fi

                echo -e "\n# Updating package lists"
                $sh_c "${apt_get} update"

                echo -e "\n# Installing ps"
                $sh_c "${apt_get} install -y procps"

                # In the Ubuntu 20.04 Docker container, the following packages
                # need to be installed non-interactively, because the pipe
                # out of curl eats the tty they'd like to use:
                #   keyboard-configuration
                #   console-setup
                #   tzdata
                # apt-get will succeed if these packages are already installed,
                # but for now let's not risk screwing things up by adding them
                # on the platforms that don't need them.
                case $ID.$VERSION_CODENAME in
                    ubuntu.bionic|ubuntu.focal|ubuntu.jammy|ubuntu.noble)
                        echo -e "\n# Preconfiguring packages for Ubuntu."
                        $sh_c "${apt_get} install -y keyboard-configuration console-setup tzdata"
                    ;;
                esac

                echo -e "\n# Installing HTCondor binaries and dependencies"
                if [[ ! $ROLE ]]; then
                    $sh_c "${apt_get} install -y minicondor"
                else
                    $sh_c "${apt_get} install -y condor"
                fi
            )
            ;;
        centos|rhel|almalinux|rocky|amzn|suse|opensuse-leap)
            (
            if ! is_dry_run; then
                set -x
            fi

            # Some distros don't include ps by default.
            case $ID in
            amzn|almalinux|rocky)
                echo -e "\n# Installing ps"
                $sh_c "${repo_cmd} install procps-ng"
                ;;
            suse|opensuse-leap)
                echo -e "\n# Installing ps"
                $sh_c "${repo_cmd} install procps"
                ;;
            esac

            repo_dist=$ID
            case $ID in
                centos|almalinux|rocky)
                    repo_dist="el"
                    echo -e "\n# Installing EPEL"
                    $sh_c "${repo_cmd} install epel-release || ${repo_cmd} reinstall epel-release"
                    ;;
                rhel)
                    repo_dist="el"
                    echo -e "\n# Installing EPEL"
                    $sh_c "${repo_cmd} install https://dl.fedoraproject.org/pub/epel/epel-release-latest-${VERSION_ID}.noarch.rpm || ${repo_cmd} reinstall https://dl.fedoraproject.org/pub/epel/epel-release-latest-${VERSION_ID}.noarch.rpm"
                    ;;
                suse|opensuse-leap)
                    repo_dist="leap"
                    ;;
            esac
            repo_suffix="${repo_dist}${VERSION_ID}"
            repo_rpm="${DOWNLOAD_URL}/repo/${CHANNEL_DIR}/htcondor-release-current.${repo_suffix}.${repo_arch}.rpm"

            # Just to make our lives harder, the packages we need from
            # EPEL depend on repositories which are disabled by default,
            # and those repositories and how to enable them differ between
            # RHEL and Rocky and between version 7 and 8.
            #
            case ${ID}.${VERSION_ID} in
                almalinux.8|rocky.8 ) $sh_c "dnf -y install dnf-plugins-core"
                                 $sh_c "dnf config-manager --set-enabled PowerTools || dnf config-manager --set-enabled powertools"
                                 ;;
                almalinux.9|almalinux.10|rocky.9|rocky.10 ) $sh_c "dnf -y install dnf-plugins-core"
                                 $sh_c "dnf config-manager --set-enabled crb"
                                 ;;
                # subscription-manager only works if you've already
                # "register"ed and "attach"ed the system.
                rhel.7 ) $sh_c "subscription-manager repos --enable \"rhel-*-optional-rpms\" --enable \"rhel-*-extras-rpms\" --enable \"rhel-ha-for-rhel-*-server-rpms\""
                         ;;
                rhel.8 )
                         $sh_c "subscription-manager repos --enable \"codeready-builder-for-rhel-8-${ARCH}-rpms\""
                         ;;
                rhel.9 )
                         $sh_c "subscription-manager repos --enable \"codeready-builder-for-rhel-9-${ARCH}-rpms\""
                         ;;
                rhel.10 )
                         $sh_c "subscription-manager repos --enable \"codeready-builder-for-rhel-10-${ARCH}-rpms\""
                         ;;
            esac

            echo -e "\n# Installing the HTCondor repo"
            case $ID in
                suse|opensuse-leap)
                    # There is a way to setup SUSE repos where you can install the repo
                    # with a single zypper command. But, I don't know how to do that (yet)
                    $sh_c "${repo_cmd} --no-gpg-checks install ${repo_rpm} || ${repo_cmd} reinstall ${repo_rpm}"
                    ;;
                *)
                    $sh_c "${repo_cmd} install ${repo_rpm} || ${repo_cmd} reinstall ${repo_rpm}"
                    ;;
            esac

            # Needed for SUSE, limits chattyness for other RPM distros
            echo -e "\n# Importing RPM keys"
            for key in /etc/pki/rpm-gpg/RPM-GPG-KEY-HTCondor-*; do
                $sh_c "rpmkeys --import $key"
            done

            echo -e "\n# Installing HTCondor"
            if [[ ! $ROLE ]]; then
                $sh_c "${repo_cmd} install ${YUM_REPO} minicondor"
            else
                $sh_c "${repo_cmd} install ${YUM_REPO} condor"
            fi
            )
            ;;
        *)
            if [[ ! $ID ]]; then
                if is_darwin; then
                    echo
                    echo "Error: Unsupported operating system 'macOS'"
                    echo
                    exit 1
                fi
            fi
            echo
            echo "Error: Unsupported distribution '$NAME'"
            echo
            exit 1
            ;;
    esac

    if [[ $SHARED_FS_DOMAIN ]]; then
        DOMAIN_CONFIG=$(cat <<-EOF
# HTCondor assumes that any two machines with the same value
# for this variable have the same shared filesystems.  Shared
# filesystems tend not to scale as well as you would like, but
# they do make it simpler to explain how a job accesses its file.
FILESYSTEM_DOMAIN = ${SHARED_FS_DOMAIN}

# If your jobs are accessing a shared filesystem, they probably
# need to be run as the user who submitted them (as opposed to
# user nobody or a local user which only runs batch jobs).  This
# variable must be set to the same thing on the submit machine
# and on the execute machine to do this.
UID_DOMAIN = ${SHARED_FS_DOMAIN}

# The UID_DOMAIN must normally be a suffix of the fully-qualified
# DNS name of the submit machine (as determined by a reverse
# lookup of the IP address used to contact the execute machine).
# Setting this variable relaxes that requirement, which is safe
# to do for this configuration because only submit machines you
# trust can contact your execute machines.
TRUST_UID_DOMAIN = TRUE

# Normally, before running a job as a particular user (that is,
# not as user nobody), HTCondor checks to make sure that user
# is in the password file.  Not all methods for sharing UIDs
# across machines store every user in every password file (for
# example, LDAP does not).  Setting this variable relaxes
# this requirement.
SOFT_UID_DOMAIN = TRUE

EOF
        )
    fi

    # Configure the role, if any.
    echo -e "\n# Configuring role, if any ..."
    case $ROLE in
        "central manager")
            CONFIG_FILE="/etc/condor/config.d/01-central-manager.config"
            $sh_c "echo CONDOR_HOST = ${CONDOR_HOST} > ${CONFIG_FILE}"
            $sh_c "echo '# For details, run condor_config_val use role:get_htcondor_central_manager' >> ${CONFIG_FILE}"
            $sh_c "echo 'use role:get_htcondor_central_manager' >> ${CONFIG_FILE}"

            do_token_security
            ;;

        "submit")
            CONFIG_FILE="/etc/condor/config.d/01-submit.config"
            $sh_c "echo CONDOR_HOST = ${CONDOR_HOST} > ${CONFIG_FILE}"
            $sh_c "echo '# For details, run condor_config_val use role:get_htcondor_submit' >> ${CONFIG_FILE}"
            $sh_c "echo 'use role:get_htcondor_submit' >> ${CONFIG_FILE}"

            if [[ ${SHARED_FS_DOMAIN} ]]; then
                $sh_c "echo '${DOMAIN_CONFIG}' >> ${CONFIG_FILE}"
            fi

            do_token_security
            ;;

        "execute")
            CONFIG_FILE="/etc/condor/config.d/01-execute.config"
            $sh_c "echo CONDOR_HOST = ${CONDOR_HOST} > ${CONFIG_FILE}"
            $sh_c "echo '# For details, run condor_config_val use role:get_htcondor_execute' >> ${CONFIG_FILE}"
            $sh_c "echo 'use role:get_htcondor_execute' >> ${CONFIG_FILE}"

            if [[ "${SHARED_FS_DOMAIN}" ]]; then
                $sh_c "echo '${DOMAIN_CONFIG}' >> ${CONFIG_FILE}"
            fi

            do_token_security
            ;;

        *)
            # The mini[ht]condor package has done everything for us.
            ;;

    esac

    # Open port 9618 on system firewall
    do_configure_firewall

    # Finally, start the HTCondor service
    do_start_service

    echo
}

# Download the HTCondor tarball matching the detected platform into
# ./condor.tar.gz.
#
# Globals read:    DOWNLOAD_URL, CHANNEL_DIR, NAME, and ID, VERSION_ID, ARCH
#                  as set by get_distribution
# Globals written: TARBALL_BASE_URL, TARBALL_DIR_URL, OS_NAME, OS_VERSION,
#                  TARBALL_NAME, TARBALL_URL (and ARCH on macOS)
# Exits with status 1 on an unsupported, non-macOS platform.
do_download() {
    get_distribution

    TARBALL_BASE_URL="${DOWNLOAD_URL}/tarball"
    TARBALL_DIR_URL="${TARBALL_BASE_URL}/${CHANNEL_DIR}/current"
    OS_VERSION=${VERSION_ID}

    # Translate the os-release ID into the name used in tarball file names.
    # We don't release HTCondor for Fedora, so it has no entry here.
    case "${ID}" in
        ubuntu)
            OS_NAME="Ubuntu" ;;
        debian)
            OS_NAME="Debian" ;;
        almalinux | centos | rhel | rocky)
            OS_NAME="AlmaLinux" ;;
        amzn)
            OS_NAME="AmazonLinux" ;;
        suse | opensuse-leap)
            OS_NAME="openSUSE" ;;
        *)
            if ! is_darwin; then
                printf '\n%s\n\n' "Error: Unsupported distribution '$NAME'"
                exit 1
            fi
            # macOS always maps to this fixed version and architecture.
            OS_NAME=macOS
            OS_VERSION=13
            ARCH=x86_64
            ;;
    esac

    TARBALL_NAME="condor-${ARCH}_${OS_NAME}${OS_VERSION}-stripped.tar.gz"
    TARBALL_URL="${TARBALL_DIR_URL}/${TARBALL_NAME}"

    echo "Downloading to condor.tar.gz..."
    curl -fSL "${TARBALL_URL}" -o condor.tar.gz
}

# Global defaults; the command-line options parsed below may override them.
DOWNLOAD_URL='https://htcss-downloads.chtc.wisc.edu'
CHANNEL='feature'
# Dry run is the default: commands are printed unless --no-dry-run is given.
DRY_RUN=1
# Command prefix used to run each installation command.
sh_c="sh -c"
# The pool password may be supplied through the environment.
PASSWORD="${GET_HTCONDOR_PASSWORD}"
unset DIST

# Process command-line options.  An option that takes a value consumes it
# with an extra shift; the shift at the bottom of the loop consumes the
# option itself.
while (( $# > 0 )); do
    case "$1" in
        -h|--help)
            usage
            ;;
        --dist)
            DIST=1
            ;;
        --download)
            DOWNLOAD=1
            ;;
        --dry-run)
            DRY_RUN=1
            ;;
        --no-dry-run)
            unset DRY_RUN
            ;;
        --show-distribution)
            SHOW_DISTRIBUTION=TRUE
            ;;
        --distribution)
            DISTRIBUTION=$2
            [[ -n $DISTRIBUTION ]] || { echo "--distribution requires an argument"; exit 1; }
            shift
            ;;
        --password)
            PASSWORD=$2
            [[ -n $PASSWORD ]] || { echo "--password requires an argument"; exit 1; }
            shift
            ;;
        --shared-filesystem-domain)
            SHARED_FS_DOMAIN=$2
            [[ -n $SHARED_FS_DOMAIN ]] || { echo "--shared-filesystem-domain requires an argument"; exit 1; }
            shift
            ;;
        --repo-suffix)
            REPO_SUFFIX=$2
            [[ -n $REPO_SUFFIX ]] || { echo "--repo-suffix requires a repo-suffix as an argument"; exit 1; }
            YUM_REPO=--enablerepo=htcondor-${REPO_SUFFIX}
            shift
            ;;
        --channel)
            # Several aliases are accepted for each of the two channels.
            case "$2" in
                lts|stable)
                    CHANNEL="lts"
                    ;;
                feature|current|latest|developer)
                    CHANNEL="feature"
                    ;;
                *)
                    echo "Illegal option $2 for --channel"
                    echo "Run $0 --help for usage"
                    exit 1
                    ;;
            esac
            shift
            ;;
        --minicondor)
            unset ROLE
            ;;
        --cm|--central-manager)
            ROLE="central manager"
            CONDOR_HOST=$2
            [[ -n $CONDOR_HOST ]] || { echo "--central-manager requires its external name as an argument"; exit 1; }
            shift
            ;;
        --ap|--submit)
            ROLE="submit"
            CONDOR_HOST=$2
            [[ -n $CONDOR_HOST ]] || { echo "--submit requires the central manager as an argument"; exit 1; }
            shift
            ;;
        --ep|--execute)
            ROLE="execute"
            CONDOR_HOST=$2
            [[ -n $CONDOR_HOST ]] || { echo "--execute requires the central manager as an argument"; exit 1; }
            shift
            ;;
        *)
            echo "Illegal option $1"
            echo "Run $0 --help for usage"
            exit 1
            ;;
    esac
    shift
done

# Map the selected channel onto its download directory.
#
# The lts value has to be updated once every LTS release; we don't want
# to update a symlink, like we do for 'feature', because we don't
# want to surprise other lts-channel users with new features.
case "${CHANNEL}" in
    lts)
        CHANNEL_DIR=25.0
        ;;
    feature)
        CHANNEL_DIR=25.x
        ;;
esac

# The real work is wrapped up in functions so that we have some protection
# against only getting half the file during "curl | sh".

if ! is_download; then
    do_install
else
    do_download
fi

exit 0
