#!/bin/bash
#
# Create and start systemd-nspawn "jails" next to this script.
#
# Commands:
#   (none)       ask whether to create a new jail
#   create       interactively create a new jail
#   start NAME   start the jail called NAME with systemd-run
#
# Must be run as root.

# -E makes functions inherit the ERR trap installed below.
set -eEuo pipefail
shopt -s nullglob

ABSOLUTE_SCRIPT_PATH="$(realpath "${BASH_SOURCE[0]}")"
SCRIPT_NAME=$(basename "${ABSOLUTE_SCRIPT_PATH}")
SCRIPT_DIR_PATH="$(dirname "${ABSOLUTE_SCRIPT_PATH}")"

USAGE="WARNING: EXPERIMENTAL AND WORK IN PROGRESS, USE ONLY FOR TESTING!

TODO: add version string
Usage: ./${SCRIPT_NAME} COMMAND [ARG]

TODO: complete writing usage
"

# Paths below are relative to SCRIPT_DIR_PATH
JAILS_DIR_PATH='jails'
JAIL_ROOTFS_NAME='rootfs'
JAIL_CONFIG_NAME='config'

#######################################
# Print a message to stderr (backslash escapes are interpreted).
# Arguments: $1 - message
#######################################
error() {
  echo -e "${1}" >&2
}

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $1 - message
#######################################
fail() {
  error "${1}"
  exit 1
}

if [[ ${UID} -ne 0 ]]; then
  echo "${USAGE}"
  fail "Run this script as root..."
fi

#######################################
# Print the script line that failed, with three lines of context on
# either side, to stderr.
# Arguments: $1 - line number of the failing command
#######################################
trace() {
  # https://unix.stackexchange.com/a/504829/477308
  echo 'Error occurred:' >&2
  awk 'NR>L-4 && NR<L+4 { printf "%-5d%3s%s\n",NR,(NR==L?">>>":""),$0 }' \
    L="${1}" "${ABSOLUTE_SCRIPT_PATH}" >&2
}

# Trap errors
trap 'trace $LINENO' ERR

#####################
# START FUNCTIONALITY
#####################

#######################################
# Split a string into words using xargs quoting rules (single quotes,
# double quotes and backslashes) and append each word to an array.
# Empty or whitespace-only input appends nothing.
# Arguments: $1 - name of the array variable to append to
#            $2 - the string to split
#######################################
append_quoted_args() {
  local -n target_array="${1}"
  local words word

  # Use printf instead of xargs' default echo: echo would swallow
  # words like -n or -e, which are valid systemd-nspawn options.
  # -r prevents a spurious empty word when the input is empty.
  words="$(printf '%s' "${2}" | xargs -r -n 1 printf '%s\n')" ||
    fail "ERROR: Couldn't parse arguments (check the quoting): ${2}"

  [[ -n "${words}" ]] || return 0

  while IFS= read -r word; do
    target_array+=("${word}")
  done <<<"${words}"
}

#######################################
# Start a jail with systemd-run + systemd-nspawn using the settings
# stored in the jail's config file.
# Globals:   SCRIPT_DIR_PATH, JAILS_DIR_PATH, JAIL_ROOTFS_NAME,
#            JAIL_CONFIG_NAME (read)
# Arguments: $1 - name of the jail
#######################################
start_jail() {
  [[ -n "${1-}" ]] || fail "Please specify the name of the jail to start."

  local jail_name="${1}"
  local jail_path="${JAILS_DIR_PATH}/${jail_name}"
  local jail_config_path="${jail_path}/${JAIL_CONFIG_NAME}"

  # All jail paths are relative to the script directory, so make sure we are
  # there no matter from where the script was invoked.
  cd "${SCRIPT_DIR_PATH}" || fail "Could not change working directory to ${SCRIPT_DIR_PATH}..."

  [[ -f "${jail_config_path}" ]] || fail "ERROR: Couldn't find: ${jail_config_path}."

  echo 'Loading config...'

  # Defaults, so a key missing from the config doesn't abort under 'set -u'
  local docker_compatible=0
  local gpu_passthrough=0
  local systemd_nspawn_user_args=''
  local systemd_run_default_args=''
  local systemd_nspawn_default_args=''

  local line key value
  # The '|| [[ -n ... ]]' also processes a last line without trailing newline
  while read -r line || [[ -n "${line}" ]]; do
    key="${line%%=*}"
    value="${line#*=}"

    case "${key}" in
      "DOCKER_COMPATIBLE") docker_compatible="${value}" ;;
      "GPU_PASSTHROUGH") gpu_passthrough="${value}" ;;
      "SYSTEMD_NSPAWN_USER_ARGS") systemd_nspawn_user_args="${value}" ;;
      "SYSTEMD_RUN_DEFAULT_ARGS") systemd_run_default_args="${value}" ;;
      "SYSTEMD_NSPAWN_DEFAULT_ARGS") systemd_nspawn_default_args="${value}" ;;
    esac
  done <"${jail_config_path}"

  echo 'Config loaded!'

  local systemd_run_additional_args=("--unit=jlmkr-${jail_name}" "--working-directory=./${jail_path}")
  local systemd_nspawn_additional_args=("--machine=${jail_name}" "--directory=${JAIL_ROOTFS_NAME}")

  # Compare as strings: the values come from a text file and must not be
  # evaluated as arithmetic expressions.
  if [[ "${docker_compatible}" == 1 ]]; then
    # Enable ip forwarding on the host (docker needs it)
    echo 1 >/proc/sys/net/ipv4/ip_forward

    # To properly run docker inside the jail, we need to lift restrictions
    # Without DevicePolicy=auto images with device nodes may not be pulled
    # https://github.com/kinvolk/kube-spawn/pull/328
    systemd_run_additional_args+=(--setenv=SYSTEMD_SECCOMP=0 --property=DevicePolicy=auto)

    # Add additional flags required for docker
    systemd_nspawn_additional_args+=(--capability=all "--system-call-filter=add_key keyctl bpf")
  fi

  if [[ "${gpu_passthrough}" == 1 ]]; then
    systemd_nspawn_additional_args+=("--property=DeviceAllow=char-drm rw")

    # Detect intel GPU device and if present add bind flag
    if [[ -d /dev/dri ]]; then
      systemd_nspawn_additional_args+=(--bind=/dev/dri)
    fi

    # Detect nvidia GPU (any /dev/nvidia* entry, e.g. /dev/nvidia0)
    if compgen -G '/dev/nvidia*' >/dev/null; then
      if command -v nvidia-container-cli >/dev/null; then
        # Mount the nvidia driver files, so we are always in sync with the host
        while IFS= read -r line; do
          [[ -n "${line}" ]] || continue
          # No quotes around the path: each array element is passed to
          # systemd-nspawn verbatim as a single argument.
          if [[ "${line}" == /dev/* ]]; then
            systemd_nspawn_additional_args+=("--bind=${line}")
          else
            systemd_nspawn_additional_args+=("--bind-ro=${line}")
          fi
        done < <(nvidia-container-cli list)
      else
        error "nvidia GPU detected but nvidia-container-cli was not found, skipping nvidia passthrough."
      fi
    fi
  fi

  # Build the array of arguments in the order:
  # systemd-run args, '--', systemd-nspawn, systemd-nspawn args
  local args=()
  append_quoted_args args "${systemd_run_default_args}"
  args+=("${systemd_run_additional_args[@]}")
  args+=(-- systemd-nspawn)
  append_quoted_args args "${systemd_nspawn_default_args}"
  args+=("${systemd_nspawn_additional_args[@]}")
  append_quoted_args args "${systemd_nspawn_user_args}"

  # Only used for display: %q outputs each argument in a format that can be
  # reused as shell input
  # https://ss64.com/bash/printf.html
  local args_string
  args_string="$(printf '%q ' "${args[@]}")"

  echo
  echo "All the arguments to pass to systemd-run:"
  printf '%s\n' "${args[@]}"
  echo
  echo "Starting jail with the following command:"
  echo
  echo "systemd-run ${args_string}"
  echo

  # Pass the array directly, every element is exactly one argument
  systemd-run "${args[@]}"

  echo
  echo "Check logging:"
  echo "journalctl -u jlmkr-${jail_name}"
  echo
  echo "Check status:"
  echo "systemctl status jlmkr-${jail_name}"
  echo
  echo "Stop the jail:"
  echo "machinectl stop ${jail_name}"
  echo
  echo "Get a shell:"
  echo "machinectl shell ${jail_name}"
}

######################
# CREATE FUNCTIONALITY
######################

#######################################
# Remove a (partially created) jail directory, if it is a directory.
# Arguments: $1 - path of the jail directory
#######################################
cleanup() {
  local jail_path="${1}"

  if [[ -d "${jail_path}" ]]; then
    echo -e "\n\nCleaning up: ${jail_path}\n"
    rm -rf -- "${jail_path}"
  fi
}

#######################################
# Run chmod only if the current mode differs from the wanted mode.
# Arguments: $1 - wanted mode (as printed by 'stat -c%a', e.g. 700)
#            $2 - path
#######################################
stat_chmod() {
  if [[ "$(stat -c%a "${2}")" != "${1}" ]]; then
    chmod "${1}" "${2}"
  fi
}

#######################################
# Check that a file has the expected sha256 checksum of the lxc
# download script.
# Arguments: $1 - path of the file to check
# Returns:   0 if the checksum matches, non-zero otherwise
#######################################
validate_download_script() {
  # A missing file can never match (and sha256sum would complain on stderr)
  [[ -f "${1}" ]] || return 1

  printf '%s  %s\n' \
    '6cca2eda73c7358c232fecb4e750b3bf0afa9636efb5de6a9517b7df78be12a4' "${1}" |
    sha256sum --check >/dev/null
}

#######################################
# Interactively create a new jail: download a rootfs with the lxc
# download script, write the jail config and optionally start the jail.
# Globals:   SCRIPT_NAME, SCRIPT_DIR_PATH, JAILS_DIR_PATH,
#            JAIL_ROOTFS_NAME, JAIL_CONFIG_NAME (read)
#######################################
create_jail() {
  # TODO: show disclaimer

  local arch
  arch="$(dpkg --print-architecture)"
  local lxc_dir_path='.lxc'
  local lxc_cache_path="${lxc_dir_path}/cache"
  local lxc_download_script_path="${lxc_dir_path}/lxc-download.sh"

  if [[ "$(basename "${SCRIPT_DIR_PATH}")" != 'jailmaker' ]]; then
    error "${SCRIPT_NAME} needs to create files."
    error "Currently it can't decide if it's safe to create files in:"
    error "${SCRIPT_DIR_PATH}"
    fail "Please create a dedicated directory called 'jailmaker', store ${SCRIPT_NAME} there and try again."
  fi

  local reply

  # Check where the script is stored (not the directory it was called from)
  if [[ $(findmnt --target "${SCRIPT_DIR_PATH}" --output TARGET --noheadings --first-only) != /mnt/* ]]; then
    echo "${SCRIPT_NAME} should be on a pool mounted under /mnt (it currently isn't)."
    echo "Storing it on the boot-pool means losing all jails when updating TrueNAS."
    echo "If you continue, jails will be stored under:"
    echo "${SCRIPT_DIR_PATH}"
    read -p "Do you wish to ignore this warning and continue? [y/N] " -n 1 -r reply && echo
    # Enter accepts default (no)
    if ! [[ "${reply}" =~ ^[Yy]$ ]]; then
      exit
    fi
  fi

  cd "${SCRIPT_DIR_PATH}" || fail "Could not change working directory to ${SCRIPT_DIR_PATH}..."

  # Set appropriate permissions (if not already set) for this file, since it's executed as root
  stat_chmod 700 "${SCRIPT_NAME}"

  # Create the lxc dirs if nonexistent
  mkdir -p "${lxc_dir_path}"
  stat_chmod 700 "${lxc_dir_path}"
  mkdir -p "${lxc_cache_path}"
  stat_chmod 700 "${lxc_cache_path}"

  # Create the dir where to store the jails
  mkdir -p "${JAILS_DIR_PATH}"
  stat_chmod 700 "${JAILS_DIR_PATH}"

  # Fetch the lxc download script if not present locally (or hash doesn't match)
  if ! validate_download_script "${lxc_download_script_path}"; then
    curl -fSL https://raw.githubusercontent.com/Jip-Hop/lxc/58520263041b6864cadad96278848f9b8ce78ee9/templates/lxc-download.in -o "${lxc_download_script_path}"
    # Validate after download to prevent executing arbitrary code as root
    if ! validate_download_script "${lxc_download_script_path}"; then
      # Don't leave the untrusted file behind
      rm -f -- "${lxc_download_script_path}"
      fail 'Abort! Downloaded script has unexpected contents.'
    fi
  fi

  stat_chmod 700 "${lxc_download_script_path}"

  local distro release

  read -p "Install the recommended distro (Debian 11)? [Y/n] " -n 1 -r reply && echo
  # Enter accepts default (yes)
  if [[ "${reply}" =~ ^([Yy]|)$ ]]; then
    distro='debian'
    release='bullseye'
  else
    echo
    echo "ADVANCED USAGE"
    echo "You may now choose from a list which distro to install."
    echo "Not all of them will work with ${SCRIPT_NAME} (these images are made for LXC)."
    echo "Distros based on systemd probably work (e.g. Ubuntu, Arch Linux and Rocky Linux)."
    echo "Others (Alpine, Devuan, Void Linux) probably won't."
    echo
    read -p "Press any key to continue: " -n 1 -r reply && echo
    # Failing to list the images is not fatal, the user may still type a distro
    LXC_CACHE_PATH=${lxc_cache_path} "${lxc_download_script_path}" --list --arch="${arch}" || :
    echo "Choose from the DIST column."
    read -e -r -p "Distribution: " distro && echo
    echo "Choose from the RELEASE column (or ARCH if RELEASE is empty)."
    read -e -r -p "Release: " release && echo
  fi

  local jail_name jail_path

  # Keep asking until we get a valid name which isn't in use yet
  while true; do
    read -e -r -p "Enter jail name: " jail_name && echo
    if ! [[ "${jail_name}" =~ ^[.a-zA-Z0-9-]{1,64}$ && "${jail_name}" != '.'* && "${jail_name}" != *'.' && "${jail_name}" != *'..'* ]]; then
      printf '%s\n' \
        'A valid name consists of:' \
        '- allowed characters (alphanumeric, dash, dot)' \
        '- no leading or trailing dots' \
        '- no sequences of multiple dots' \
        '- max 64 characters' \
        ''
    else
      jail_path="${JAILS_DIR_PATH}/${jail_name}"
      if [[ -e "${jail_path}" ]]; then
        echo "A jail with this name already exists."
        echo
      else
        # Accept the name
        break
      fi
    fi
  done

  # Cleanup on exit, but only once the jail_path is final
  # Otherwise we may cleanup the wrong directory
  trap 'cleanup "${jail_path}"' EXIT

  local docker_compatible gpu_passthrough systemd_nspawn_user_args

  echo "${SCRIPT_NAME} will not install docker for you."
  echo "But it can configure the jail with the capabilities required to run docker."
  echo "You can turn docker_compatible mode on/off post-install."
  echo

  read -p "Make jail docker compatible right now? [y/N] " -n 1 -r reply && echo
  # Enter accepts default (no)
  if [[ "${reply}" =~ ^[Yy]$ ]]; then docker_compatible=1; else docker_compatible=0; fi

  read -p "Give access to the GPU inside the jail? [y/N] " -n 1 -r reply && echo
  # Enter accepts default (no)
  if [[ "${reply}" =~ ^[Yy]$ ]]; then gpu_passthrough=1; else gpu_passthrough=0; fi

  # TODO: ask to show nspawn manual
  echo
  echo "You may pass additional systemd-nspawn flags."
  echo "For example to mount directories inside the jail you may add:"
  echo "--bind=/mnt/a/readwrite/directory --bind-ro=/mnt/a/readonly/directory"
  echo
  echo "Double check the syntax:"
  echo "https://manpages.debian.org/bullseye/systemd-container/systemd-nspawn.1.en.html"
  echo "With incorrect flags the jail may not start."
  echo "It's possible to correct/add/remove flags post-install."
  echo
  read -e -r -p "Additional flags: " systemd_nspawn_user_args && echo

  # Backslashes and colons need to be escaped for systemd-nspawn by the user:
  # e.g. to bind mount a file called:
  # weird chars :?\"
  # the corresponding command would be:
  # --bind-ro='/mnt/data/weird chars \:?\\"'

  # Create directory for rootfs
  local jail_rootfs_path="${jail_path}/${JAIL_ROOTFS_NAME}"
  mkdir -p "${jail_rootfs_path}"

  local jail_config_path="${jail_path}/${JAIL_CONFIG_NAME}"
  # LXC download script needs to write to this file during install
  # but we don't need its contents: the file is overwritten with our config below
  touch "${jail_config_path}"

  echo
  LXC_CACHE_PATH=${lxc_cache_path} "${lxc_download_script_path}" \
    --name="${jail_name}" --path="${jail_path}" --rootfs="${jail_rootfs_path}" \
    --arch="${arch}" --dist="${distro}" --release="${release}" ||
    fail "Aborted creating rootfs..."
  echo

  if [[ "$(basename "$(readlink -f "${jail_rootfs_path}/sbin/init")")" != systemd ]]; then
    echo "Chosen distro appears not to use systemd..."
    echo
    echo "You probably won't get a shell with:"
    echo "machinectl shell ${jail_name}"
    echo
    echo "You may get a shell with this command:"
    # About nsenter:
    # https://github.com/systemd/systemd/issues/12785#issuecomment-503019081
    # https://github.com/systemd/systemd/issues/3144
    # shellcheck disable=SC2016
    echo 'nsenter -t $(machinectl show '"${jail_name}"' -p Leader --value) -a /bin/sh -l'
    echo
    echo "Using this distro with ${SCRIPT_NAME} is not recommended."
    echo
    read -p "Abort creating jail? [Y/n] " -n 1 -r reply && echo
    # Enter accepts default (yes)
    if [[ "${reply}" =~ ^([Yy]|)$ ]]; then
      # The EXIT trap removes the partially created jail
      exit
    fi
  fi

  # Config which systemd handles for us
  rm -f "${jail_rootfs_path}/etc/machine-id"
  rm -f "${jail_rootfs_path}/etc/resolv.conf"
  # https://github.com/systemd/systemd/issues/852
  printf 'pts/%d\n' {0..10} >"${jail_rootfs_path}/etc/securetty"

  # Use mostly default settings for systemd-nspawn but with systemd-run instead of a service file
  # https://github.com/systemd/systemd/blob/main/units/systemd-nspawn%40.service.in
  # TODO: also compare settings for docker: https://github.com/docker/engine/blob/master/contrib/init/systemd/docker.service

  # The single quotes inside --description end up in the config file, where
  # they keep the description together as one argument when start_jail
  # splits the line again.
  local systemd_run_default_args=(
    --property=KillMode=mixed
    --property=Type=notify
    --property=RestartForceExitStatus=133
    --property=SuccessExitStatus=133
    --property=Delegate=yes
    --property=TasksMax=16384
    --collect
    --setenv=SYSTEMD_NSPAWN_LOCK=0
    "--description='This systemd-nspawn jail was created with jailmaker'"
  )
  local systemd_nspawn_default_args=(--keep-unit --quiet --boot)

  {
    echo "DOCKER_COMPATIBLE=${docker_compatible}"
    echo "GPU_PASSTHROUGH=${gpu_passthrough}"
    echo "SYSTEMD_NSPAWN_USER_ARGS=${systemd_nspawn_user_args}"
    echo
    echo "# You generally won't need to change the options below"
    echo "SYSTEMD_RUN_DEFAULT_ARGS=${systemd_run_default_args[*]}"
    echo "SYSTEMD_NSPAWN_DEFAULT_ARGS=${systemd_nspawn_default_args[*]}"
  } >"${jail_config_path}"

  chmod 700 "${jail_config_path}"

  # Remove the cleanup trap on exit
  trap - EXIT
  echo "Done creating the jail."
  echo

  read -p "Start the jail now? [Y/n] " -n 1 -r reply && echo
  # Enter accepts default (yes)
  if [[ "${reply}" =~ ^([Yy]|)$ ]]; then
    start_jail "${jail_name}"
  else
    echo 'Skipped starting jail.'
  fi
}

#######################
# COMMAND LINE HANDLING
#######################

# TODO document
# machinectl shell
# If that doesn't work try
# machinectl login
# But since there's no root password set, that won't work either
# So you'd have to get a shell via
# nsenter -t $(machinectl show alpine -p Leader --value) -a /bin/sh -l
# Then set a root password via passwd
# Then you may login via
# machinectl login

# TODO: recommend ssh ;)
# TODO: create a jlmkr shell command to try the above in case machinectl shell doesn't work

# TODO: document journalctl -u jlmkr-jailname

case "${1-""}" in

  '')
    read -p "Create a new jail? [Y/n] " -n 1 -r reply && echo
    # Enter accepts default (yes)
    # https://stackoverflow.com/a/1885534
    if [[ "${reply}" =~ ^([Yy]|)$ ]]; then
      create_jail
    else
      echo "${USAGE}"
    fi
    ;;

  create)
    create_jail
    ;;

  start)
    start_jail "${2-""}"
    ;;

  *)
    echo "${USAGE}"
    ;;
esac