jailmaker/jlmkr.py

#!/usr/bin/env python3

"""Create persistent Linux 'jails' on TrueNAS SCALE, \
with full access to all files via bind mounts, \
thanks to systemd-nspawn!"""

__version__ = "2.1.0"
__author__ = "Jip-Hop"
__copyright__ = "Copyright (C) 2023, Jip-Hop"
__license__ = "LGPL-3.0-only"
__disclaimer__ = """USE THIS SCRIPT AT YOUR OWN RISK!
IT COMES WITHOUT WARRANTY AND IS NOT SUPPORTED BY IXSYSTEMS."""

import argparse
import configparser
import contextlib
import hashlib
import io
import json
import os
import platform
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile
import time
import urllib.request
from collections import defaultdict
from inspect import cleandoc
from pathlib import Path, PurePath
from textwrap import dedent

DEFAULT_CONFIG = """startup=0
gpu_passthrough_intel=0
gpu_passthrough_nvidia=0
# Turning off seccomp filtering improves performance at the expense of security
seccomp=1

# Below you may add additional systemd-nspawn flags behind systemd_nspawn_user_args=
# To mount host storage in the jail, you may add: --bind='/mnt/pool/dataset:/home'
# To readonly mount host storage, you may add: --bind-ro=/etc/certificates
# To use macvlan networking add: --network-macvlan=eno1 --resolv-conf=bind-host
# To use bridge networking add: --network-bridge=br1 --resolv-conf=bind-host
# Ensure to change eno1/br1 to the interface name you want to use
# To allow syscalls required by docker add: --system-call-filter='add_key keyctl bpf'
systemd_nspawn_user_args=

# Specify command/script to run on the HOST before starting the jail
# For example to load kernel modules and config kernel settings
pre_start_hook=
# pre_start_hook=#!/usr/bin/bash
#     set -euo pipefail
#     echo 'PRE_START_HOOK_EXAMPLE'
#     echo 1 > /proc/sys/net/ipv4/ip_forward
#     modprobe br_netfilter
#     echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables
#     echo 1 > /proc/sys/net/bridge/bridge-nf-call-ip6tables

# Specify command/script to run on the HOST after starting the jail
# For example to attach to multiple bridge interfaces 
# when using --network-veth-extra=ve-myjail-1:veth1
post_start_hook=
# post_start_hook=#!/usr/bin/bash
#     set -euo pipefail
#     echo 'POST_START_HOOK_EXAMPLE'
#     ip link set dev ve-myjail-1 master br2
#     ip link set dev ve-myjail-1 up

# Specify a command/script to run on the HOST after stopping the jail
post_stop_hook=
# post_stop_hook=echo 'POST_STOP_HOOK_EXAMPLE'

# Only used while creating the jail
distro=debian
release=bookworm

# Specify command/script to run IN THE JAIL on the first start (once networking is ready in the jail)
# Useful to install packages on top of the base rootfs
initial_setup=
# initial_setup=bash -c 'apt-get update && apt-get -y upgrade'

# Usually no need to change systemd_run_default_args
systemd_run_default_args=--collect
    --property=Delegate=yes
    --property=RestartForceExitStatus=133
    --property=SuccessExitStatus=133
    --property=TasksMax=infinity
    --property=Type=notify
    --setenv=SYSTEMD_NSPAWN_LOCK=0
    --property=KillMode=mixed

# Usually no need to change systemd_nspawn_default_args
systemd_nspawn_default_args=--bind-ro=/sys/module
    --boot
    --inaccessible=/sys/module/apparmor
    --quiet
    --keep-unit"""

# Use mostly default settings for systemd-nspawn but with systemd-run instead of a service file:
# https://github.com/systemd/systemd/blob/main/units/systemd-nspawn%40.service.in
# Use TasksMax=infinity since this is what docker does:
# https://github.com/docker/engine/blob/master/contrib/init/systemd/docker.service

# Use SYSTEMD_NSPAWN_LOCK=0: otherwise jail won't start jail after a shutdown (but why?)
# Would give "directory tree currently busy" error and I'd have to run
# `rm /run/systemd/nspawn/locks/*` and remove the .lck file from jail_path
# Disabling locking isn't a big deal as systemd-nspawn will prevent starting a container
# with the same name anyway: as long as we're starting jails using this script,
# it won't be possible to start the same jail twice

# Always add --bind-ro=/sys/module to make lsmod happy
# https://manpages.debian.org/bookworm/manpages/sysfs.5.en.html

DOWNLOAD_SCRIPT_DIGEST = (
    "cfcb5d08b24187d108f2ab0d21a6cc4b73dcd7f5d7dfc80803bfd7f1642d638d"
)
SCRIPT_PATH = os.path.realpath(__file__)
SCRIPT_NAME = os.path.basename(SCRIPT_PATH)
SCRIPT_DIR_PATH = os.path.dirname(SCRIPT_PATH)
COMMAND_NAME = os.path.basename(__file__)
JAILS_DIR_PATH = os.path.join(SCRIPT_DIR_PATH, "jails")
JAIL_CONFIG_NAME = "config"
JAIL_ROOTFS_NAME = "rootfs"
SHORTNAME = "jlmkr"

# Only set a color if we have an interactive tty
if sys.stdout.isatty():
    BOLD = "\033[1m"
    RED = "\033[91m"
    YELLOW = "\033[93m"
    UNDERLINE = "\033[4m"
    NORMAL = "\033[0m"
else:
    BOLD = RED = YELLOW = UNDERLINE = NORMAL = ""

DISCLAIMER = f"""{YELLOW}{BOLD}{__disclaimer__}{NORMAL}"""

# Used in parser getters to indicate the default behavior when a specific
# option is not found. Created to enable `None` as a valid fallback value.
_UNSET = object()


class KeyValueParser(configparser.ConfigParser):
    """Simple comment preserving parser based on ConfigParser.
    Reads a file containing key/value pairs and/or comments.
    Values can span multiple lines, as long as they are indented
    deeper than the first line of the value. Comments or keys
    must NOT be indented.
    """

    def __init__(self, *args, **kwargs):
        # Set defaults if not specified by user
        if "interpolation" not in kwargs:
            kwargs["interpolation"] = None
        if "allow_no_value" not in kwargs:
            kwargs["allow_no_value"] = True
        if "comment_prefixes" not in kwargs:
            kwargs["comment_prefixes"] = "#"

        super().__init__(*args, **kwargs)

        # Backup _comment_prefixes
        self._comment_prefixes_backup = self._comment_prefixes
        # Unset _comment_prefixes so comments won't be skipped
        self._comment_prefixes = ()
        # Starting point for the comment IDs
        self._comment_id = 0
        # Default delimiter to use
        delimiter = self._delimiters[0]
        # Template to store comments as key value pair
        self._comment_template = "#{0} " + delimiter + " {1}"
        # Regex to match the comment prefix
        self._comment_regex = re.compile(
            r"^#\d+\s*" + re.escape(delimiter) + r"[^\S\n]*"
        )
        # Regex to match cosmetic newlines (skips newlines in multiline values):
        # consecutive whitespace from start of line followed by a line not starting with whitespace
        self._cosmetic_newlines_regex = re.compile(r"^(\s+)(?=^\S)", re.MULTILINE)
        # Dummy section name
        self._section_name = "a"

    def _find_cosmetic_newlines(self, text):
        # Indices of the lines containing cosmetic newlines
        cosmetic_newline_indices = set()
        for match in re.finditer(self._cosmetic_newlines_regex, text):
            start_index = text.count("\n", 0, match.start())
            end_index = start_index + text.count("\n", match.start(), match.end())
            cosmetic_newline_indices.update(range(start_index, end_index))

        return cosmetic_newline_indices

    # TODO: can I create a solution which not depends on the internal _read method?
    def _read(self, fp, fpname):
        lines = fp.readlines()
        cosmetic_newline_indices = self._find_cosmetic_newlines("".join(lines))
        # Preprocess config file to preserve comments
        for i, line in enumerate(lines):
            if i in cosmetic_newline_indices or line.startswith(
                self._comment_prefixes_backup
            ):
                # Store cosmetic newline or comment with unique key
                lines[i] = self._comment_template.format(self._comment_id, line)
                self._comment_id += 1

        # Convert to in-memory file and prepend a dummy section header
        lines = io.StringIO(f"[{self._section_name}]\n" + "".join(lines))
        # Feed preprocessed file to original _read method
        return super()._read(lines, fpname)

    def read_default_string(self, string, source="<string>"):
        # Ignore all comments when parsing default key/values
        string = "\n".join(
            [
                line
                for line in string.splitlines()
                if not line.startswith(self._comment_prefixes_backup)
            ]
        )
        # Feed preprocessed file to original _read method
        return super()._read(io.StringIO("[DEFAULT]\n" + string), source)

    def write(self, fp, space_around_delimiters=False):
        # Write the config to an in-memory file
        with io.StringIO() as sfile:
            super().write(sfile, space_around_delimiters)
            # Start from the beginning of sfile
            sfile.seek(0)

            line = sfile.readline()
            # Throw away lines until we reach the dummy section header
            while line.strip() != f"[{self._section_name}]":
                line = sfile.readline()

            lines = sfile.readlines()

        for i, line in enumerate(lines):
            # Remove the comment id prefix
            lines[i] = self._comment_regex.sub("", line, 1)

        fp.write("".join(lines).rstrip())

    # Set value for specified option key
    def my_set(self, option, value):
        if isinstance(value, bool):
            value = str(int(value))
        elif isinstance(value, list):
            value = str("\n    ".join(value))
        elif not isinstance(value, str):
            value = str(value)

        super().set(self._section_name, option, value)

    # Return value for specified option key
    def my_get(self, option, fallback=_UNSET):
        return super().get(self._section_name, option, fallback=fallback)

    # Return value converted to boolean for specified option key
    def my_getboolean(self, option, fallback=_UNSET):
        return super().getboolean(self._section_name, option, fallback=fallback)


class ExceptionWithParser(Exception):
    def __init__(self, parser, message):
        self.parser = parser
        self.message = message
        super().__init__(message)


# Workaround for exit_on_error=False not applying to:
# "error: the following arguments are required"
# https://github.com/python/cpython/issues/103498
class CustomSubParser(argparse.ArgumentParser):
    def error(self, message):
        if self.exit_on_error:
            super().error(message)
        else:
            raise ExceptionWithParser(self, message)


class Chroot:
    def __init__(self, new_root):
        self.new_root = new_root
        self.old_root = None
        self.initial_cwd = None

    def __enter__(self):
        self.old_root = os.open("/", os.O_PATH)
        self.initial_cwd = os.path.abspath(os.getcwd())
        os.chdir(self.new_root)
        os.chroot(".")

    def __exit__(self, exc_type, exc_value, traceback):
        os.chdir(self.old_root)
        os.chroot(".")
        os.close(self.old_root)
        os.chdir(self.initial_cwd)


def eprint(*args, **kwargs):
    """
    Print to stderr.
    """
    print(*args, file=sys.stderr, **kwargs)


def fail(*args, **kwargs):
    """
    Print to stderr and exit.
    """
    eprint(*args, **kwargs)
    sys.exit(1)


def get_jail_path(jail_name):
    return os.path.join(JAILS_DIR_PATH, jail_name)


def get_jail_config_path(jail_name):
    return os.path.join(get_jail_path(jail_name), JAIL_CONFIG_NAME)


def get_jail_rootfs_path(jail_name):
    return os.path.join(get_jail_path(jail_name), JAIL_ROOTFS_NAME)


# Test intel GPU by decoding mp4 file (output is discarded)
# Run the commands below in the jail:
# curl -o bunny.mp4 https://www.w3schools.com/html/mov_bbb.mp4
# ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i bunny.mp4 -f null - && echo 'SUCCESS!'


def passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args):
    if not gpu_passthrough_intel:
        return

    if not os.path.exists("/dev/dri"):
        eprint(
            dedent(
                """
        No intel GPU seems to be present...
        Skip passthrough of intel GPU."""
            )
        )
        return

    systemd_nspawn_additional_args.append("--bind=/dev/dri")


def passthrough_nvidia(
    gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
):
    jail_rootfs_path = get_jail_rootfs_path(jail_name)
    ld_so_conf_path = Path(
        os.path.join(jail_rootfs_path), f"etc/ld.so.conf.d/{SHORTNAME}-nvidia.conf"
    )

    if not gpu_passthrough_nvidia:
        # Cleanup the config file we made when passthrough was enabled
        ld_so_conf_path.unlink(missing_ok=True)
        return

    # Load the nvidia kernel module
    if subprocess.run(["modprobe", "nvidia-current-uvm"]).returncode != 0:
        eprint(
            dedent(
                """
            Failed to load nvidia-current-uvm kernel module."""
            )
        )

    # Run nvidia-smi to initialize the nvidia driver
    # If we can't run nvidia-smi successfully,
    # then nvidia-container-cli list will fail too:
    # we shouldn't continue with gpu passthrough
    if subprocess.run(["nvidia-smi", "-f", "/dev/null"]).returncode != 0:
        eprint("Skip passthrough of nvidia GPU.")
        return

    try:
        # Get list of libraries
        nvidia_libraries = set(
            [
                x
                for x in subprocess.check_output(
                    ["nvidia-container-cli", "list", "--libraries"]
                )
                .decode()
                .split("\n")
                if x
            ]
        )
        # Get full list of files, but excluding library ones from above
        nvidia_files = set(
            (
                [
                    x
                    for x in subprocess.check_output(["nvidia-container-cli", "list"])
                    .decode()
                    .split("\n")
                    if x and x not in nvidia_libraries
                ]
            )
        )
    except Exception:
        eprint(
            dedent(
                """
        Unable to detect which nvidia driver files to mount.
        Skip passthrough of nvidia GPU."""
            )
        )
        return

    # Also make nvidia-smi available inside the path,
    # while mounting the symlink will be resolved and nvidia-smi will appear as a regular file
    nvidia_files.add("/usr/bin/nvidia-smi")

    nvidia_mounts = []

    for file_path in nvidia_files:
        if not os.path.exists(file_path):
            # Don't try to mount files not present on the host
            print(f"Skipped mounting {file_path}, it doesn't exist on the host...")
            continue

        if file_path.startswith("/dev/"):
            nvidia_mounts.append(f"--bind={file_path}")
        else:
            nvidia_mounts.append(f"--bind-ro={file_path}")

    # Check if the parent dir exists where we want to write our conf file
    if ld_so_conf_path.parent.exists():
        library_folders = set(str(Path(x).parent) for x in nvidia_libraries)
        # Add the library folders as mounts
        for lf in library_folders:
            nvidia_mounts.append(f"--bind-ro={lf}")

        # Only write if the conf file doesn't yet exist or has different contents
        existing_conf_libraries = set()
        if ld_so_conf_path.exists():
            existing_conf_libraries.update(
                x for x in ld_so_conf_path.read_text().splitlines() if x
            )

        if library_folders != existing_conf_libraries:
            print("\n".join(x for x in library_folders), file=ld_so_conf_path.open("w"))

            # Run ldconfig inside systemd-nspawn jail with nvidia mounts...
            subprocess.run(
                [
                    "systemd-nspawn",
                    "--quiet",
                    f"--machine={jail_name}",
                    f"--directory={jail_rootfs_path}",
                    *nvidia_mounts,
                    "ldconfig",
                ]
            )
    else:
        eprint(
            dedent(
                """
            Unable to write the ld.so.conf.d directory inside the jail (it doesn't exist).
            Skipping call to ldconfig.
            The nvidia drivers will probably not be detected..."""
            )
        )

    systemd_nspawn_additional_args += nvidia_mounts


def exec_jail(jail_name, cmd):
    """
    Execute a command in the jail with given name.
    """
    return subprocess.run(
        [
            "systemd-run",
            "--machine",
            jail_name,
            "--quiet",
            "--pipe",
            "--wait",
            "--collect",
            "--service-type=exec",
            *cmd,
        ]
    ).returncode


def status_jail(jail_name, args):
    """
    Show the status of the systemd service wrapping the jail with given name.
    """
    # Alternatively `machinectl status jail_name` could be used
    return subprocess.run(
        ["systemctl", "status", f"{SHORTNAME}-{jail_name}", *args]
    ).returncode


def log_jail(jail_name, args):
    """
    Show the log file of the jail with given name.
    """
    return subprocess.run(
        ["journalctl", "-u", f"{SHORTNAME}-{jail_name}", *args]
    ).returncode


def shell_jail(args):
    """
    Open a shell in the jail with given name.
    """
    return subprocess.run(["machinectl", "shell"] + args).returncode


def parse_config_file(jail_config_path):
    config = KeyValueParser()
    # Read default config to fallback to default values
    # for keys not found in the jail_config_path file
    config.read_default_string(DEFAULT_CONFIG)
    try:
        with open(jail_config_path, "r") as fp:
            config.read_file(fp)
        return config
    except FileNotFoundError:
        eprint(f"Unable to find config file: {jail_config_path}.")
        return


def systemd_escape_path(path):
    """
    Escape path containing spaces, while properly handling backslashes in filenames.
    https://manpages.debian.org/bookworm/systemd/systemd.syntax.7.en.html#QUOTING
    https://manpages.debian.org/bookworm/systemd/systemd.service.5.en.html#COMMAND_LINES
    """
    return "".join(
        map(
            lambda char: r"\s" if char == " " else "\\\\" if char == "\\" else char,
            path,
        )
    )


def add_hook(jail_path, systemd_run_additional_args, hook_command, hook_type):
    if not hook_command:
        return

    # Run the command directly if it doesn't start with a shebang
    if not hook_command.startswith("#!"):
        systemd_run_additional_args += [f"--property={hook_type}={hook_command}"]
        return

    # Otherwise write a script file and call that
    hook_file = os.path.abspath(os.path.join(jail_path, f".{hook_type}"))

    # Only write if contents are different
    if not os.path.exists(hook_file) or Path(hook_file).read_text() != hook_command:
        print(hook_command, file=open(hook_file, "w"))

    stat_chmod(hook_file, 0o700)
    systemd_run_additional_args += [
        f"--property={hook_type}={systemd_escape_path(hook_file)}"
    ]


def start_jail(jail_name):
    """
    Start jail with given name.
    """
    skip_start_message = (
        f"Skipped starting jail {jail_name}. It appears to be running already..."
    )

    if jail_is_running(jail_name):
        eprint(skip_start_message)
        return 0

    jail_path = get_jail_path(jail_name)
    jail_config_path = get_jail_config_path(jail_name)
    jail_rootfs_path = get_jail_rootfs_path(jail_name)

    config = parse_config_file(jail_config_path)

    if not config:
        eprint("Aborting...")
        return 1

    seccomp = config.my_getboolean("seccomp")

    systemd_run_additional_args = [
        f"--unit={SHORTNAME}-{jail_name}",
        f"--working-directory={jail_path}",
        f"--description=My nspawn jail {jail_name} [created with jailmaker]",
    ]

    systemd_nspawn_additional_args = [
        f"--machine={jail_name}",
        f"--directory={JAIL_ROOTFS_NAME}",
    ]

    # The systemd-nspawn manual explicitly mentions:
    # Device nodes may not be created
    # https://www.freedesktop.org/software/systemd/man/systemd-nspawn.html
    # This means docker images containing device nodes can't be pulled
    # https://github.com/moby/moby/issues/35245
    #
    # The solution is to use DevicePolicy=auto
    # https://github.com/kinvolk/kube-spawn/pull/328
    #
    # DevicePolicy=auto is the default for systemd-run and allows access to all devices
    # as long as we don't add any --property=DeviceAllow= flags
    # https://manpages.debian.org/bookworm/systemd/systemd.resource-control.5.en.html
    #
    # We can now successfully run:
    # mknod /dev/port c 1 4
    # Or pull docker images containing device nodes:
    # docker pull oraclelinux@sha256:d49469769e4701925d5145c2676d5a10c38c213802cf13270ec3a12c9c84d643

    # Add hooks to execute commands on the host before/after starting and after stopping a jail
    add_hook(
        jail_path,
        systemd_run_additional_args,
        config.my_get("pre_start_hook"),
        "ExecStartPre",
    )

    add_hook(
        jail_path,
        systemd_run_additional_args,
        config.my_get("post_start_hook"),
        "ExecStartPost",
    )

    add_hook(
        jail_path,
        systemd_run_additional_args,
        config.my_get("post_stop_hook"),
        "ExecStopPost",
    )

    gpu_passthrough_intel = config.my_getboolean("gpu_passthrough_intel")
    gpu_passthrough_nvidia = config.my_getboolean("gpu_passthrough_nvidia")

    passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args)
    passthrough_nvidia(
        gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
    )

    if seccomp is False:
        # Disabling seccomp filtering by passing --setenv=SYSTEMD_SECCOMP=0 to systemd-run will improve performance
        # at the expense of security: it allows syscalls which otherwise would be blocked or would have to be explicitly allowed by passing
        # --system-call-filter to systemd-nspawn
        # https://github.com/systemd/systemd/issues/18370
        #
        # However, and additional layer of seccomp filtering may be undesirable
        # For example when using docker to run containers inside the jail created with systemd-nspawn
        # Even though seccomp filtering is disabled for the systemd-nspawn jail itself, docker can still use seccomp filtering
        # to restrict the actions available within its containers
        #
        # Proof that seccomp can be used inside a jail started with --setenv=SYSTEMD_SECCOMP=0:
        # Run a command in a docker container which is blocked by the default docker seccomp profile:
        # 	docker run --rm -it debian:jessie unshare --map-root-user --user sh -c whoami
        # 	unshare: unshare failed: Operation not permitted
        # Now run unconfined to show command runs successfully:
        # 	docker run --rm -it --security-opt seccomp=unconfined debian:jessie unshare --map-root-user --user sh -c whoami
        # 	root

        systemd_run_additional_args += [
            "--setenv=SYSTEMD_SECCOMP=0",
        ]

    initial_setup = False

    # If there's no machine-id, then this the first time the jail is started
    if not os.path.exists(os.path.join(jail_rootfs_path, "etc/machine-id")) and (
        initial_setup := config.my_get("initial_setup")
    ):
        # initial_setup has been assigned due to := expression above
        # Ensure the jail init system is ready before we start the initial_setup
        systemd_nspawn_additional_args += [
            "--notify-ready=yes",
        ]

    cmd = [
        "systemd-run",
        *shlex.split(config.my_get("systemd_run_default_args")),
        *systemd_run_additional_args,
        "--",
        "systemd-nspawn",
        *shlex.split(config.my_get("systemd_nspawn_default_args")),
        *systemd_nspawn_additional_args,
        *shlex.split(config.my_get("systemd_nspawn_user_args")),
    ]

    print(
        dedent(
            f"""
        Starting jail {jail_name} with the following command:

        {shlex.join(cmd)}
    """
        )
    )

    returncode = subprocess.run(cmd).returncode
    if returncode != 0:
        eprint(
            dedent(
                f"""
            Failed to start jail {jail_name}...
            In case of a config error, you may fix it with:
            {COMMAND_NAME} edit {jail_name}
        """
            )
        )

        return returncode

    # Handle initial setup after jail is up and running (for the first time)
    if initial_setup:
        if not initial_setup.startswith("#!"):
            initial_setup = "#!/bin/sh\n" + initial_setup

        with tempfile.NamedTemporaryFile(
            mode="w+t",
            prefix="jlmkr-initial-setup.",
            dir=jail_rootfs_path,
            delete=False,
        ) as initial_setup_file:
            # Write a script file to call during initial setup
            initial_setup_file.write(initial_setup)

        initial_setup_file_name = os.path.basename(initial_setup_file.name)
        initial_setup_file_host_path = os.path.abspath(initial_setup_file.name)
        stat_chmod(initial_setup_file_host_path, 0o700)

        print(f"About to run the initial setup script: {initial_setup_file_name}.")
        print("Waiting for networking in the jail to be ready.")
        print(
            "Please wait (this may take 90s in case of bridge networking with STP is enabled)..."
        )
        returncode = exec_jail(
            jail_name,
            [
                "--",
                "systemd-run",
                f"--unit={initial_setup_file_name}",
                "--quiet",
                "--pipe",
                "--wait",
                "--service-type=exec",
                "--property=After=network-online.target",
                "--property=Wants=network-online.target",
                "/" + initial_setup_file_name,
            ],
        )

        if returncode != 0:
            eprint("Tried to run the following commands inside the jail:")
            eprint(initial_setup)
            eprint()
            eprint(f"{RED}{BOLD}Failed to run initial setup...")
            eprint(
                f"You may want to manually run /{initial_setup_file_name} inside the jail for debugging purposes."
            )
            eprint(f"Or stop and remove the jail and try again.{NORMAL}")
            return returncode
        else:
            # Cleanup the initial_setup_file_host_path
            Path(initial_setup_file_host_path).unlink(missing_ok=True)
            print(f"Done with initial setup of jail {jail_name}!")

    return returncode


def restart_jail(jail_name):
    """
    Restart jail with given name.
    """

    returncode = stop_jail(jail_name)
    if returncode != 0:
        eprint("Abort restart.")
        return returncode

    return start_jail(jail_name)


def cleanup(jail_path):
    """
    Cleanup jail.
    """

    if get_zfs_dataset(jail_path):
        eprint(f"Cleaning up: {jail_path}.")
        remove_zfs_dataset(jail_path)

    elif os.path.isdir(jail_path):
        # Workaround for https://github.com/python/cpython/issues/73885
        # Should be fixed in Python 3.13 https://stackoverflow.com/a/70549000
        def _onerror(func, path, exc_info):
            exc_type, exc_value, exc_traceback = exc_info
            if issubclass(exc_type, PermissionError):
                # Update the file permissions with the immutable and append-only bit cleared
                subprocess.run(["chattr", "-i", "-a", path])
                # Reattempt the removal
                func(path)
            elif not issubclass(exc_type, FileNotFoundError):
                raise exc_value

        eprint(f"Cleaning up: {jail_path}.")
        shutil.rmtree(jail_path, onerror=_onerror)


def validate_sha256(file_path, digest):
    """
    Validates if a file matches a sha256 digest.
    """
    try:
        with open(file_path, "rb") as f:
            file_hash = hashlib.sha256(f.read()).hexdigest()
            return file_hash == digest
    except FileNotFoundError:
        return False


def run_lxc_download_script(
    jail_name=None, jail_path=None, jail_rootfs_path=None, distro=None, release=None
):
    arch = "amd64"
    lxc_dir = ".lxc"
    lxc_cache = os.path.join(lxc_dir, "cache")
    lxc_download_script = os.path.join(lxc_dir, "lxc-download.sh")

    # Create the lxc dirs if nonexistent
    os.makedirs(lxc_dir, exist_ok=True)
    stat_chmod(lxc_dir, 0o700)
    os.makedirs(lxc_cache, exist_ok=True)
    stat_chmod(lxc_cache, 0o700)

    try:
        if os.stat(lxc_download_script).st_uid != 0:
            os.remove(lxc_download_script)
    except FileNotFoundError:
        pass

    # Fetch the lxc download script if not present locally (or hash doesn't match)
    if not validate_sha256(lxc_download_script, DOWNLOAD_SCRIPT_DIGEST):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/Jip-Hop/lxc/97f93be72ebf380f3966259410b70b1c966b0ff0/templates/lxc-download.in",
            lxc_download_script,
        )

        if not validate_sha256(lxc_download_script, DOWNLOAD_SCRIPT_DIGEST):
            eprint("Abort! Downloaded script has unexpected contents.")
            return 1

    stat_chmod(lxc_download_script, 0o700)

    if None not in [jail_name, jail_path, jail_rootfs_path, distro, release]:
        cmd = [
            lxc_download_script,
            f"--name={jail_name}",
            f"--path={jail_path}",
            f"--rootfs={jail_rootfs_path}",
            f"--arch={arch}",
            f"--dist={distro}",
            f"--release={release}",
        ]

        if rc := subprocess.run(cmd, env={"LXC_CACHE_PATH": lxc_cache}).returncode != 0:
            eprint("Aborting...")
            return rc

    else:
        # List images
        cmd = [lxc_download_script, "--list", f"--arch={arch}"]

        p1 = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, env={"LXC_CACHE_PATH": lxc_cache}
        )

        for line in iter(p1.stdout.readline, b""):
            line = line.decode().strip()
            # Filter out the known incompatible distros
            if not re.match(
                r"^(alpine|amazonlinux|busybox|devuan|funtoo|openwrt|plamo|voidlinux)\s",
                line,
            ):
                # TODO: check if output matches expected output, if it does then return 0
                # Else treat this as an error and return 1
                print(line)

        rc = p1.wait()
        # Currently --list will always return a non-zero exit code, even when listing the images was successful
        # https://github.com/lxc/lxc/pull/4462
        # Therefore we must currently return 0, to prevent aborting the interactive create process

        # return rc

    return 0


def stat_chmod(file_path, mode):
    """
    Change mode if file doesn't already have this mode.
    """
    if mode != stat.S_IMODE(os.stat(file_path).st_mode):
        os.chmod(file_path, mode)


def get_mount_point(path):
    """
    Return the mount point on which the given path resides.
    """
    path = os.path.abspath(path)
    while not os.path.ismount(path):
        path = os.path.dirname(path)
    return path


def get_relative_path_in_jailmaker_dir(absolute_path):
    return PurePath(absolute_path).relative_to(SCRIPT_DIR_PATH)


def get_zfs_dataset(path):
    """
    Get ZFS dataset path.
    """

    def clean_field(field):
        # Put back spaces which were encoded
        # https://github.com/openzfs/zfs/issues/11182
        return field.replace("\\040", " ")

    path = os.path.realpath(path)
    with open("/proc/mounts", "r") as f:
        for line in f:
            fields = line.split()
            if "zfs" == fields[2] and path == clean_field(fields[1]):
                return clean_field(fields[0])


def get_zfs_base_path():
    """
    Get ZFS dataset path for jailmaker directory.
    """
    zfs_base_path = get_zfs_dataset(SCRIPT_DIR_PATH)
    if not zfs_base_path:
        fail("Failed to get dataset path for jailmaker directory.")

    return zfs_base_path


def create_zfs_dataset(absolute_path):
    """
    Create a ZFS Dataset inside the jailmaker directory at the provided absolute path.
    E.g. "/mnt/mypool/jailmaker/jails" or "/mnt/mypool/jailmaker/jails/newjail").
    """
    relative_path = get_relative_path_in_jailmaker_dir(absolute_path)
    dataset_to_create = os.path.join(get_zfs_base_path(), relative_path)
    eprint(f"Creating ZFS Dataset {dataset_to_create}")
    subprocess.run(["zfs", "create", dataset_to_create], check=True)


def remove_zfs_dataset(absolute_path):
    """
    Remove a ZFS Dataset inside the jailmaker directory at the provided absolute path.
    E.g. "/mnt/mypool/jailmaker/jails/oldjail".
    """
    relative_path = get_relative_path_in_jailmaker_dir(absolute_path)
    dataset_to_remove = os.path.join((get_zfs_base_path()), relative_path)
    eprint(f"Removing ZFS Dataset {dataset_to_remove}")
    subprocess.run(["zfs", "destroy", "-r", dataset_to_remove], check=True)


def check_jail_name_valid(jail_name, warn=True):
    """
    Return True if jail name matches the required format.
    """
    if (
        re.match(r"^[.a-zA-Z0-9-]{1,64}$", jail_name)
        and not jail_name.startswith(".")
        and ".." not in jail_name
    ):
        return True

    if warn:
        eprint(
            dedent(
                f"""
            {YELLOW}{BOLD}WARNING: INVALID NAME{NORMAL}

            A valid name consists of:
            - allowed characters (alphanumeric, dash, dot)
            - no leading or trailing dots
            - no sequences of multiple dots
            - max 64 characters"""
            )
        )
    return False


def check_jail_name_available(jail_name, warn=True):
    """
    Return True if jail name is not yet taken.
    """
    if not os.path.exists(get_jail_path(jail_name)):
        return True

    if warn:
        print()
        eprint("A jail with this name already exists.")
    return False


def get_text_editor():
    def get_from_environ(key):
        if editor := os.environ.get(key):
            return shutil.which(editor)

    return (
        get_from_environ("VISUAL")
        or get_from_environ("EDITOR")
        or shutil.which("editor")
        or shutil.which("/usr/bin/editor")
        or "nano"
    )


def create_jail(**kwargs):
    print(DISCLAIMER)

    if os.path.basename(SCRIPT_DIR_PATH) != "jailmaker":
        eprint(
            dedent(
                f"""
            {COMMAND_NAME} needs to create files.
            Currently it can not decide if it is safe to create files in:
            {SCRIPT_DIR_PATH}
            Please create a dedicated dataset called "jailmaker", store {SCRIPT_NAME} there and try again."""
            )
        )
        return 1

    if not PurePath(get_mount_point(SCRIPT_DIR_PATH)).is_relative_to("/mnt"):
        print(
            dedent(
                f"""
            {YELLOW}{BOLD}WARNING: BEWARE OF DATA LOSS{NORMAL}

            {SCRIPT_NAME} should be on a dataset mounted under /mnt (it currently is not).
            Storing it on the boot-pool means losing all jails when updating TrueNAS.
            Jails will be stored under:
            {SCRIPT_DIR_PATH}
        """
            )
        )

    jail_name = kwargs.pop("jail_name")
    start_now = False

    if not check_jail_name_valid(jail_name):
        return 1

    if not check_jail_name_available(jail_name):
        return 1

    start_now = kwargs.pop("start", start_now)
    jail_config_path = kwargs.pop("config")

    config = KeyValueParser()

    if jail_config_path:
        # TODO: fallback to default values for e.g. distro and release if they are not in the config file
        if jail_config_path == "-":
            print(f"Creating jail {jail_name} from config template passed via stdin.")
            config.read_string(sys.stdin.read())
        else:
            print(f"Creating jail {jail_name} from config template {jail_config_path}.")
            if jail_config_path not in config.read(jail_config_path):
                eprint(f"Failed to read config template {jail_config_path}.")
                return 1
    else:
        print(f"Creating jail {jail_name} with default config.")
        config.read_string(DEFAULT_CONFIG)

    for option in [
        "distro",
        "gpu_passthrough_intel",
        "gpu_passthrough_nvidia",
        "release",
        "seccomp",
        "startup",
        "systemd_nspawn_user_args",
    ]:
        value = kwargs.pop(option)
        if (
            value is not None
            # String, non-empty list of args or int
            and (isinstance(value, int) or len(value))
            and value is not config.my_get(option, None)
        ):
            # TODO: this will wipe all systemd_nspawn_user_args from the template...
            # Should there be an option to append them instead?
            print(f"Overriding {option} config value with {value}.")
            config.my_set(option, value)

    jail_path = get_jail_path(jail_name)

    distro = config.my_get("distro")
    release = config.my_get("release")

    # Cleanup in except, but only once the jail_path is final
    # Otherwise we may cleanup the wrong directory
    try:
        # Create the dir or dataset where to store the jails
        if not os.path.exists(JAILS_DIR_PATH):
            if get_zfs_dataset(SCRIPT_DIR_PATH):
                # Creating "jails" dataset if "jailmaker" is a ZFS Dataset
                create_zfs_dataset(JAILS_DIR_PATH)
            else:
                os.makedirs(JAILS_DIR_PATH, exist_ok=True)
            stat_chmod(JAILS_DIR_PATH, 0o700)

        # Creating a dataset for the jail if the jails dir is a dataset
        if get_zfs_dataset(JAILS_DIR_PATH):
            create_zfs_dataset(jail_path)

        jail_config_path = get_jail_config_path(jail_name)
        jail_rootfs_path = get_jail_rootfs_path(jail_name)

        # Create directory for rootfs
        os.makedirs(jail_rootfs_path, exist_ok=True)
        # LXC download script needs to write to this file during install
        # but we don't need it so we will remove it later
        open(jail_config_path, "a").close()

        if (
            returncode := run_lxc_download_script(
                jail_name, jail_path, jail_rootfs_path, distro, release
            )
            != 0
        ):
            cleanup(jail_path)
            return returncode

        # Assuming the name of your jail is "myjail"
        # and "machinectl shell myjail" doesn't work
        # Try:
        #
        # Stop the jail with:
        # machinectl stop myjail
        # And start a shell inside the jail without the --boot option:
        # systemd-nspawn -q -D jails/myjail/rootfs /bin/sh
        # Then set a root password with:
        # In case of amazonlinux you may need to run:
        # yum update -y && yum install -y passwd
        # passwd
        # exit
        # Then you may login from the host via:
        # machinectl login myjail
        #
        # You could also enable SSH inside the jail to login
        #
        # Or if that doesn't work (e.g. for alpine) get a shell via:
        # nsenter -t $(machinectl show myjail -p Leader --value) -a /bin/sh -l
        # But alpine jails made with jailmaker have other issues
        # They don't shutdown cleanly via systemctl and machinectl...

        with Chroot(jail_rootfs_path):
            # Use chroot to correctly resolve absolute /sbin/init symlink
            init_system_name = os.path.basename(os.path.realpath("/sbin/init"))

        if (
            init_system_name != "systemd"
            and parse_os_release(jail_rootfs_path).get("ID") != "nixos"
        ):
            print(
                dedent(
                    f"""
                {YELLOW}{BOLD}WARNING: DISTRO NOT SUPPORTED{NORMAL}

                Chosen distro appears not to use systemd...

                You probably will not get a shell with:
                machinectl shell {jail_name}

                You may get a shell with this command:
                nsenter -t $(machinectl show {jail_name} -p Leader --value) -a /bin/sh -l

                Read about the downsides of nsenter:
                https://github.com/systemd/systemd/issues/12785#issuecomment-503019081

                {BOLD}Using this distro with {COMMAND_NAME} is NOT recommended.{NORMAL}
            """
                )
            )

            print("Autostart has been disabled.")
            print("You need to start this jail manually.")
            config.my_set("startup", 0)
            start_now = False

        # Remove config which systemd handles for us
        with contextlib.suppress(FileNotFoundError):
            os.remove(os.path.join(jail_rootfs_path, "etc/machine-id"))
        with contextlib.suppress(FileNotFoundError):
            os.remove(os.path.join(jail_rootfs_path, "etc/resolv.conf"))

        # https://github.com/systemd/systemd/issues/852
        print(
            "\n".join([f"pts/{i}" for i in range(0, 11)]),
            file=open(os.path.join(jail_rootfs_path, "etc/securetty"), "w"),
        )

        network_dir_path = os.path.join(jail_rootfs_path, "etc/systemd/network")

        # Modify default network settings, if network_dir_path exists
        if os.path.isdir(network_dir_path):
            default_host0_network_file = os.path.join(
                jail_rootfs_path, "lib/systemd/network/80-container-host0.network"
            )

            # Check if default host0 network file exists
            if os.path.isfile(default_host0_network_file):
                override_network_file = os.path.join(
                    network_dir_path, "80-container-host0.network"
                )

                # Override the default 80-container-host0.network file (by using the same name)
                # This config applies when using the --network-bridge option of systemd-nspawn
                # Disable LinkLocalAddressing on IPv4, or else the container won't get IP address via DHCP
                # But keep it enabled on IPv6, as SLAAC and DHCPv6 both require a local-link address to function
                print(
                    Path(default_host0_network_file)
                    .read_text()
                    .replace("LinkLocalAddressing=yes", "LinkLocalAddressing=ipv6"),
                    file=open(override_network_file, "w"),
                )

            # Setup DHCP for macvlan network interfaces
            # This config applies when using the --network-macvlan option of systemd-nspawn
            # https://www.debian.org/doc/manuals/debian-reference/ch05.en.html#_the_modern_network_configuration_without_gui
            print(
                cleandoc(
                    """
                [Match]
                Virtualization=container
                Name=mv-*

                [Network]
                DHCP=yes
                LinkLocalAddressing=ipv6

                [DHCPv4]
                UseDNS=true
                UseTimezone=true
            """
                ),
                file=open(os.path.join(network_dir_path, "mv-dhcp.network"), "w"),
            )

            # Setup DHCP for veth-extra network interfaces
            # This config applies when using the --network-veth-extra option of systemd-nspawn
            # https://www.debian.org/doc/manuals/debian-reference/ch05.en.html#_the_modern_network_configuration_without_gui
            print(
                cleandoc(
                    """
                [Match]
                Virtualization=container
                Name=vee-*

                [Network]
                DHCP=yes
                LinkLocalAddressing=ipv6

                [DHCPv4]
                UseDNS=true
                UseTimezone=true
            """
                ),
                file=open(os.path.join(network_dir_path, "vee-dhcp.network"), "w"),
            )

            # Override preset which caused systemd-networkd to be disabled (e.g. fedora 39)
            # https://www.freedesktop.org/software/systemd/man/latest/systemd.preset.html
            # https://github.com/lxc/lxc-ci/blob/f632823ecd9b258ed42df40449ec54ed7ef8e77d/images/fedora.yaml#L312C5-L312C38

            preset_path = os.path.join(jail_rootfs_path, "etc/systemd/system-preset")
            os.makedirs(preset_path, exist_ok=True)
            print(
                "enable systemd-networkd.service",
                file=open(os.path.join(preset_path, "00-jailmaker.preset"), "w"),
            )

        with open(jail_config_path, "w") as fp:
            config.write(fp)

        os.chmod(jail_config_path, 0o600)

    # Cleanup on any exception and rethrow
    except BaseException as error:
        cleanup(jail_path)
        raise error

    if start_now:
        return start_jail(jail_name)

    return 0


def jail_is_running(jail_name):
    return (
        subprocess.run(
            ["machinectl", "show", jail_name],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        ).returncode
        == 0
    )


def edit_jail(jail_name):
    """
    Edit jail with given name.
    """

    if not check_jail_name_valid(jail_name):
        return 1

    if check_jail_name_available(jail_name, False):
        eprint(f"A jail with name {jail_name} does not exist.")
        return 1

    jail_config_path = get_jail_config_path(jail_name)

    returncode = subprocess.run([get_text_editor(), jail_config_path]).returncode

    if returncode != 0:
        eprint(f"An error occurred while editing {jail_config_path}.")
        return returncode

    if jail_is_running(jail_name):
        print("\nRestart the jail for edits to apply (if you made any).")

    return 0


def stop_jail(jail_name):
    """
    Stop jail with given name and wait until stopped.
    """

    if not jail_is_running(jail_name):
        return 0

    returncode = subprocess.run(["machinectl", "poweroff", jail_name]).returncode
    if returncode != 0:
        eprint("Error while stopping jail.")
        return returncode

    print(f"Wait for {jail_name} to stop", end="", flush=True)

    while jail_is_running(jail_name):
        time.sleep(1)
        print(".", end="", flush=True)

    return 0


def remove_jail(jail_name):
    """
    Remove jail with given name.
    """

    if not check_jail_name_valid(jail_name):
        return 1

    if check_jail_name_available(jail_name, False):
        eprint(f"A jail with name {jail_name} does not exist.")
        return 1

    # TODO: print which dataset is about to be removed before the user confirmation
    # TODO: print that all zfs snapshots will be removed if jail has it's own zfs dataset
    check = input(f'\nCAUTION: Type "{jail_name}" to confirm jail deletion!\n\n')

    if check == jail_name:
        print()
        jail_path = get_jail_path(jail_name)
        returncode = stop_jail(jail_name)
        if returncode != 0:
            return returncode

        print()
        cleanup(jail_path)
        return 0
    else:
        eprint("Wrong name, nothing happened.")
        return 1


def print_table(header, list_of_objects, empty_value_indicator):
    # Find max width for each column
    widths = defaultdict(int)
    for obj in list_of_objects:
        for hdr in header:
            value = obj.get(hdr)
            if value is None:
                obj[hdr] = value = empty_value_indicator
            widths[hdr] = max(widths[hdr], len(str(value)), len(str(hdr)))

    # Print header
    print(
        UNDERLINE + " ".join(hdr.upper().ljust(widths[hdr]) for hdr in header) + NORMAL
    )

    # Print rows
    for obj in list_of_objects:
        print(" ".join(str(obj.get(hdr)).ljust(widths[hdr]) for hdr in header))


def run_command_and_parse_json(command):
    result = subprocess.run(command, capture_output=True, text=True)
    output = result.stdout.strip()

    try:
        parsed_output = json.loads(output)
        return parsed_output
    except json.JSONDecodeError as e:
        eprint(f"Error parsing JSON: {e}")
        return None


def get_all_jail_names():
    try:
        jail_names = os.listdir(JAILS_DIR_PATH)
    except FileNotFoundError:
        jail_names = []

    return jail_names


def parse_os_release(new_root):
    result = {}
    with Chroot(new_root):
        # Use chroot to correctly resolve os-release symlink (for nixos)
        for candidate in ["/etc/os-release", "/usr/lib/os-release"]:
            try:
                with open(candidate, encoding="utf-8") as f:
                    # TODO: can I create a solution which not depends on the internal _parse_os_release method?
                    result = platform._parse_os_release(f)
                    break
            except OSError:
                # Silently ignore failing to read os release info
                pass

    return result


def list_jails():
    """
    List all available and running jails.
    """

    jails = {}
    empty_value_indicator = "-"

    jail_names = get_all_jail_names()

    if not jail_names:
        print("No jails.")
        return 0

    # Get running jails from machinectl
    running_machines = run_command_and_parse_json(["machinectl", "list", "-o", "json"])
    # Index running_machines by machine name
    # We're only interested in systemd-nspawn machines
    running_machines = {
        item["machine"]: item
        for item in running_machines
        if item["service"] == "systemd-nspawn"
    }

    for jail_name in jail_names:
        jail_rootfs_path = get_jail_rootfs_path(jail_name)
        jails[jail_name] = {"name": jail_name, "running": False}
        jail = jails[jail_name]

        config = parse_config_file(get_jail_config_path(jail_name))
        if config:
            jail["startup"] = config.my_getboolean("startup")
            jail["gpu_intel"] = config.my_getboolean("gpu_passthrough_intel")
            jail["gpu_nvidia"] = config.my_getboolean("gpu_passthrough_nvidia")

        if jail_name in running_machines:
            machine = running_machines[jail_name]
            # Augment the jails dict with output from machinectl
            jail["running"] = True
            jail["os"] = machine["os"] or None
            jail["version"] = machine["version"] or None

            addresses = machine.get("addresses")
            if not addresses:
                jail["addresses"] = empty_value_indicator
            else:
                addresses = addresses.split("\n")
                jail["addresses"] = addresses[0]
                if len(addresses) > 1:
                    jail["addresses"] += "…"
        else:
            # Parse os-release info ourselves
            jail_platform = parse_os_release(jail_rootfs_path)
            jail["os"] = jail_platform.get("ID")
            jail["version"] = jail_platform.get("VERSION_ID") or jail_platform.get(
                "VERSION_CODENAME"
            )

    print_table(
        [
            "name",
            "running",
            "startup",
            "gpu_intel",
            "gpu_nvidia",
            "os",
            "version",
            "addresses",
        ],
        sorted(jails.values(), key=lambda x: x["name"]),
        empty_value_indicator,
    )

    return 0


def startup_jails():
    start_failure = False
    for jail_name in get_all_jail_names():
        config = parse_config_file(get_jail_config_path(jail_name))
        if config and config.my_getboolean("startup"):
            if start_jail(jail_name) != 0:
                start_failure = True

    if start_failure:
        return 1

    return 0


def split_at_string(lst, string):
    try:
        index = lst.index(string)
        return lst[:index], lst[index + 1 :]
    except ValueError:
        return lst, []


def add_parser(subparser, **kwargs):
    if kwargs.get("add_help") is False:
        # Don't add help if explicitly disabled
        add_help = False
    else:
        # Never add help with the built in add_help
        kwargs["add_help"] = False
        add_help = True

    kwargs["epilog"] = DISCLAIMER
    kwargs["formatter_class"] = argparse.RawDescriptionHelpFormatter
    kwargs["exit_on_error"] = False
    func = kwargs.pop("func")
    parser = subparser.add_parser(**kwargs)
    parser.set_defaults(func=func)

    if add_help:
        parser.add_argument(
            "-h", "--help", help="show this help message and exit", action="store_true"
        )

    # Setting the add_help after the parser has been created with add_parser has no effect,
    # but it allows us to look up if this parser has a help message available
    parser.add_help = add_help

    return parser


def main():
    if os.stat(SCRIPT_PATH).st_uid != 0:
        fail(
            f"This script should be owned by the root user... Fix it manually with: `chown root {SCRIPT_PATH}`."
        )

    parser = argparse.ArgumentParser(
        description=__doc__,
        allow_abbrev=False,
        epilog=f"For more info on some command, run: {COMMAND_NAME} some_command --help.\n{DISCLAIMER}",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("--version", action="version", version=__version__)

    subparsers = parser.add_subparsers(
        title="commands", dest="command", metavar="", parser_class=CustomSubParser
    )

    split_commands = ["create", "exec", "log", "status"]
    commands = {}

    for d in [
        dict(
            name="create",  #
            help="create a new jail",
            func=create_jail,
        ),
        dict(
            name="edit",
            help=f"edit jail config with {get_text_editor()} text editor",
            func=edit_jail,
        ),
        dict(
            name="exec",  #
            help="execute a command in the jail",
            func=exec_jail,
        ),
        dict(
            name="images",
            help="list available images to create jails from",
            func=run_lxc_download_script,
        ),
        dict(
            name="list",  #
            help="list jails",
            func=list_jails,
        ),
        dict(
            name="log",  #
            help="show jail log",
            func=log_jail,
        ),
        dict(
            name="remove",  #
            help="remove previously created jail",
            func=remove_jail,
        ),
        dict(
            name="restart",  #
            help="restart a running jail",
            func=restart_jail,
        ),
        dict(
            name="shell",
            help="open shell in running jail (alias for machinectl shell)",
            func=shell_jail,
            add_help=False,
        ),
        dict(
            name="start",  #
            help="start previously created jail",
            func=start_jail,
        ),
        dict(
            name="startup",
            help="startup selected jails",
            func=startup_jails,
        ),
        dict(
            name="status",  #
            help="show jail status",
            func=status_jail,
        ),
        dict(
            name="stop",  #
            help="stop a running jail",
            func=stop_jail,
        ),
    ]:
        commands[d["name"]] = add_parser(subparsers, **d)

    for cmd in [
        "create",
        "edit",
        "exec",
        "log",
        "remove",
        "restart",
        "start",
        "status",
        "stop",
    ]:
        commands[cmd].add_argument("jail_name", help="name of the jail")

    commands["exec"].add_argument(
        "cmd",
        nargs="*",
        help="command to execute",
    )

    commands["shell"].add_argument(
        "args",
        nargs="*",
        help="args to pass to machinectl shell",
    )

    commands["log"].add_argument(
        "args",
        nargs="*",
        help="args to pass to journalctl",
    )

    commands["status"].add_argument(
        "args",
        nargs="*",
        help="args to pass to systemctl",
    )

    commands["create"].add_argument("--distro")
    commands["create"].add_argument("--release")
    commands["create"].add_argument(
        "--start",  #
        help="start jail after create",
        action="store_true",
    )
    commands["create"].add_argument(
        "--startup",
        type=int,
        choices=[0, 1],
        help=f"start this jail when running: {SCRIPT_NAME} startup",
    )
    commands["create"].add_argument(
        "--seccomp",  #
        type=int,
        choices=[0, 1],
        help="turning off seccomp filtering improves performance at the expense of security",
    )
    commands["create"].add_argument(
        "-c",  #
        "--config",
        help="path to config file template or - for stdin",
    )
    commands["create"].add_argument(
        "-gi",  #
        "--gpu_passthrough_intel",
        type=int,
        choices=[0, 1],
    )
    commands["create"].add_argument(
        "-gn",  #
        "--gpu_passthrough_nvidia",
        type=int,
        choices=[0, 1],
    )
    commands["create"].add_argument(
        "systemd_nspawn_user_args",
        nargs="*",
        help="add additional systemd-nspawn flags",
    )

    if os.getuid() != 0:
        parser.print_help()
        fail("Run this script as root...")

    # Set appropriate permissions (if not already set) for this file, since it's executed as root
    stat_chmod(SCRIPT_PATH, 0o700)

    # Ignore all args after the first "--"
    args_to_parse = split_at_string(sys.argv[1:], "--")[0]
    # Check for help
    if any(item in args_to_parse for item in ["-h", "--help"]):
        # Likely we need to show help output...
        try:
            args = vars(parser.parse_known_args(args_to_parse)[0])
            # We've exited by now if not invoking a subparser: jlmkr.py --help
            if args.get("help"):
                need_help = True
                command = args.get("command")

                # Edge case for some commands
                if command in split_commands and args["jail_name"]:
                    # Ignore all args after the jail name
                    args_to_parse = split_at_string(args_to_parse, args["jail_name"])[0]
                    # Add back the jail_name as it may be a required positional and we
                    # don't want to end up in the except clause below
                    args_to_parse += [args["jail_name"]]
                    # Parse one more time...
                    args = vars(parser.parse_known_args(args_to_parse)[0])
                    # ...and check if help is still in the remaining args
                    need_help = args.get("help")

                if need_help:
                    commands[command].print_help()
                    sys.exit()
        except ExceptionWithParser as e:
            # Print help output on error, e.g. due to:
            # "error: the following arguments are required"
            if e.parser.add_help:
                e.parser.print_help()
                sys.exit()

    # Exit on parse errors (e.g. missing positional args)
    for command in commands:
        commands[command].exit_on_error = True

    # Parse to find command and function and ignore unknown args which may be present
    # such as args intended to pass through to systemd-run
    args = vars(parser.parse_known_args()[0])
    command = args.pop("command", None)

    # Start over with original args
    args_to_parse = sys.argv[1:]

    if not command:
        # Parse args and show error for unknown args
        parser.parse_args(args_to_parse)
        parser.print_help()
        sys.exit()

    elif command == "shell":
        # Pass anything after the "shell" command to machinectl
        _, shell_args = split_at_string(args_to_parse, command)
        sys.exit(args["func"](shell_args))
    elif command in split_commands and args["jail_name"]:
        jlmkr_args, remaining_args = split_at_string(args_to_parse, args["jail_name"])
        if remaining_args and remaining_args[0] != "--":
            # Add "--" after the jail name to ensure further args, e.g.
            # --help or --version, are captured as systemd_nspawn_user_args
            args_to_parse = jlmkr_args + [args["jail_name"], "--"] + remaining_args

    # Parse args again, but show error for unknown args
    args = vars(parser.parse_args(args_to_parse))
    # Clean the args
    args.pop("help")
    args.pop("command", None)
    func = args.pop("func")
    sys.exit(func(**args))


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(130)