From c5239749b952590a3472639d3712f3181068dd4c Mon Sep 17 00:00:00 2001
From: jonct <2807816+jonct@users.noreply.github.com>
Date: Mon, 15 Jul 2024 01:55:15 -0400
Subject: [PATCH] Extract start action

---
 src/jlmkr/actions/start.py | 264 +++++++++++++++++++++++++
 src/jlmkr/donor/jlmkr.py   | 391 +------------------------------------
 src/jlmkr/utils/gpu.py     | 158 +++++++++++++++
 3 files changed, 423 insertions(+), 390 deletions(-)
 create mode 100644 src/jlmkr/actions/start.py
 create mode 100644 src/jlmkr/utils/gpu.py

diff --git a/src/jlmkr/actions/start.py b/src/jlmkr/actions/start.py
new file mode 100644
index 0000000..abe50d9
--- /dev/null
+++ b/src/jlmkr/actions/start.py
@@ -0,0 +1,264 @@
+# SPDX-FileCopyrightText: © 2024 Jip-Hop and the Jailmakers <https://github.com/Jip-Hop/jailmaker>
+#
+# SPDX-License-Identifier: LGPL-3.0-only
+
+import os.path
+import shlex
+import subprocess
+import tempfile
+
+from pathlib import Path
+from textwrap import dedent
+from utils.config_parser import parse_config_file
+from utils.console import eprint
+from utils.files import stat_chmod
+from utils.gpu import passthrough_intel, passthrough_nvidia
+from utils.jail_dataset import get_jail_path, jail_is_running
+from utils.jail_dataset import get_jail_config_path, get_jail_rootfs_path
+from utils.paths import SHORTNAME, JAIL_ROOTFS_NAME
+
+
+def start_jail(jail_name):
+    """
+    Start the jail with the given name. Returns 0 on success (or if already running), non-zero otherwise.
+    """
+    skip_start_message = (
+        f"Skipped starting jail {jail_name}. It appears to be running already..."
+    )
+
+    if jail_is_running(jail_name):
+        eprint(skip_start_message)
+        return 0
+
+    jail_path = get_jail_path(jail_name)
+    jail_config_path = get_jail_config_path(jail_name)
+    jail_rootfs_path = get_jail_rootfs_path(jail_name)
+
+    config = parse_config_file(jail_config_path)
+
+    if not config:
+        eprint("Aborting...")
+        return 1
+
+    seccomp = config.my_getboolean("seccomp")
+
+    systemd_run_additional_args = [
+        f"--unit={SHORTNAME}-{jail_name}",
+        f"--working-directory={jail_path}",
+        f"--description=My nspawn jail {jail_name} [created with jailmaker]",
+    ]
+
+    systemd_nspawn_additional_args = [
+        f"--machine={jail_name}",
+        f"--directory={JAIL_ROOTFS_NAME}",
+    ]
+
+    # The systemd-nspawn manual explicitly mentions:
+    # Device nodes may not be created
+    # https://www.freedesktop.org/software/systemd/man/systemd-nspawn.html
+    # This means docker images containing device nodes can't be pulled
+    # https://github.com/moby/moby/issues/35245
+    #
+    # The solution is to use DevicePolicy=auto
+    # https://github.com/kinvolk/kube-spawn/pull/328
+    #
+    # DevicePolicy=auto is the default for systemd-run and allows access to all devices
+    # as long as we don't add any --property=DeviceAllow= flags
+    # https://manpages.debian.org/bookworm/systemd/systemd.resource-control.5.en.html
+    #
+    # We can now successfully run:
+    # mknod /dev/port c 1 4
+    # Or pull docker images containing device nodes:
+    # docker pull oraclelinux@sha256:d49469769e4701925d5145c2676d5a10c38c213802cf13270ec3a12c9c84d643
+
+    # Add hooks to execute commands on the host before/after starting and after stopping a jail
+    add_hook(
+        jail_path,
+        systemd_run_additional_args,
+        config.my_get("pre_start_hook"),
+        "ExecStartPre",
+    )
+
+    add_hook(
+        jail_path,
+        systemd_run_additional_args,
+        config.my_get("post_start_hook"),
+        "ExecStartPost",
+    )
+
+    add_hook(
+        jail_path,
+        systemd_run_additional_args,
+        config.my_get("post_stop_hook"),
+        "ExecStopPost",
+    )
+
+    gpu_passthrough_intel = config.my_getboolean("gpu_passthrough_intel")
+    gpu_passthrough_nvidia = config.my_getboolean("gpu_passthrough_nvidia")
+
+    passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args)
+    passthrough_nvidia(
+        gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
+    )
+
+    if seccomp is False:
+        # Disabling seccomp filtering by passing --setenv=SYSTEMD_SECCOMP=0 to systemd-run will improve performance
+        # at the expense of security: it allows syscalls which otherwise would be blocked or would have to be explicitly allowed by passing
+        # --system-call-filter to systemd-nspawn
+        # https://github.com/systemd/systemd/issues/18370
+        #
+        # However, an additional layer of seccomp filtering may be undesirable
+        # For example when using docker to run containers inside the jail created with systemd-nspawn
+        # Even though seccomp filtering is disabled for the systemd-nspawn jail itself, docker can still use seccomp filtering
+        # to restrict the actions available within its containers
+        #
+        # Proof that seccomp can be used inside a jail started with --setenv=SYSTEMD_SECCOMP=0:
+        # Run a command in a docker container which is blocked by the default docker seccomp profile:
+        # 	docker run --rm -it debian:jessie unshare --map-root-user --user sh -c whoami
+        # 	unshare: unshare failed: Operation not permitted
+        # Now run unconfined to show command runs successfully:
+        # 	docker run --rm -it --security-opt seccomp=unconfined debian:jessie unshare --map-root-user --user sh -c whoami
+        # 	root
+
+        systemd_run_additional_args += [
+            "--setenv=SYSTEMD_SECCOMP=0",
+        ]
+
+    initial_setup = False
+
+    # If there's no machine-id, then this is the first time the jail is started
+    if not os.path.exists(os.path.join(jail_rootfs_path, "etc/machine-id")) and (
+        initial_setup := config.my_get("initial_setup")
+    ):
+        # initial_setup has been assigned due to := expression above
+        # Ensure the jail init system is ready before we start the initial_setup
+        systemd_nspawn_additional_args += [
+            "--notify-ready=yes",
+        ]
+
+    cmd = [
+        "systemd-run",
+        *shlex.split(config.my_get("systemd_run_default_args")),
+        *systemd_run_additional_args,
+        "--",
+        "systemd-nspawn",
+        *shlex.split(config.my_get("systemd_nspawn_default_args")),
+        *systemd_nspawn_additional_args,
+        *shlex.split(config.my_get("systemd_nspawn_user_args")),
+    ]
+
+    print(
+        dedent(
+            f"""
+        Starting jail {jail_name} with the following command:
+
+        {shlex.join(cmd)}
+    """
+        )
+    )
+
+    returncode = subprocess.run(cmd).returncode
+    if returncode != 0:
+        eprint(
+            dedent(
+                f"""
+            Failed to start jail {jail_name}...
+            In case of a config error, you may fix it with:
+            {COMMAND_NAME} edit {jail_name}
+        """
+            )
+        )
+        # NOTE(review): COMMAND_NAME (used above) is not imported or defined in this module - confirm its source.
+        return returncode
+
+    # Handle initial setup after jail is up and running (for the first time)
+    if initial_setup:
+        if not initial_setup.startswith("#!"):
+            initial_setup = "#!/bin/sh\n" + initial_setup
+
+        with tempfile.NamedTemporaryFile(
+            mode="w+t",
+            prefix="jlmkr-initial-setup.",
+            dir=jail_rootfs_path,
+            delete=False,
+        ) as initial_setup_file:
+            # Write a script file to call during initial setup
+            initial_setup_file.write(initial_setup)
+
+        initial_setup_file_name = os.path.basename(initial_setup_file.name)
+        initial_setup_file_host_path = os.path.abspath(initial_setup_file.name)
+        stat_chmod(initial_setup_file_host_path, 0o700)
+        # NOTE(review): exec_jail (called below) is not imported or defined in this module - confirm its source.
+        print(f"About to run the initial setup script: {initial_setup_file_name}.")
+        print("Waiting for networking in the jail to be ready.")
+        print(
+            "Please wait (this may take 90s in case of bridge networking with STP is enabled)..."
+        )
+        returncode = exec_jail(
+            jail_name,
+            [
+                "--",
+                "systemd-run",
+                f"--unit={initial_setup_file_name}",
+                "--quiet",
+                "--pipe",
+                "--wait",
+                "--service-type=exec",
+                "--property=After=network-online.target",
+                "--property=Wants=network-online.target",
+                "/" + initial_setup_file_name,
+            ],
+        )
+        # NOTE(review): RED, BOLD and NORMAL (used below) are not imported or defined in this module - confirm.
+        if returncode != 0:
+            eprint("Tried to run the following commands inside the jail:")
+            eprint(initial_setup)
+            eprint()
+            eprint(f"{RED}{BOLD}Failed to run initial setup...")
+            eprint(
+                f"You may want to manually run /{initial_setup_file_name} inside the jail for debugging purposes."
+            )
+            eprint(f"Or stop and remove the jail and try again.{NORMAL}")
+            return returncode
+        else:
+            # Cleanup the initial_setup_file_host_path
+            Path(initial_setup_file_host_path).unlink(missing_ok=True)
+            print(f"Done with initial setup of jail {jail_name}!")
+
+    return returncode
+
+
+def add_hook(jail_path, systemd_run_additional_args, hook_command, hook_type):
+    """Append --property=<hook_type>=<command> in place; no-op without hook_command."""
+    if not hook_command:
+        return
+
+    # Run the command directly if it doesn't start with a shebang
+    if not hook_command.startswith("#!"):
+        systemd_run_additional_args += [f"--property={hook_type}={hook_command}"]
+        return
+
+    # Otherwise write a script file and call that
+    hook_file = os.path.abspath(os.path.join(jail_path, f".{hook_type}"))
+    hook_script = hook_command + "\n"  # exactly what print() wrote before
+    # Only write if contents differ; comparing without the newline never matched
+    if not os.path.exists(hook_file) or Path(hook_file).read_text() != hook_script:
+        Path(hook_file).write_text(hook_script)  # closes the handle, unlike open()
+
+    stat_chmod(hook_file, 0o700)
+    hook_property = f"--property={hook_type}={systemd_escape_path(hook_file)}"
+    systemd_run_additional_args += [hook_property]
+
+
+def systemd_escape_path(path):
+    """
+    Make a path safe for a systemd command line: spaces become a backslash-s
+    escape and every literal backslash in the filename is doubled.
+    https://manpages.debian.org/bookworm/systemd/systemd.syntax.7.en.html#QUOTING
+    https://manpages.debian.org/bookworm/systemd/systemd.service.5.en.html#COMMAND_LINES
+    """
+    escaped_chars = []
+    for char in path:
+        # Every other character is passed through untouched
+        escaped_chars.append({" ": r"\s", "\\": "\\\\"}.get(char, char))
+    return "".join(escaped_chars)
diff --git a/src/jlmkr/donor/jlmkr.py b/src/jlmkr/donor/jlmkr.py
index ce3ded9..bb1526e 100755
--- a/src/jlmkr/donor/jlmkr.py
+++ b/src/jlmkr/donor/jlmkr.py
@@ -142,152 +142,6 @@ from utils.console import eprint, fail
 from utils.jail_dataset import get_jail_path, get_jail_config_path, get_jail_rootfs_path
 
 
-# Test intel GPU by decoding mp4 file (output is discarded)
-# Run the commands below in the jail:
-# curl -o bunny.mp4 https://www.w3schools.com/html/mov_bbb.mp4
-# ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i bunny.mp4 -f null - && echo 'SUCCESS!'
-
-
-def passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args):
-    if not gpu_passthrough_intel:
-        return
-
-    if not os.path.exists("/dev/dri"):
-        eprint(
-            dedent(
-                """
-        No intel GPU seems to be present...
-        Skip passthrough of intel GPU."""
-            )
-        )
-        return
-
-    systemd_nspawn_additional_args.append("--bind=/dev/dri")
-
-
-def passthrough_nvidia(
-    gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
-):
-    jail_rootfs_path = get_jail_rootfs_path(jail_name)
-    ld_so_conf_path = Path(
-        os.path.join(jail_rootfs_path), f"etc/ld.so.conf.d/{SHORTNAME}-nvidia.conf"
-    )
-
-    if not gpu_passthrough_nvidia:
-        # Cleanup the config file we made when passthrough was enabled
-        ld_so_conf_path.unlink(missing_ok=True)
-        return
-
-    # Load the nvidia kernel module
-    if subprocess.run(["modprobe", "nvidia-current-uvm"]).returncode != 0:
-        eprint(
-            dedent(
-                """
-            Failed to load nvidia-current-uvm kernel module."""
-            )
-        )
-
-    # Run nvidia-smi to initialize the nvidia driver
-    # If we can't run nvidia-smi successfully,
-    # then nvidia-container-cli list will fail too:
-    # we shouldn't continue with gpu passthrough
-    if subprocess.run(["nvidia-smi", "-f", "/dev/null"]).returncode != 0:
-        eprint("Skip passthrough of nvidia GPU.")
-        return
-
-    try:
-        # Get list of libraries
-        nvidia_libraries = set(
-            [
-                x
-                for x in subprocess.check_output(
-                    ["nvidia-container-cli", "list", "--libraries"]
-                )
-                .decode()
-                .split("\n")
-                if x
-            ]
-        )
-        # Get full list of files, but excluding library ones from above
-        nvidia_files = set(
-            (
-                [
-                    x
-                    for x in subprocess.check_output(["nvidia-container-cli", "list"])
-                    .decode()
-                    .split("\n")
-                    if x and x not in nvidia_libraries
-                ]
-            )
-        )
-    except Exception:
-        eprint(
-            dedent(
-                """
-        Unable to detect which nvidia driver files to mount.
-        Skip passthrough of nvidia GPU."""
-            )
-        )
-        return
-
-    # Also make nvidia-smi available inside the path,
-    # while mounting the symlink will be resolved and nvidia-smi will appear as a regular file
-    nvidia_files.add("/usr/bin/nvidia-smi")
-
-    nvidia_mounts = []
-
-    for file_path in nvidia_files:
-        if not os.path.exists(file_path):
-            # Don't try to mount files not present on the host
-            print(f"Skipped mounting {file_path}, it doesn't exist on the host...")
-            continue
-
-        if file_path.startswith("/dev/"):
-            nvidia_mounts.append(f"--bind={file_path}")
-        else:
-            nvidia_mounts.append(f"--bind-ro={file_path}")
-
-    # Check if the parent dir exists where we want to write our conf file
-    if ld_so_conf_path.parent.exists():
-        library_folders = set(str(Path(x).parent) for x in nvidia_libraries)
-        # Add the library folders as mounts
-        for lf in library_folders:
-            nvidia_mounts.append(f"--bind-ro={lf}")
-
-        # Only write if the conf file doesn't yet exist or has different contents
-        existing_conf_libraries = set()
-        if ld_so_conf_path.exists():
-            existing_conf_libraries.update(
-                x for x in ld_so_conf_path.read_text().splitlines() if x
-            )
-
-        if library_folders != existing_conf_libraries:
-            print("\n".join(x for x in library_folders), file=ld_so_conf_path.open("w"))
-
-            # Run ldconfig inside systemd-nspawn jail with nvidia mounts...
-            subprocess.run(
-                [
-                    "systemd-nspawn",
-                    "--quiet",
-                    f"--machine={jail_name}",
-                    f"--directory={jail_rootfs_path}",
-                    *nvidia_mounts,
-                    "ldconfig",
-                ]
-            )
-    else:
-        eprint(
-            dedent(
-                """
-            Unable to write the ld.so.conf.d directory inside the jail (it doesn't exist).
-            Skipping call to ldconfig.
-            The nvidia drivers will probably not be detected..."""
-            )
-        )
-
-    systemd_nspawn_additional_args += nvidia_mounts
-
-
 def exec_jail(jail_name, cmd):
     """
     Execute a command in the jail with given name.
@@ -333,250 +187,7 @@ def shell_jail(args):
     return subprocess.run(["machinectl", "shell"] + args).returncode
 
 
-def systemd_escape_path(path):
-    """
-    Escape path containing spaces, while properly handling backslashes in filenames.
-    https://manpages.debian.org/bookworm/systemd/systemd.syntax.7.en.html#QUOTING
-    https://manpages.debian.org/bookworm/systemd/systemd.service.5.en.html#COMMAND_LINES
-    """
-    return "".join(
-        map(
-            lambda char: r"\s" if char == " " else "\\\\" if char == "\\" else char,
-            path,
-        )
-    )
-
-
-def add_hook(jail_path, systemd_run_additional_args, hook_command, hook_type):
-    if not hook_command:
-        return
-
-    # Run the command directly if it doesn't start with a shebang
-    if not hook_command.startswith("#!"):
-        systemd_run_additional_args += [f"--property={hook_type}={hook_command}"]
-        return
-
-    # Otherwise write a script file and call that
-    hook_file = os.path.abspath(os.path.join(jail_path, f".{hook_type}"))
-
-    # Only write if contents are different
-    if not os.path.exists(hook_file) or Path(hook_file).read_text() != hook_command:
-        print(hook_command, file=open(hook_file, "w"))
-
-    stat_chmod(hook_file, 0o700)
-    systemd_run_additional_args += [
-        f"--property={hook_type}={systemd_escape_path(hook_file)}"
-    ]
-
-
-def start_jail(jail_name):
-    """
-    Start jail with given name.
-    """
-    skip_start_message = (
-        f"Skipped starting jail {jail_name}. It appears to be running already..."
-    )
-
-    if jail_is_running(jail_name):
-        eprint(skip_start_message)
-        return 0
-
-    jail_path = get_jail_path(jail_name)
-    jail_config_path = get_jail_config_path(jail_name)
-    jail_rootfs_path = get_jail_rootfs_path(jail_name)
-
-    config = parse_config_file(jail_config_path)
-
-    if not config:
-        eprint("Aborting...")
-        return 1
-
-    seccomp = config.my_getboolean("seccomp")
-
-    systemd_run_additional_args = [
-        f"--unit={SHORTNAME}-{jail_name}",
-        f"--working-directory={jail_path}",
-        f"--description=My nspawn jail {jail_name} [created with jailmaker]",
-    ]
-
-    systemd_nspawn_additional_args = [
-        f"--machine={jail_name}",
-        f"--directory={JAIL_ROOTFS_NAME}",
-    ]
-
-    # The systemd-nspawn manual explicitly mentions:
-    # Device nodes may not be created
-    # https://www.freedesktop.org/software/systemd/man/systemd-nspawn.html
-    # This means docker images containing device nodes can't be pulled
-    # https://github.com/moby/moby/issues/35245
-    #
-    # The solution is to use DevicePolicy=auto
-    # https://github.com/kinvolk/kube-spawn/pull/328
-    #
-    # DevicePolicy=auto is the default for systemd-run and allows access to all devices
-    # as long as we don't add any --property=DeviceAllow= flags
-    # https://manpages.debian.org/bookworm/systemd/systemd.resource-control.5.en.html
-    #
-    # We can now successfully run:
-    # mknod /dev/port c 1 4
-    # Or pull docker images containing device nodes:
-    # docker pull oraclelinux@sha256:d49469769e4701925d5145c2676d5a10c38c213802cf13270ec3a12c9c84d643
-
-    # Add hooks to execute commands on the host before/after starting and after stopping a jail
-    add_hook(
-        jail_path,
-        systemd_run_additional_args,
-        config.my_get("pre_start_hook"),
-        "ExecStartPre",
-    )
-
-    add_hook(
-        jail_path,
-        systemd_run_additional_args,
-        config.my_get("post_start_hook"),
-        "ExecStartPost",
-    )
-
-    add_hook(
-        jail_path,
-        systemd_run_additional_args,
-        config.my_get("post_stop_hook"),
-        "ExecStopPost",
-    )
-
-    gpu_passthrough_intel = config.my_getboolean("gpu_passthrough_intel")
-    gpu_passthrough_nvidia = config.my_getboolean("gpu_passthrough_nvidia")
-
-    passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args)
-    passthrough_nvidia(
-        gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
-    )
-
-    if seccomp is False:
-        # Disabling seccomp filtering by passing --setenv=SYSTEMD_SECCOMP=0 to systemd-run will improve performance
-        # at the expense of security: it allows syscalls which otherwise would be blocked or would have to be explicitly allowed by passing
-        # --system-call-filter to systemd-nspawn
-        # https://github.com/systemd/systemd/issues/18370
-        #
-        # However, and additional layer of seccomp filtering may be undesirable
-        # For example when using docker to run containers inside the jail created with systemd-nspawn
-        # Even though seccomp filtering is disabled for the systemd-nspawn jail itself, docker can still use seccomp filtering
-        # to restrict the actions available within its containers
-        #
-        # Proof that seccomp can be used inside a jail started with --setenv=SYSTEMD_SECCOMP=0:
-        # Run a command in a docker container which is blocked by the default docker seccomp profile:
-        # 	docker run --rm -it debian:jessie unshare --map-root-user --user sh -c whoami
-        # 	unshare: unshare failed: Operation not permitted
-        # Now run unconfined to show command runs successfully:
-        # 	docker run --rm -it --security-opt seccomp=unconfined debian:jessie unshare --map-root-user --user sh -c whoami
-        # 	root
-
-        systemd_run_additional_args += [
-            "--setenv=SYSTEMD_SECCOMP=0",
-        ]
-
-    initial_setup = False
-
-    # If there's no machine-id, then this the first time the jail is started
-    if not os.path.exists(os.path.join(jail_rootfs_path, "etc/machine-id")) and (
-        initial_setup := config.my_get("initial_setup")
-    ):
-        # initial_setup has been assigned due to := expression above
-        # Ensure the jail init system is ready before we start the initial_setup
-        systemd_nspawn_additional_args += [
-            "--notify-ready=yes",
-        ]
-
-    cmd = [
-        "systemd-run",
-        *shlex.split(config.my_get("systemd_run_default_args")),
-        *systemd_run_additional_args,
-        "--",
-        "systemd-nspawn",
-        *shlex.split(config.my_get("systemd_nspawn_default_args")),
-        *systemd_nspawn_additional_args,
-        *shlex.split(config.my_get("systemd_nspawn_user_args")),
-    ]
-
-    print(
-        dedent(
-            f"""
-        Starting jail {jail_name} with the following command:
-
-        {shlex.join(cmd)}
-    """
-        )
-    )
-
-    returncode = subprocess.run(cmd).returncode
-    if returncode != 0:
-        eprint(
-            dedent(
-                f"""
-            Failed to start jail {jail_name}...
-            In case of a config error, you may fix it with:
-            {COMMAND_NAME} edit {jail_name}
-        """
-            )
-        )
-
-        return returncode
-
-    # Handle initial setup after jail is up and running (for the first time)
-    if initial_setup:
-        if not initial_setup.startswith("#!"):
-            initial_setup = "#!/bin/sh\n" + initial_setup
-
-        with tempfile.NamedTemporaryFile(
-            mode="w+t",
-            prefix="jlmkr-initial-setup.",
-            dir=jail_rootfs_path,
-            delete=False,
-        ) as initial_setup_file:
-            # Write a script file to call during initial setup
-            initial_setup_file.write(initial_setup)
-
-        initial_setup_file_name = os.path.basename(initial_setup_file.name)
-        initial_setup_file_host_path = os.path.abspath(initial_setup_file.name)
-        stat_chmod(initial_setup_file_host_path, 0o700)
-
-        print(f"About to run the initial setup script: {initial_setup_file_name}.")
-        print("Waiting for networking in the jail to be ready.")
-        print(
-            "Please wait (this may take 90s in case of bridge networking with STP is enabled)..."
-        )
-        returncode = exec_jail(
-            jail_name,
-            [
-                "--",
-                "systemd-run",
-                f"--unit={initial_setup_file_name}",
-                "--quiet",
-                "--pipe",
-                "--wait",
-                "--service-type=exec",
-                "--property=After=network-online.target",
-                "--property=Wants=network-online.target",
-                "/" + initial_setup_file_name,
-            ],
-        )
-
-        if returncode != 0:
-            eprint("Tried to run the following commands inside the jail:")
-            eprint(initial_setup)
-            eprint()
-            eprint(f"{RED}{BOLD}Failed to run initial setup...")
-            eprint(
-                f"You may want to manually run /{initial_setup_file_name} inside the jail for debugging purposes."
-            )
-            eprint(f"Or stop and remove the jail and try again.{NORMAL}")
-            return returncode
-        else:
-            # Cleanup the initial_setup_file_host_path
-            Path(initial_setup_file_host_path).unlink(missing_ok=True)
-            print(f"Done with initial setup of jail {jail_name}!")
-
-    return returncode
+from actions.start import add_hook, start_jail
 
 
 def restart_jail(jail_name):
diff --git a/src/jlmkr/utils/gpu.py b/src/jlmkr/utils/gpu.py
new file mode 100644
index 0000000..b71c3ae
--- /dev/null
+++ b/src/jlmkr/utils/gpu.py
@@ -0,0 +1,158 @@
+# SPDX-FileCopyrightText: © 2024 Jip-Hop and the Jailmakers <https://github.com/Jip-Hop/jailmaker>
+#
+# SPDX-License-Identifier: LGPL-3.0-only
+
+import os.path
+import subprocess
+
+from pathlib import Path
+from textwrap import dedent
+from utils.console import eprint
+from utils.jail_dataset import get_jail_rootfs_path
+from utils.paths import SHORTNAME
+
+
+# Test intel GPU by decoding mp4 file (output is discarded)
+# Run the commands below in the jail:
+# curl -o bunny.mp4 https://www.w3schools.com/html/mov_bbb.mp4
+# ffmpeg -hwaccel vaapi -hwaccel_device /dev/dri/renderD128 -hwaccel_output_format vaapi -i bunny.mp4 -f null - && echo 'SUCCESS!'
+
+
+def passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args):
+    """Append --bind=/dev/dri when intel passthrough is enabled and /dev/dri exists."""
+    if not gpu_passthrough_intel:
+        return
+    if os.path.exists("/dev/dri"):
+        systemd_nspawn_additional_args.append("--bind=/dev/dri")
+        return
+
+    # Passthrough was requested, but the host has no /dev/dri to bind
+    missing_gpu_message = dedent(
+        """
+        No intel GPU seems to be present...
+        Skip passthrough of intel GPU."""
+    )
+    eprint(missing_gpu_message)
+
+
+def passthrough_nvidia(
+    gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name
+):
+    """
+    Add systemd-nspawn bind mounts for the host's nvidia driver files.
+
+    Extends systemd_nspawn_additional_args in place and returns None. When
+    gpu_passthrough_nvidia is falsy, only removes the jail's ld.so.conf.d file.
+    """
+    jail_rootfs_path = get_jail_rootfs_path(jail_name)
+    ld_so_conf_path = Path(
+        jail_rootfs_path, f"etc/ld.so.conf.d/{SHORTNAME}-nvidia.conf"
+    )
+
+    if not gpu_passthrough_nvidia:
+        # Cleanup the config file we made when passthrough was enabled
+        ld_so_conf_path.unlink(missing_ok=True)
+        return
+
+    # Load the nvidia kernel module
+    if subprocess.run(["modprobe", "nvidia-current-uvm"]).returncode != 0:
+        eprint(
+            dedent(
+                """
+            Failed to load nvidia-current-uvm kernel module."""
+            )
+        )
+
+    # Run nvidia-smi to initialize the nvidia driver
+    # If we can't run nvidia-smi successfully,
+    # then nvidia-container-cli list will fail too:
+    # we shouldn't continue with gpu passthrough
+    if subprocess.run(["nvidia-smi", "-f", "/dev/null"]).returncode != 0:
+        eprint("Skip passthrough of nvidia GPU.")
+        return
+
+    try:
+        # Get list of libraries
+        nvidia_libraries = {
+            x
+            for x in subprocess.check_output(
+                ["nvidia-container-cli", "list", "--libraries"]
+            )
+            .decode()
+            .split("\n")
+            if x
+        }
+        # Get full list of files, but excluding library ones from above
+        nvidia_files = {
+            x
+            for x in subprocess.check_output(["nvidia-container-cli", "list"])
+            .decode()
+            .split("\n")
+            if x and x not in nvidia_libraries
+        }
+    except Exception:
+        eprint(
+            dedent(
+                """
+        Unable to detect which nvidia driver files to mount.
+        Skip passthrough of nvidia GPU."""
+            )
+        )
+        return
+
+    # Also make nvidia-smi available inside the path,
+    # while mounting the symlink will be resolved and nvidia-smi will appear as a regular file
+    nvidia_files.add("/usr/bin/nvidia-smi")
+
+    nvidia_mounts = []
+
+    for file_path in nvidia_files:
+        if not os.path.exists(file_path):
+            # Don't try to mount files not present on the host
+            print(f"Skipped mounting {file_path}, it doesn't exist on the host...")
+            continue
+
+        if file_path.startswith("/dev/"):
+            nvidia_mounts.append(f"--bind={file_path}")
+        else:
+            nvidia_mounts.append(f"--bind-ro={file_path}")
+
+    # Check if the parent dir exists where we want to write our conf file
+    if ld_so_conf_path.parent.exists():
+        library_folders = set(str(Path(x).parent) for x in nvidia_libraries)
+        # Add the library folders as mounts
+        for lf in library_folders:
+            nvidia_mounts.append(f"--bind-ro={lf}")
+
+        # Only write if the conf file doesn't yet exist or has different contents
+        existing_conf_libraries = set()
+        if ld_so_conf_path.exists():
+            existing_conf_libraries.update(
+                x for x in ld_so_conf_path.read_text().splitlines() if x
+            )
+        # write_text closes the file, so ldconfig below reads the complete contents
+        if library_folders != existing_conf_libraries:
+            ld_so_conf_path.write_text("\n".join(library_folders) + "\n")
+
+            # Run ldconfig inside systemd-nspawn jail with nvidia mounts...
+            subprocess.run(
+                [
+                    "systemd-nspawn",
+                    "--quiet",
+                    f"--machine={jail_name}",
+                    f"--directory={jail_rootfs_path}",
+                    *nvidia_mounts,
+                    "ldconfig",
+                ]
+            )
+    else:
+        eprint(
+            dedent(
+                """
+            Unable to write the ld.so.conf.d directory inside the jail (it doesn't exist).
+            Skipping call to ldconfig.
+            The nvidia drivers will probably not be detected..."""
+            )
+        )
+
+    systemd_nspawn_additional_args += nvidia_mounts