From da2c90374b105a72fc36e3d9cf402fdb65d75bed Mon Sep 17 00:00:00 2001 From: Jip-Hop <2871973+Jip-Hop@users.noreply.github.com> Date: Fri, 26 Jan 2024 22:33:33 +0100 Subject: [PATCH] Load nvidia kernel module --- README.md | 8 -------- jlmkr.py | 31 +++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index dde93ff..2ee7007 100644 --- a/README.md +++ b/README.md @@ -135,14 +135,6 @@ See [Advanced Networking](./NETWORKING.md) for more. The `jailmaker` script won't install Docker for you, but it can setup the jail with the capabilities required to run docker. You can manually install Docker inside the jail using the [official installation guide](https://docs.docker.com/engine/install/#server) or use [convenience script](https://get.docker.com). -## Nvidia GPU - -To make passthrough of the nvidia GPU work, you need to schedule a Pre Init command. The reason is that TrueNAS SCALE by default doesn't load the nvidia kernel modules (and `jailmaker` doesn't do that either). [This screenshot](https://user-images.githubusercontent.com/1704047/222915803-d6dd51b0-c4dd-4189-84be-a04d38cca0b3.png) shows what the configuration should look like. - -``` -[ ! -f /dev/nvidia-uvm ] && modprobe nvidia-current-uvm && /usr/bin/nvidia-modprobe -c0 -u -``` - ## Documentation Additional documentation contributed by the community can be found in [the docs directory](./docs/). diff --git a/jlmkr.py b/jlmkr.py index f8805b9..384b79f 100755 --- a/jlmkr.py +++ b/jlmkr.py @@ -104,6 +104,17 @@ def passthrough_intel(gpu_passthrough_intel, systemd_nspawn_additional_args): def passthrough_nvidia( gpu_passthrough_nvidia, systemd_nspawn_additional_args, jail_name ): + # Load the nvidia kernel module + if subprocess.run(["modprobe", "nvidia-current-uvm"]).returncode != 0: + eprint( + dedent( + """ + Failed to load nvidia-current-uvm kernel module. + Skip passthrough of nvidia GPU.""" + ) + ) + return + jail_rootfs_path = get_jail_rootfs_path(jail_name) ld_so_conf_path = Path( os.path.join(jail_rootfs_path), f"etc/ld.so.conf.d/{SYMLINK_NAME}-nvidia.conf" @@ -114,13 +125,11 @@ def passthrough_nvidia( ld_so_conf_path.unlink(missing_ok=True) return - try: # Run nvidia-smi to initialize the nvidia driver # If we can't run nvidia-smi successfully, # then nvidia-container-cli list will fail too: # we shouldn't continue with gpu passthrough - subprocess.run(["nvidia-smi", "-f", "/dev/null"], check=True) - except: + if subprocess.run(["nvidia-smi", "-f", "/dev/null"]).returncode != 0: eprint("Skip passthrough of nvidia GPU.") return @@ -316,6 +325,11 @@ def start_jail(jail_name, check_startup_enabled=False): f"--directory={JAIL_ROOTFS_NAME}", ] + # TODO: split the docker_compatible option into separate options + # - privileged (to disable seccomp, set DevicePolicy=auto and add all capabilities) + # - how to call the option to enable ip_forward and bridge-nf-call? + # TODO: always add --bind-ro=/sys/module? Or only for privileged jails? + if config.get("docker_compatible") == "1": # Enable ip forwarding on the host (docker needs it) print(1, file=open("/proc/sys/net/ipv4/ip_forward", "w")) @@ -712,15 +726,16 @@ def create_jail(jail_name, distro="debian", release="bookworm"): gpu_passthrough_nvidia = 0 - try: + if ( subprocess.run( - ["nvidia-smi"], - check=True, + ["modprobe", "nvidia-current-uvm"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, - ) + ).returncode + == 0 + ): nvidia_detected = True - except: + else: nvidia_detected = False if nvidia_detected: