Fixes for nvidia driver

This commit is contained in:
Jip-Hop 2023-03-02 18:23:34 +01:00
parent 063b9973ae
commit 3f05a43e9e
1 changed file with 54 additions and 32 deletions

View File

@ -150,7 +150,22 @@ def start_jail(jail_name):
'--system-call-filter=add_key keyctl bpf', '--system-call-filter=add_key keyctl bpf',
] ]
if config.get('gpu_passthrough') == '1': ld_so_conf_path = os.path.join(
jail_path, JAIL_ROOTFS_NAME, 'etc/ld.so.conf.d/jlmkr-nvidia.conf')
# TODO: make 2 additional config options:
# gpu_passthrough_intel and gpu_passthrough_nvidia
# gpu_passthrough == 1 will enable both (as is current behavior)
# During create, autodetect intel/nvidia presence and ask to enable passthrough,
# else leave them disabled (and write them to the config file)
# No longer write gpu_passthrough setting for new jails
# Don't ask to enable passthrough in case there are no GPU devices present
if config.get('gpu_passthrough') != '1':
# Try to cleanup ld_so_conf_path
if os.path.exists(ld_so_conf_path):
os.remove(ld_so_conf_path)
else:
systemd_nspawn_additional_args.append( systemd_nspawn_additional_args.append(
'--property=DeviceAllow=char-drm rw') '--property=DeviceAllow=char-drm rw')
@ -173,49 +188,51 @@ def start_jail(jail_name):
Unable to detect which nvidia driver files to mount. Unable to detect which nvidia driver files to mount.
Falling back to hard-coded list of nvidia files...""")) Falling back to hard-coded list of nvidia files..."""))
for pattern in ["/dev/nvidiactl", for pattern in ["/dev/nvidia-modeset",
"/dev/nvidia-modeset",
"/dev/nvidia0", "/dev/nvidia0",
"/usr/lib/nvidia/current/nvidia-smi", "/dev/nvidiactl",
"/usr/bin/nvidia-persistenced", "/usr/bin/nvidia-persistenced",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*", "/usr/lib/nvidia/current/nvidia-smi",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-cfg.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ptxjitcompiler.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-ngx.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-encode.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvcuvid.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so*", "/usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glcore.so*", "/usr/lib/x86_64-linux-gnu/libnvidia-glcore.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-tls.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glsi.so*", "/usr/lib/x86_64-linux-gnu/libnvidia-glsi.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-ngx.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so*", "/usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-tls.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so*", "/usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so*"]: "/usr/lib/x86_64-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvcuvid.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-cfg.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-encode.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ptxjitcompiler.so*"]:
for file_path in glob.glob(pattern): for file_path in glob.glob(pattern):
if os.path.exists(file_path): if os.path.exists(file_path):
nvidia_files.add(file_path) nvidia_files.add(file_path)
# Add libnvidia-ml.so.1 which is not listed by nvidia-container-cli, but required to run... # Also make nvidia-smi available inside the path
nvidia_files.add('/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1') nvidia_files.add('/usr/bin/nvidia-smi')
nvidia_mounts = [] nvidia_mounts = []
mounted_nvidia_smi_in_path = False
for file_path in nvidia_files: for file_path in nvidia_files:
if not os.path.exists(file_path):
# Don't try to mount files not present on the host
continue
if file_path.startswith('/dev/'): if file_path.startswith('/dev/'):
nvidia_mounts.append(f"--bind={file_path}") nvidia_mounts.append(f"--bind={file_path}")
else: else:
nvidia_mounts.append(f"--bind-ro={file_path}") nvidia_mounts.append(f"--bind-ro={file_path}")
if (not mounted_nvidia_smi_in_path and
os.path.basename(file_path) == 'nvidia-smi' and
os.path.normpath(file_path) != os.path.normpath('/usr/bin/nvidia-smi')):
# As an alternative to a symlink also bind mount nvidia-smi # Check if the parent dir exists where we want to write our conf file
# in a directory which is available inside the path if os.path.exists(os.path.dirname(ld_so_conf_path)):
nvidia_mounts.append(
f"--bind-ro={file_path}:/usr/bin/nvidia-smi") # Only write if the conf file doesn't yet exist or has different contents
mounted_nvidia_smi_in_path = True string_to_write = '/usr/lib/x86_64-linux-gnu/nvidia/current'
if not os.path.exists(ld_so_conf_path) or Path(ld_so_conf_path).read_text().strip() != string_to_write:
print(string_to_write, file=open(ld_so_conf_path, 'w'))
# Run ldconfig inside systemd-nspawn jail with nvidia mounts... # Run ldconfig inside systemd-nspawn jail with nvidia mounts...
subprocess.run( subprocess.run(
@ -225,6 +242,11 @@ def start_jail(jail_name):
f"--directory={os.path.join(jail_path, JAIL_ROOTFS_NAME)}", f"--directory={os.path.join(jail_path, JAIL_ROOTFS_NAME)}",
*nvidia_mounts, *nvidia_mounts,
"ldconfig"]) "ldconfig"])
else:
eprint("""
Unable to write the ld.so.conf.d directory inside the jail (it doesn't exist).
Skipping call to ldconfig.
The nvidia drivers will probably not be detected...""")
systemd_nspawn_additional_args += nvidia_mounts systemd_nspawn_additional_args += nvidia_mounts