Nvidia mounts and parser fixes

This commit is contained in:
Jip-Hop 2023-03-01 21:06:04 +01:00
parent de51c05825
commit 857b57bc39
1 changed file with 70 additions and 30 deletions

100
jlmkr.py
View File

@ -4,6 +4,7 @@ import argparse
import configparser import configparser
import contextlib import contextlib
import ctypes import ctypes
import glob
import hashlib import hashlib
import os import os
import re import re
@ -15,7 +16,6 @@ import subprocess
import sys import sys
import time import time
import urllib.request import urllib.request
from inspect import cleandoc from inspect import cleandoc
from pathlib import Path, PurePath from pathlib import Path, PurePath
from textwrap import dedent from textwrap import dedent
@ -158,35 +158,77 @@ def start_jail(jail_name):
if os.path.exists('/dev/dri'): if os.path.exists('/dev/dri'):
systemd_nspawn_additional_args.append('--bind=/dev/dri') systemd_nspawn_additional_args.append('--bind=/dev/dri')
nvidia_devices = glob.glob('/dev/nvidia*')
# Detect nvidia GPU # Detect nvidia GPU
if os.path.exists('/dev/nvidia0'): if len(nvidia_devices):
nvidia_driver_files = [] nvidia_files = set(nvidia_devices)
try: try:
nvidia_driver_files = subprocess.check_output( nvidia_files.update([x for x in subprocess.check_output(
['nvidia-container-cli', 'list']).decode().split('\n') ['nvidia-container-cli', 'list']).decode().split('\n') if x])
except subprocess.CalledProcessError: except subprocess.CalledProcessError:
eprint(dedent(""" eprint(dedent("""
Failed to run nvidia-container-cli. Failed to run nvidia-container-cli.
Unable to mount the nvidia driver files.""")) Unable to detect which nvidia driver files to mount.
Falling back to hard-coded list of nvidia files..."""))
if len(nvidia_driver_files): for pattern in ["/dev/nvidiactl",
additional_nvidia_library = '/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1' "/dev/nvidia-modeset",
if os.path.exists(additional_nvidia_library): "/dev/nvidia0",
# Add libnvidia-ml.so.1 "/usr/lib/nvidia/current/nvidia-smi",
# Not listed by nvidia-container-cli, but required "/usr/bin/nvidia-persistenced",
nvidia_driver_files.append(additional_nvidia_library) "/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-cfg.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libcuda.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ptxjitcompiler.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-ngx.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-encode.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvcuvid.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-eglcore.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glcore.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-tls.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glsi.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-rtcore.so*",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libGLX_nvidia.so*",
"/usr/lib/x86_64-linux-gnu/libnvidia-glvkspirv.so*"]:
for file_path in glob.glob(pattern):
if os.path.exists(file_path):
nvidia_files.add(file_path)
for file_path in nvidia_driver_files: # Add libnvidia-ml.so.1 (and possibly other nvidia files) which
if not file_path: # are not listed by nvidia-container-cli, but required to run...
# Skip empty strings nvidia_files.update(
continue glob.glob('/usr/lib/x86_64-linux-gnu/*nvidia*'))
elif file_path.startswith('/dev/'):
systemd_nspawn_additional_args.append( nvidia_mounts = []
f"--bind={file_path}") mounted_nvidia_smi_in_path = False
for file_path in nvidia_files:
if file_path.startswith('/dev/'):
nvidia_mounts.append(f"--bind={file_path}")
else: else:
systemd_nspawn_additional_args.append( nvidia_mounts.append(f"--bind-ro={file_path}")
f"--bind-ro={file_path}") if (not mounted_nvidia_smi_in_path and
os.path.basename(file_path) == 'nvidia-smi' and
os.path.normpath(file_path) != os.path.normpath('/usr/bin/nvidia-smi')):
# As an alternative to a symlink also bind mount nvidia-smi
# in a directory which is available inside the path
nvidia_mounts.append(
f"--bind-ro={file_path}:/usr/bin/nvidia-smi")
mounted_nvidia_smi_in_path = True
# Run ldconfig inside systemd-nspawn jail with nvidia mounts...
subprocess.run(
['systemd-nspawn',
'--quiet',
f"--machine={jail_name}",
f"--directory={os.path.join(jail_path, JAIL_ROOTFS_NAME)}",
*nvidia_mounts,
"ldconfig"])
systemd_nspawn_additional_args += nvidia_mounts
cmd = ['systemd-run', cmd = ['systemd-run',
*shlex.split(config.get('systemd_run_default_args', '')), *shlex.split(config.get('systemd_run_default_args', '')),
@ -692,23 +734,21 @@ def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description=DESCRIPTION, epilog=DISCLAIMER) description=DESCRIPTION, epilog=DISCLAIMER)
parser.add_argument('--version', action='version', version=VERSION) parser.add_argument('--version', action='version', version=VERSION)
subparsers = parser.add_subparsers(title='commands', dest='subcommand') subparsers = parser.add_subparsers(title='commands', dest='subcommand')
create_parser = subparsers.add_parser(name='create', epilog=DISCLAIMER) subparsers.add_parser(name='create', epilog=DISCLAIMER).add_argument(
create_parser.add_argument(
'name', nargs='?', help='name of the jail to create') 'name', nargs='?', help='name of the jail to create')
start_parser = subparsers.add_parser(name='start', epilog=DISCLAIMER) subparsers.add_parser(name='start', epilog=DISCLAIMER).add_argument(
start_parser.add_argument('name', help='name of the jail to start') 'name', help='name of the jail to start')
start_parser = subparsers.add_parser(name='delete', epilog=DISCLAIMER) subparsers.add_parser(name='delete', epilog=DISCLAIMER).add_argument(
start_parser.add_argument('name', help='name of the jail to delete') 'name', help='name of the jail to delete')
start_parser = subparsers.add_parser(name='list', epilog=DISCLAIMER) subparsers.add_parser(name='list', epilog=DISCLAIMER)
parser.usage = f"{parser.format_usage()[7:]}{create_parser.format_usage()}{start_parser.format_usage()}"
if os.getuid() != 0: if os.getuid() != 0:
parser.print_usage() parser.print_usage()