Deprecate gpu_passthrough conf, retry nvidia cli

This commit is contained in:
Jip-Hop 2023-03-03 20:46:10 +01:00
parent dcfaccea06
commit e8919141d2
1 changed file with 67 additions and 28 deletions

View File

@ -18,6 +18,7 @@ import time
import urllib.request import urllib.request
from inspect import cleandoc from inspect import cleandoc
from pathlib import Path, PurePath from pathlib import Path, PurePath
from shutil import which
from textwrap import dedent from textwrap import dedent
# Only set a color if we have an interactive tty # Only set a color if we have an interactive tty
@ -153,40 +154,68 @@ def start_jail(jail_name):
ld_so_conf_path = os.path.join( ld_so_conf_path = os.path.join(
jail_path, JAIL_ROOTFS_NAME, 'etc/ld.so.conf.d/jlmkr-nvidia.conf') jail_path, JAIL_ROOTFS_NAME, 'etc/ld.so.conf.d/jlmkr-nvidia.conf')
# TODO: make 2 additional config options: # Legacy gpu_passthrough config setting
# gpu_passthrough_intel and gpu_passthrough_nvidia if config.get('gpu_passthrough') == '1':
# gpu_passthrough == 1 will enable both (as is current behavior) gpu_passthrough_intel = '1'
# During create, autodetect intel/nvidia presence and ask to enable passthrough, gpu_passthrough_nvidia = '1'
# else leave them disabled (and write them to the config file) else:
# No longer write gpu_passthrough setting for new jails gpu_passthrough_intel = config.get('gpu_passthrough_intel')
# Don't ask to enable passthrough in case there are no GPU devices present gpu_passthrough_nvidia = config.get('gpu_passthrough_nvidia')
if config.get('gpu_passthrough') != '1': if gpu_passthrough_intel == '1' or gpu_passthrough_nvidia == '1':
systemd_nspawn_additional_args.append(
'--property=DeviceAllow=char-drm rw')
if gpu_passthrough_intel == '1':
# Detect intel GPU device and if present add bind flag
if os.path.exists('/dev/dri'):
systemd_nspawn_additional_args.append('--bind=/dev/dri')
else:
eprint("No intel GPU seems to be present...")
eprint(dedent("""
No intel GPU seems to be present...
Skip passthrough of intel GPU."""))
if gpu_passthrough_nvidia != '1':
# Try to cleanup ld_so_conf_path # Try to cleanup ld_so_conf_path
if os.path.exists(ld_so_conf_path): if os.path.exists(ld_so_conf_path):
os.remove(ld_so_conf_path) os.remove(ld_so_conf_path)
else: else:
systemd_nspawn_additional_args.append(
'--property=DeviceAllow=char-drm rw')
# Detect intel GPU device and if present add bind flag
if os.path.exists('/dev/dri'):
systemd_nspawn_additional_args.append('--bind=/dev/dri')
nvidia_devices = glob.glob('/dev/nvidia*') nvidia_devices = glob.glob('/dev/nvidia*')
# Detect nvidia GPU # Detect nvidia GPU
if len(nvidia_devices): if not len(nvidia_devices):
eprint(dedent("""
No nvidia GPU seems to be present...
Skip passthrough of nvidia GPU."""))
else:
nvidia_files = set(nvidia_devices) nvidia_files = set(nvidia_devices)
fallback = False
try: if which('nvidia-container-cli') is None:
nvidia_files.update([x for x in subprocess.check_output(
['nvidia-container-cli', 'list']).decode().split('\n') if x])
except subprocess.CalledProcessError:
eprint(dedent(""" eprint(dedent("""
Failed to run nvidia-container-cli. Can't run nvidia-container-cli, it appears not to be installed."""))
Unable to detect which nvidia driver files to mount. fallback = True
Falling back to hard-coded list of nvidia files...""")) else:
tries_remaining = 3
while tries_remaining:
tries_remaining -= 1
try:
nvidia_files.update([x for x in subprocess.check_output(
['nvidia-container-cli', 'list']).decode().split('\n') if x])
break
except subprocess.CalledProcessError:
eprint(dedent("""
Failed to run nvidia-container-cli."""))
if tries_remaining:
eprint("Trying again in 10 seconds...")
time.sleep(10)
else:
fallback = True
if fallback:
eprint("Unable to detect which nvidia driver files to mount.")
eprint("Falling back to hard-coded list of nvidia files...")
for pattern in ["/dev/nvidia-modeset", for pattern in ["/dev/nvidia-modeset",
"/dev/nvidia0", "/dev/nvidia0",
@ -231,7 +260,7 @@ def start_jail(jail_name):
# Check if the parent dir exists where we want to write our conf file # Check if the parent dir exists where we want to write our conf file
if os.path.exists(os.path.dirname(ld_so_conf_path)): if os.path.exists(os.path.dirname(ld_so_conf_path)):
# Only write if the conf file doesn't yet exist or has different contents # Only write if the conf file doesn't yet exist or has different contents
string_to_write = '/usr/lib/x86_64-linux-gnu/nvidia/current' string_to_write = '/usr/lib/x86_64-linux-gnu/nvidia/current'
if not os.path.exists(ld_so_conf_path) or Path(ld_so_conf_path).read_text().strip() != string_to_write: if not os.path.exists(ld_so_conf_path) or Path(ld_so_conf_path).read_text().strip() != string_to_write:
@ -499,10 +528,19 @@ def create_jail(jail_name):
print() print()
gpu_passthrough = 0 gpu_passthrough_intel = 0
if agree('Give access to the GPU inside the jail?', 'n'): if os.path.exists('/dev/dri'):
gpu_passthrough = 1 print("Detected the presence of an intel GPU.")
if agree('Passthrough the intel GPU?', 'n'):
gpu_passthrough_intel = 1
gpu_passthrough_nvidia = 0
if len(glob.glob('/dev/nvidia*')):
print("Detected the presence of an nvidia GPU.")
if agree('Passthrough the nvidia GPU?', 'n'):
gpu_passthrough_nvidia = 1
print(dedent(f""" print(dedent(f"""
{YELLOW}{BOLD}WARNING: CHECK SYNTAX{NORMAL} {YELLOW}{BOLD}WARNING: CHECK SYNTAX{NORMAL}
@ -689,7 +727,8 @@ def create_jail(jail_name):
config = cleandoc(f""" config = cleandoc(f"""
docker_compatible={docker_compatible} docker_compatible={docker_compatible}
gpu_passthrough={gpu_passthrough} gpu_passthrough_intel={gpu_passthrough_intel}
gpu_passthrough_nvidia={gpu_passthrough_nvidia}
systemd_nspawn_user_args={systemd_nspawn_user_args} systemd_nspawn_user_args={systemd_nspawn_user_args}
# You generally will not need to change the options below # You generally will not need to change the options below
systemd_run_default_args={' '.join(systemd_run_default_args)} systemd_run_default_args={' '.join(systemd_run_default_args)}