diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4279132..e32fff2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,9 +6,9 @@ name: CI on: # Triggers the workflow on push or pull request events for any branch push: - branches: [ "**" ] + branches: ["**"] pull_request: - branches: [ "**" ] + branches: ["**"] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -16,7 +16,7 @@ on: # A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: # This workflow contains a single job called "build" - build: + test: # The type of runner that the job will run on runs-on: ubuntu-24.04 @@ -25,6 +25,68 @@ jobs: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v4 - # Runs a single command using the runners shell - - name: Run a one-line script - run: sudo ./test/test.sh \ No newline at end of file + - name: Tune GitHub-hosted runner network + uses: smorimoto/tune-github-hosted-runner-network@v1 + + # Create a network namespace in the GitHub-hosted runner VM, + # simulating a primary bridge network on TrueNAS SCALE + - name: Set up networking resources + run: | + sudo -s < /etc/resolv.conf + + apt-get install -qq -y systemd-container + + cat </etc/systemd/network/10-br1.network + [Match] + Kind=bridge + Name=br1 + + [Network] + # Default to using a /24 prefix, giving up to 253 addresses per virtual network. 
+ Address=0.0.0.0/24 + LinkLocalAddressing=yes + DHCPServer=yes + IPMasquerade=both + LLDP=yes + EmitLLDP=customer-bridge + IPv6AcceptRA=no + IPv6SendRA=yes + NETWORKCONFIG + + systemctl restart systemd-networkd + ip link add name br1 type bridge + + iptables -I DOCKER-USER -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT + iptables -I DOCKER-USER -i br1 -o eth0 -j ACCEPT + END + + - name: Examine the GitHub-hosted runner environment + run: | + uname -r + cat /etc/os-release + python3 --version + ip addr + + # # TODO: create zpool with virtual disks, create jailmaker dataset and test jlmkr.py from there + # # https://medium.com/@abaddonsd/zfs-usage-with-virtual-disks-62898064a29b + # - name: Create a parent ZFS dataset + # run: | + # sudo -s <: ``` -The service config constaining `ExecStartPost` commands is then used to add the host side of the interface link to an existing host bridge and bring the interface up. Jailmaker has simplified this process by including a `post_start_hook` configuration parameter which can automate the creation of the service config by including the `ExecStartPost` commands as below. +The service config `ExecStartPost` commands is then used to add the host side of the interface link to an existing host bridge and bring the interface up. Jailmaker has simplified this process by including a `post_start_hook` configuration parameter which can automate the creation of the service config by including the `ExecStartPost` commands as below. ``` post_start_hook=#!/usr/bin/bash @@ -62,7 +62,7 @@ post_start_hook=#!/usr/bin/bash ip link set dev ve-docker-2 up ``` -With the new `--network-veth-extra` interface link created and the host side added to an existing host bridge, the jail side of the link still needs to be configured. Jailmaker provides a network file in the form of `/etc/systemd/network/vee-dhcp.network` which will automatically perform this configuration. 
In order for `vee-dhcp.network` to successfully match and configure the link's jail side interface, the `` must begin with a ***vee-*** prefix. An example jailmaker config with properly named `--network-veth-extra` interfaces and `post_start_hook` commands is available [here](https://github.com/Jip-Hop/jailmaker/discussions/179#discussioncomment-9499289). +With the new `--network-veth-extra` interface link created and the host side added to an existing host bridge, the jail side of the link still needs to be configured. Jailmaker provides a network file in the form of `/etc/systemd/network/vee-dhcp.network` which will automatically perform this configuration. In order for `vee-dhcp.network` to successfully match and configure the link's jail side interface, the `` must begin with a ***vee-*** prefix. An example jailmaker config with properly named `--network-veth-extra` interfaces and `post_start_hook` commands is available [here](https://github.com/Jip-Hop/jailmaker/discussions/179#discussioncomment-9499289). 
## Macvlan Networking diff --git a/jlmkr.py b/jlmkr.py index 3e6a9ed..5c54e50 100755 --- a/jlmkr.py +++ b/jlmkr.py @@ -4,7 +4,7 @@ with full access to all files via bind mounts, \ thanks to systemd-nspawn!""" -__version__ = "2.0.1" +__version__ = "2.1.0" __author__ = "Jip-Hop" __copyright__ = "Copyright (C) 2023, Jip-Hop" __license__ = "LGPL-3.0-only" @@ -115,9 +115,6 @@ systemd_nspawn_default_args=--bind-ro=/sys/module # Always add --bind-ro=/sys/module to make lsmod happy # https://manpages.debian.org/bookworm/manpages/sysfs.5.en.html -JAILS_DIR_PATH = "jails" -JAIL_CONFIG_NAME = "config" -JAIL_ROOTFS_NAME = "rootfs" DOWNLOAD_SCRIPT_DIGEST = ( "cfcb5d08b24187d108f2ab0d21a6cc4b73dcd7f5d7dfc80803bfd7f1642d638d" ) @@ -125,6 +122,9 @@ SCRIPT_PATH = os.path.realpath(__file__) SCRIPT_NAME = os.path.basename(SCRIPT_PATH) SCRIPT_DIR_PATH = os.path.dirname(SCRIPT_PATH) COMMAND_NAME = os.path.basename(__file__) +JAILS_DIR_PATH = os.path.join(SCRIPT_DIR_PATH, "jails") +JAIL_CONFIG_NAME = "config" +JAIL_ROOTFS_NAME = "rootfs" SHORTNAME = "jlmkr" # Only set a color if we have an interactive tty @@ -174,7 +174,9 @@ class KeyValueParser(configparser.ConfigParser): # Template to store comments as key value pair self._comment_template = "#{0} " + delimiter + " {1}" # Regex to match the comment prefix - self._comment_regex = re.compile(f"^#\d+\s*{re.escape(delimiter)}[^\S\n]*") + self._comment_regex = re.compile( + r"^#\d+\s*" + re.escape(delimiter) + r"[^\S\n]*" + ) # Regex to match cosmetic newlines (skips newlines in multiline values): # consecutive whitespace from start of line followed by a line not starting with whitespace self._cosmetic_newlines_regex = re.compile(r"^(\s+)(?=^\S)", re.MULTILINE) @@ -279,6 +281,25 @@ class CustomSubParser(argparse.ArgumentParser): raise ExceptionWithParser(self, message) +class Chroot: + def __init__(self, new_root): + self.new_root = new_root + self.old_root = None + self.initial_cwd = None + + def __enter__(self): + self.old_root = 
os.open("/", os.O_PATH) + self.initial_cwd = os.path.abspath(os.getcwd()) + os.chdir(self.new_root) + os.chroot(".") + + def __exit__(self, exc_type, exc_value, traceback): + os.chdir(self.old_root) + os.chroot(".") + os.close(self.old_root) + os.chdir(self.initial_cwd) + + def eprint(*args, **kwargs): """ Print to stderr. @@ -471,22 +492,22 @@ def exec_jail(jail_name, cmd): ).returncode -def status_jail(jail_name): +def status_jail(jail_name, args): """ Show the status of the systemd service wrapping the jail with given name. """ # Alternatively `machinectl status jail_name` could be used return subprocess.run( - ["systemctl", "status", f"{SHORTNAME}-{jail_name}"] + ["systemctl", "status", f"{SHORTNAME}-{jail_name}", *args] ).returncode -def log_jail(jail_name): +def log_jail(jail_name, args): """ Show the log file of the jail with given name. """ return subprocess.run( - ["journalctl", "-u", f"{SHORTNAME}-{jail_name}"] + ["journalctl", "-u", f"{SHORTNAME}-{jail_name}", *args] ).returncode @@ -519,7 +540,8 @@ def systemd_escape_path(path): """ return "".join( map( - lambda char: "\s" if char == " " else "\\\\" if char == "\\" else char, path + lambda char: r"\s" if char == " " else "\\\\" if char == "\\" else char, + path, ) ) @@ -572,7 +594,7 @@ def start_jail(jail_name): systemd_run_additional_args = [ f"--unit={SHORTNAME}-{jail_name}", - f"--working-directory=./{jail_path}", + f"--working-directory={jail_path}", f"--description=My nspawn jail {jail_name} [created with jailmaker]", ] @@ -658,18 +680,7 @@ def start_jail(jail_name): if not os.path.exists(os.path.join(jail_rootfs_path, "etc/machine-id")) and ( initial_setup := config.my_get("initial_setup") ): - if not initial_setup.startswith("#!"): - initial_setup = "#!/bin/sh\n" + initial_setup - - initial_setup_file_jailed_path = "/root/jlmkr-initial-setup" - initial_setup_file_host_path = os.path.abspath( - jail_rootfs_path + initial_setup_file_jailed_path - ) - - # Write a script file to call during initial 
setup - print(initial_setup, file=open(initial_setup_file_host_path, "w")) - stat_chmod(initial_setup_file_host_path, 0o700) - + # initial_setup has been assigned due to := expression above # Ensure the jail init system is ready before we start the initial_setup systemd_nspawn_additional_args += [ "--notify-ready=yes", @@ -712,38 +723,56 @@ def start_jail(jail_name): # Handle initial setup after jail is up and running (for the first time) if initial_setup: - print("About to run the initial setup.") + if not initial_setup.startswith("#!"): + initial_setup = "#!/bin/sh\n" + initial_setup + + with tempfile.NamedTemporaryFile( + mode="w+t", + prefix="jlmkr-initial-setup.", + dir=jail_rootfs_path, + delete=False, + ) as initial_setup_file: + # Write a script file to call during initial setup + initial_setup_file.write(initial_setup) + + initial_setup_file_name = os.path.basename(initial_setup_file.name) + initial_setup_file_host_path = os.path.abspath(initial_setup_file.name) + stat_chmod(initial_setup_file_host_path, 0o700) + + print(f"About to run the initial setup script: {initial_setup_file_name}.") print("Waiting for networking in the jail to be ready.") - print("Please wait (this may take 90s in case of bridge networking with STP is enabled)...") + print( + "Please wait (this may take 90s in case of bridge networking with STP is enabled)..." 
+ ) returncode = exec_jail( jail_name, [ "--", "systemd-run", - f"--unit={os.path.basename(initial_setup_file_jailed_path)}", + f"--unit={initial_setup_file_name}", "--quiet", "--pipe", "--wait", "--service-type=exec", "--property=After=network-online.target", "--property=Wants=network-online.target", - initial_setup_file_jailed_path, + "/" + initial_setup_file_name, ], ) - # Cleanup the initial_setup_file_host_path - if initial_setup_file_host_path: - Path(initial_setup_file_host_path).unlink(missing_ok=True) - if returncode != 0: eprint("Tried to run the following commands inside the jail:") eprint(initial_setup) eprint() + eprint(f"{RED}{BOLD}Failed to run initial setup...") eprint( - f"""{RED}{BOLD}Failed to run initial setup... you may want to stop and remove the jail and try again.{NORMAL}""" + f"You may want to manually run /{initial_setup_file_name} inside the jail for debugging purposes." ) + eprint(f"Or stop and remove the jail and try again.{NORMAL}") return returncode else: + # Cleanup the initial_setup_file_host_path + Path(initial_setup_file_host_path).unlink(missing_ok=True) print(f"Done with initial setup of jail {jail_name}!") return returncode @@ -766,6 +795,7 @@ def cleanup(jail_path): """ Cleanup jail. 
""" + if get_zfs_dataset(jail_path): eprint(f"Cleaning up: {jail_path}.") remove_zfs_dataset(jail_path) @@ -775,7 +805,12 @@ def cleanup(jail_path): # Should be fixed in Python 3.13 https://stackoverflow.com/a/70549000 def _onerror(func, path, exc_info): exc_type, exc_value, exc_traceback = exc_info - if not issubclass(exc_type, FileNotFoundError): + if issubclass(exc_type, PermissionError): + # Update the file permissions with the immutable and append-only bit cleared + subprocess.run(["chattr", "-i", "-a", path]) + # Reattempt the removal + func(path) + elif not issubclass(exc_type, FileNotFoundError): raise exc_value eprint(f"Cleaning up: {jail_path}.") @@ -838,10 +873,7 @@ def run_lxc_download_script( stat_chmod(lxc_download_script, 0o700) - check_exit_code = False - if None not in [jail_name, jail_path, jail_rootfs_path, distro, release]: - check_exit_code = True cmd = [ lxc_download_script, f"--name={jail_name}", @@ -851,27 +883,34 @@ def run_lxc_download_script( f"--dist={distro}", f"--release={release}", ] + + if rc := subprocess.run(cmd, env={"LXC_CACHE_PATH": lxc_cache}).returncode != 0: + eprint("Aborting...") + return rc + else: + # List images cmd = [lxc_download_script, "--list", f"--arch={arch}"] - p1 = subprocess.Popen( - cmd, stdout=subprocess.PIPE, env={"LXC_CACHE_PATH": lxc_cache} - ) + p1 = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env={"LXC_CACHE_PATH": lxc_cache} + ) - for line in iter(p1.stdout.readline, b""): - line = line.decode().strip() - # Filter out the known incompatible distros - if not re.match( - r"^(alpine|amazonlinux|busybox|devuan|funtoo|openwrt|plamo|voidlinux)\s", - line, - ): - print(line) + for line in iter(p1.stdout.readline, b""): + line = line.decode().strip() + # Filter out the known incompatible distros + if not re.match( + r"^(alpine|amazonlinux|busybox|devuan|funtoo|openwrt|plamo|voidlinux)\s", + line, + ): + print(line) - p1.wait() + rc = p1.wait() + # Currently --list will always return a non-zero exit code, 
even when listing the images was successful + # https://github.com/lxc/lxc/pull/4462 + # Therefore we must currently return 0, to prevent aborting the interactive create process - if check_exit_code and p1.returncode != 0: - eprint("Aborting...") - return p1.returncode + # return rc return 0 @@ -909,6 +948,10 @@ def get_mount_point(path): return path +def get_relative_path_in_jailmaker_dir(absolute_path): + return PurePath(absolute_path).relative_to(SCRIPT_DIR_PATH) + + def get_zfs_dataset(path): """ Get ZFS dataset path. @@ -938,21 +981,23 @@ def get_zfs_base_path(): return zfs_base_path -def create_zfs_dataset(relative_path): +def create_zfs_dataset(absolute_path): """ - Create a ZFS Dataset. - Receives the dataset to be created relative to the jailmaker script (e.g. "jails" or "jails/newjail"). + Create a ZFS Dataset inside the jailmaker directory at the provided absolute path. + E.g. "/mnt/mypool/jailmaker/jails" or "/mnt/mypool/jailmaker/jails/newjail"). """ + relative_path = get_relative_path_in_jailmaker_dir(absolute_path) dataset_to_create = os.path.join(get_zfs_base_path(), relative_path) eprint(f"Creating ZFS Dataset {dataset_to_create}") subprocess.run(["zfs", "create", dataset_to_create], check=True) -def remove_zfs_dataset(relative_path): +def remove_zfs_dataset(absolute_path): """ - Remove a ZFS Dataset. - Receives the dataset to be removed relative to the jailmaker script (e.g. "jails/oldjail"). + Remove a ZFS Dataset inside the jailmaker directory at the provided absolute path. + E.g. "/mnt/mypool/jailmaker/jails/oldjail". 
""" + relative_path = get_relative_path_in_jailmaker_dir(absolute_path) dataset_to_remove = os.path.join((get_zfs_base_path()), relative_path) eprint(f"Removing ZFS Dataset {dataset_to_remove}") subprocess.run(["zfs", "destroy", "-r", dataset_to_remove], check=True) @@ -1082,9 +1127,8 @@ def interactive_config(): input("Press Enter to continue...") print() - returncode = run_lxc_download_script() - if returncode != 0: - return returncode + if run_lxc_download_script() != 0: + fail("Failed to list images. Aborting...") print( dedent( @@ -1200,7 +1244,7 @@ def interactive_config(): def create_jail(**kwargs): print(DISCLAIMER) - if os.path.basename(os.getcwd()) != "jailmaker": + if os.path.basename(SCRIPT_DIR_PATH) != "jailmaker": eprint( dedent( f""" @@ -1212,7 +1256,7 @@ def create_jail(**kwargs): ) return 1 - if not PurePath(get_mount_point(os.getcwd())).is_relative_to("/mnt"): + if not PurePath(get_mount_point(SCRIPT_DIR_PATH)).is_relative_to("/mnt"): print( dedent( f""" @@ -1244,10 +1288,18 @@ def create_jail(**kwargs): if jail_config_path: # TODO: fallback to default values for e.g. distro and release if they are not in the config file - print(f"Creating jail {jail_name} from config template {jail_config_path}.") - if jail_config_path not in config.read(jail_config_path): - eprint(f"Failed to read config template {jail_config_path}.") - return 1 + if jail_config_path == "-": + print( + f"Creating jail {jail_name} from config template passed via stdin." + ) + config.read_string(sys.stdin.read()) + else: + print( + f"Creating jail {jail_name} from config template {jail_config_path}." 
+ ) + if jail_config_path not in config.read(jail_config_path): + eprint(f"Failed to read config template {jail_config_path}.") + return 1 else: print(f"Creating jail {jail_name} with default config.") config.read_string(DEFAULT_CONFIG) @@ -1266,7 +1318,8 @@ def create_jail(**kwargs): value = kwargs.pop(option) if ( value is not None - and len(value) + # String, non-empty list of args or int + and (isinstance(value, int) or len(value)) and value is not config.my_get(option, None) ): # TODO: this will wipe all systemd_nspawn_user_args from the template... @@ -1279,7 +1332,7 @@ def create_jail(**kwargs): print( dedent( f""" - TIP: Run `{COMMAND_NAME} create` without any arguments for interactive config. + Hint: run `{COMMAND_NAME} create` without any arguments for interactive config. Or use CLI args to override the default options. For more info, run: `{COMMAND_NAME} create --help` """ @@ -1318,10 +1371,13 @@ def create_jail(**kwargs): # but we don't need it so we will remove it later open(jail_config_path, "a").close() - returncode = run_lxc_download_script( - jail_name, jail_path, jail_rootfs_path, distro, release - ) - if returncode != 0: + if ( + returncode := run_lxc_download_script( + jail_name, jail_path, jail_rootfs_path, distro, release + ) + != 0 + ): + cleanup(jail_path) return returncode # Assuming the name of your jail is "myjail" @@ -1347,11 +1403,13 @@ def create_jail(**kwargs): # But alpine jails made with jailmaker have other issues # They don't shutdown cleanly via systemctl and machinectl... 
+ with Chroot(jail_rootfs_path): + # Use chroot to correctly resolve absolute /sbin/init symlink + init_system_name = os.path.basename(os.path.realpath("/sbin/init")) + if ( - os.path.basename( - os.path.realpath(os.path.join(jail_rootfs_path, "sbin/init")) - ) - != "systemd" + init_system_name != "systemd" + and parse_os_release(jail_rootfs_path).get("ID") != "nixos" ): print( dedent( @@ -1379,9 +1437,10 @@ def create_jail(**kwargs): config.my_set("startup", 0) start_now = False + # Remove config which systemd handles for us with contextlib.suppress(FileNotFoundError): - # Remove config which systemd handles for us os.remove(os.path.join(jail_rootfs_path, "etc/machine-id")) + with contextlib.suppress(FileNotFoundError): os.remove(os.path.join(jail_rootfs_path, "etc/resolv.conf")) # https://github.com/systemd/systemd/issues/852 @@ -1558,6 +1617,7 @@ def remove_jail(jail_name): return 1 # TODO: print which dataset is about to be removed before the user confirmation + # TODO: print that all zfs snapshots will be removed if jail has its own zfs dataset check = input(f'\nCAUTION: Type "{jail_name}" to confirm jail deletion!\n\n') if check == jail_name: @@ -1616,16 +1676,21 @@ def get_all_jail_names(): return jail_names -def parse_os_release(candidates): - for candidate in candidates: - try: - with open(candidate, encoding="utf-8") as f: - # TODO: can I create a solution which not depends on the internal _parse_os_release method? - return platform._parse_os_release(f) - except OSError: - # Silently ignore failing to read os release info - pass - return {} +def parse_os_release(new_root): + result = {} + with Chroot(new_root): + # Use chroot to correctly resolve os-release symlink (for nixos) + for candidate in ["/etc/os-release", "/usr/lib/os-release"]: + try: + with open(candidate, encoding="utf-8") as f: + # TODO: can I create a solution which not depends on the internal _parse_os_release method? 
+ result = platform._parse_os_release(f) + break + except OSError: + # Silently ignore failing to read os release info + pass + + return result def list_jails(): @@ -1680,13 +1745,7 @@ def list_jails(): jail["addresses"] += "…" else: # Parse os-release info ourselves - jail_platform = parse_os_release( - ( - os.path.join(jail_rootfs_path, "etc/os-release"), - os.path.join(jail_rootfs_path, "usr/lib/os-release"), - ) - ) - + jail_platform = parse_os_release(jail_rootfs_path) jail["os"] = jail_platform.get("ID") jail["version"] = jail_platform.get("VERSION_ID") or jail_platform.get( "VERSION_CODENAME" @@ -1775,7 +1834,7 @@ def main(): title="commands", dest="command", metavar="", parser_class=CustomSubParser ) - split_commands = ["create", "exec"] + split_commands = ["create", "exec", "log", "status"] commands = {} for d in [ @@ -1863,6 +1922,18 @@ def main(): help="args to pass to machinectl shell", ) + commands["log"].add_argument( + "args", + nargs="*", + help="args to pass to journalctl", + ) + + commands["status"].add_argument( + "args", + nargs="*", + help="args to pass to systemctl", + ) + commands["create"].add_argument( "jail_name", # nargs="?", @@ -1890,7 +1961,7 @@ def main(): commands["create"].add_argument( "-c", # "--config", - help="path to config file template", + help="path to config file template or - for stdin", ) commands["create"].add_argument( "-gi", # @@ -1917,9 +1988,6 @@ def main(): # Set appropriate permissions (if not already set) for this file, since it's executed as root stat_chmod(SCRIPT_PATH, 0o700) - # Work relative to this script - os.chdir(SCRIPT_DIR_PATH) - # Ignore all args after the first "--" args_to_parse = split_at_string(sys.argv[1:], "--")[0] # Check for help @@ -1943,7 +2011,6 @@ def main(): args = vars(parser.parse_known_args(args_to_parse)[0]) # ...and check if help is still in the remaining args need_help = args.get("help") - print(need_help) if need_help: commands[command].print_help() diff --git 
a/templates/docker/config b/templates/docker/config index 51f7f1d..5d72f87 100644 --- a/templates/docker/config +++ b/templates/docker/config @@ -4,7 +4,7 @@ gpu_passthrough_nvidia=0 # Turning off seccomp filtering improves performance at the expense of security seccomp=1 -# Use macvlan networking to provide an isolated network namespace, +# Use bridge networking to provide an isolated network namespace, # so docker can manage firewall rules # Alternatively use --network-macvlan=eno1 instead of --network-bridge # Ensure to change eno1/br1 to the interface name you want to use diff --git a/templates/incus/config b/templates/incus/config index 4234843..9485b91 100644 --- a/templates/incus/config +++ b/templates/incus/config @@ -6,7 +6,7 @@ gpu_passthrough_nvidia=0 # TODO: don't disable seccomp but specify which syscalls should be allowed seccomp=0 -# Use macvlan networking to provide an isolated network namespace, +# Use bridge networking to provide an isolated network namespace, # so incus can manage firewall rules # Alternatively use --network-macvlan=eno1 instead of --network-bridge # Ensure to change eno1/br1 to the interface name you want to use diff --git a/templates/k3s/config b/templates/k3s/config index 84eabfe..5853e1b 100644 --- a/templates/k3s/config +++ b/templates/k3s/config @@ -4,7 +4,7 @@ gpu_passthrough_nvidia=0 # Turning off seccomp filtering improves performance at the expense of security seccomp=1 -# Use macvlan networking to provide an isolated network namespace, +# Use bridge networking to provide an isolated network namespace, # so kubernetes can manage firewall rules # Alternatively use --network-macvlan=eno1 instead of --network-bridge # Ensure to change eno1/br1 to the interface name you want to use diff --git a/templates/lxd/config b/templates/lxd/config index 36a3af5..0b068b3 100644 --- a/templates/lxd/config +++ b/templates/lxd/config @@ -6,7 +6,7 @@ gpu_passthrough_nvidia=0 # TODO: don't disable seccomp but specify which syscalls should 
be allowed seccomp=0 -# Use macvlan networking to provide an isolated network namespace, +# Use bridge networking to provide an isolated network namespace, # so lxd can manage firewall rules # Alternatively use --network-macvlan=eno1 instead of --network-bridge # Ensure to change eno1/br1 to the interface name you want to use diff --git a/templates/nixos/README.md b/templates/nixos/README.md new file mode 100644 index 0000000..9181243 --- /dev/null +++ b/templates/nixos/README.md @@ -0,0 +1,35 @@ +# Nixos Jail Template + +## Disclaimer + +**Experimental. Using nixos in this setup hasn't been extensively tested and has [known issues](#known-issues).** + +## Setup + +Check out the [config](./config) template file. You may provide it when asked during `./jlmkr.py create` or, if you have the template file stored on your NAS, you may provide it directly by running `./jlmkr.py create --start --config /mnt/tank/path/to/nixos/config mynixosjail`. + +## Manual Setup + +```bash +# Create the jail without starting +./jlmkr.py create --distro=nixos --release=24.05 nixos --network-bridge=br1 --resolv-conf=bind-host --bind-ro=./lxd.nix:/etc/nixos/lxd.nix +# Create empty nix module to satisfy import in default lxc configuration.nix +echo '{ ... }:{}' > ./jails/nixos/lxd.nix +# Start the nixos jail +./jlmkr.py start nixos +sleep 90 +# Network should be up by now +./jlmkr.py shell nixos /bin/sh -c 'ifconfig' +# Try to rebuild the system +./jlmkr.py shell nixos /bin/sh -c 'nixos-rebuild switch' +``` + +## Known Issues + +### Environment jlmkr exec + +Running `./jlmkr.py exec mynixosjail ifconfig` doesn't work because the shell environment isn't setup properly. You can run `./jlmkr.py shell mynixosjail /bin/sh -c 'ifconfig'` or `./jlmkr.py exec mynixosjail /bin/sh -c '. /etc/bashrc; ifconfig'` instead. + +### Bridge networking only + +This setup has NOT been tested with macvlan networking. 
\ No newline at end of file diff --git a/templates/nixos/config b/templates/nixos/config new file mode 100644 index 0000000..fb8f92d --- /dev/null +++ b/templates/nixos/config @@ -0,0 +1,54 @@ +startup=0 +gpu_passthrough_intel=0 +gpu_passthrough_nvidia=0 +# Turning off seccomp filtering improves performance at the expense of security +seccomp=1 + +# Use bridge networking to provide an isolated network namespace, +# so nixos can manage firewall rules +# Ensure to change br1 to the interface name you want to use +# You may want to add additional options here, e.g. bind mounts +systemd_nspawn_user_args=--network-bridge=br1 + --bind-ro=./lxd.nix:/etc/nixos/lxd.nix + +# Script to run on the HOST before starting the jail +pre_start_hook=#!/usr/bin/env bash + set -euo pipefail + echo 'PRE_START_HOOK' + + # If there's no machine-id then we're about to start the jail for the first time + if [ ! -e ./rootfs/etc/machine-id ]; then + echo 'BEFORE_FIRST_BOOT' + # Create empty nix module to satisfy import in default lxc configuration.nix + echo '{ ... }:{}' > ./lxd.nix + cp /etc/resolv.conf ./rootfs/etc/resolv.conf + fi + +# Only used while creating the jail +distro=nixos +release=24.05 + +# # Example initial_setup which rebuilds the system, +# # for when you mount your own /etc/nixos/configuration.nix inside the jail +# initial_setup=#!/run/current-system/sw/bin/bash +# . 
/etc/bashrc +# set -x +# ifconfig +# nixos-rebuild switch +# echo "All Done" + +# You generally will not need to change the options below +systemd_run_default_args=--property=KillMode=mixed + --property=Type=notify + --property=RestartForceExitStatus=133 + --property=SuccessExitStatus=133 + --property=Delegate=yes + --property=TasksMax=infinity + --collect + --setenv=SYSTEMD_NSPAWN_LOCK=0 + +systemd_nspawn_default_args=--keep-unit + --quiet + --boot + --bind-ro=/sys/module + --inaccessible=/sys/module/apparmor \ No newline at end of file diff --git a/templates/podman/config b/templates/podman/config index b19106d..6aa1cfc 100644 --- a/templates/podman/config +++ b/templates/podman/config @@ -4,7 +4,7 @@ gpu_passthrough_nvidia=0 # Turning off seccomp filtering improves performance at the expense of security seccomp=1 -# Use macvlan networking to provide an isolated network namespace, +# Use bridge networking to provide an isolated network namespace, # so podman can manage firewall rules # Alternatively use --network-macvlan=eno1 instead of --network-bridge # Ensure to change eno1/br1 to the interface name you want to use diff --git a/templates/router/README.md b/templates/router/README.md new file mode 100644 index 0000000..70912c5 --- /dev/null +++ b/templates/router/README.md @@ -0,0 +1,44 @@ +# Router Jail Template + +Host a subordinate LAN using nftables and dnsmasq for DHCP, DNS, routing, and netboot infrastructure. +``` +router laptop desktop + | | | + +-- LAN --+-------+ + | + { TrueNAS SCALE } + | + +-----+-----+-- LAN2 --+------+------+-------+ + | | | | | | | + RPi1 RPi2 RPi3 NUC01 NUC02 NUC03 CrayYMP +``` +*Example usage*: deploy a flock of headless/diskless Raspberry Pi worker nodes for Kubernetes; each netbooting into an iSCSI or NFS root volume. + +## Setup + +Use the TrueNAS SCALE administrative UI to create a network bridge interface. Assign to that bridge a physical interface that's not shared with the host network. 
+ +Use the `dnsmasq-example.conf` file as a starting point for your own dnsmasq settings file(s). Copy or mount them inside `/etc/dnsmasq.d/` within the jail. + +Optional: place assets in the mounted `/tftp/` directory for netbooting clients. + +Optional: attach more jails to this same bridge to host e.g. a K3s control plane, an nginx load balancer, a PostgreSQL database... + +Check out the [config](./config) template file. You may provide it when asked during `./jlmkr.py create` or, if you have the template file stored on your NAS, you may provide it directly by running `./jlmkr.py create --start --config /mnt/tank/path/to/router/config myrouterjail`. + +## Additional Resources + +There are as many reasons to host LAN infrastructure as there are to connect a LAN. This template can help you kick-start such a leaf network, using a TrueNAS jail as its gateway host. + +For those specifically interested in *netbooting Raspberry Pi*, the following **external** links might help you get started. + +* [Network Booting a Raspberry Pi 4 with an iSCSI Root via FreeNAS][G1]; the title says it all +* [Raspberry Pi Network Boot Guide][G2] covers more Raspberry Pi models; written for Synology users +* [pi_iscsi_netboot][s1] and [prep-netboot-storage][s2] are scripts showing preparation of boot assets and iSCSI root volumes + +Good luck! 
+[G1]: https://shawnwilsher.com/2020/05/network-booting-a-raspberry-pi-4-with-an-iscsi-root-via-freenas/ +[G2]: https://warmestrobot.com/blog/2021/6/21/raspberry-pi-network-boot-guide +[s1]: https://github.com/tjpetz/pi_iscsi_netboot +[s2]: https://gitlab.com/jnicpon/rpi-prep/-/blob/main/scripts/prep-netboot-storage.fish?ref_type=heads diff --git a/templates/router/config b/templates/router/config new file mode 100644 index 0000000..65b1cfa --- /dev/null +++ b/templates/router/config @@ -0,0 +1,82 @@ +# See also: +# +# +startup=0 +gpu_passthrough_intel=0 +gpu_passthrough_nvidia=0 +# Turning off seccomp filtering improves performance at the expense of security +seccomp=1 + +# Use bridge networking to provide an isolated network namespace +# Alternatively use --network-macvlan=eno1 instead of --network-bridge +# Ensure to change br0 to the HOST interface name you want to use +# and br1 to the SECONDARY interface name you want to prepare +# Substitute your own dnsmasq.d and TFTP dataset bindings +systemd_nspawn_user_args=--network-bridge=br0 + --network-veth-extra=ve-router-1:vee-1 + --resolv-conf=bind-host + --system-call-filter='add_key keyctl bpf' + --bind=/mnt/pool/subnet/dnsmasq.d:/etc/dnsmasq.d + --bind-ro=/mnt/pool/subnet/tftpboot:/tftp + +# Script to run on the HOST before starting the jail +# Load kernel module and config kernel settings required for routing +pre_start_hook=#!/usr/bin/bash + set -euo pipefail + echo 'PRE_START_HOOK' + echo 1 > /proc/sys/net/ipv4/ip_forward + modprobe br_netfilter + echo 1 > /proc/sys/net/bridge/bridge-nf-call-iptables + echo 1 > /proc/sys/net/bridge/bridge-nf-call-ip6tables + modprobe iptable_nat + modprobe iptable_filter + +# Script to run on the HOST after starting the jail +# For example to attach to multiple bridge interfaces +post_start_hook=#!/usr/bin/bash + set -euo pipefail + echo 'POST_START_HOOK' + ip link set dev ve-router-1 master br1 + ip link set dev ve-router-1 up + #ip link set dev eth2 master br1 + +# Only 
used while creating the jail +distro=debian +release=bookworm + +# Install and configure within the jail +initial_setup=#!/usr/bin/bash + set -euo pipefail + + # Catch up on updates + apt-get update && apt-get full-upgrade -y + + # Configure worker LAN interface with static IP + sh -c 'cat <<EOF > /etc/systemd/network/80-container-vee-1.network + [Match] + Virtualization=container + Name=vee-1 + + [Network] + DHCP=false + Address=10.3.14.202/24 + EOF' + systemctl restart systemd-networkd.service + + # Configure routing from LAN clients + apt-get install nftables -y + nft add table nat + nft add chain nat prerouting { type nat hook prerouting priority 0 \; } + nft add chain nat postrouting { type nat hook postrouting priority 100 \; } + nft add rule nat postrouting masquerade + mkdir -p /etc/nftables.d + nft list table nat >/etc/nftables.d/nat.conf + ( echo ; echo 'include "/etc/nftables.d/*.conf"' ) >>/etc/nftables.conf + + # Install dnsmasq alongside local resolver + sed -i -e 's/^#DNSStubListener=yes$/DNSStubListener=no/' /etc/systemd/resolved.conf + systemctl restart systemd-resolved.service + apt-get install dnsmasq -y + sed -i -e 's/^#DNS=$/DNS=127.0.0.1/' /etc/systemd/resolved.conf + systemctl restart systemd-resolved.service + systemctl restart dnsmasq.service diff --git a/templates/router/dnsmasq-example.conf b/templates/router/dnsmasq-example.conf new file mode 100644 index 0000000..fee9a4e --- /dev/null +++ b/templates/router/dnsmasq-example.conf @@ -0,0 +1,50 @@ +# customize and place this file inside /etc/dnsmasq.d + +# serve only Raspberry Pi network; don't backfeed to the host LAN +no-dhcp-interface=host0 +interface=vee-1 +bind-interfaces + +# designated upstream query servers +server=1.1.1.1 +server=1.0.0.1 + +# pirate TLD for the Democratic Republic of Raspberry Pi +domain=pi,10.3.14.0/24 + +# enable DHCP services +dhcp-authoritative +dhcp-rapid-commit +dhcp-range=10.3.14.101,10.3.14.199 + +# meet the 'berries 
+dhcp-host=e4:5f:01:da:da:b1,rpi1,10.3.14.11,infinite,set:rpi +dhcp-host=e4:5f:01:da:da:b2,rpi2,10.3.14.12,infinite,set:rpi +dhcp-host=e4:5f:01:da:da:b3,rpi3,10.3.14.13,infinite,set:rpi +dhcp-host=e4:5f:01:da:da:b4,rpi4,10.3.14.14,infinite,set:rpi +dhcp-host=e4:5f:01:da:da:b5,rpi5,10.3.14.15,infinite,set:rpi +dhcp-host=e4:5f:01:da:*:*,set:rpicube + +# PXE +dhcp-option-force=66,10.3.14.202 +# magic number +dhcp-option-force=208,f1:00:74:7e +# config filename +dhcp-option-force=209,configs/common +# path prefix +dhcp-option-force=210,/boot/ +# reboot time (i -> 32 bit) +dhcp-option-force=211,30i + +dhcp-boot=bootcode.bin + +#dhcp-match=set:ipxe,175 +#dhcp-boot=tag:ipxe,http://boot.netboot.xyz/ipxe/netboot.xyz.efi + +# TFTP +enable-tftp +tftp-root=/tftp + +#debugging +#log-queries +#log-dhcp diff --git a/test/README.md b/test/README.md new file mode 100644 index 0000000..0653693 --- /dev/null +++ b/test/README.md @@ -0,0 +1,77 @@ +# Jailmaker Testing + +This readme documents the [test-jlmkr](./test-jlmkr) script. + +The script has 2 optional parameter invocation sets: +* `` [``] +* `