feat: Add nvidia compatibility option for LXC devices

UGA Innovation Factory
2026-02-16 15:51:19 -05:00
parent 950be4b8ce
commit 7e43285278
5 changed files with 219 additions and 0 deletions

@@ -7,6 +7,7 @@
{
  config,
  lib,
  pkgs,
  ...
}:
@@ -24,6 +25,56 @@ in
        default = false;
        description = "Enable Proxmox LXC container hardware configuration.";
      };
      cuda.enable = mkOption {
        type = types.bool;
        default = false;
        description = "Enable CUDA support in LXC containers (requires an NVIDIA GPU and drivers on the host).";
      };
      cuda.visibleDevices = mkOption {
        type = types.listOf types.str;
        default = [ "all" ];
        example = [
          "0"
          "1"
        ];
        description = ''
          NVIDIA devices exposed to containerized workloads.
          Use GPU indices (e.g. "0", "1"), GPU UUIDs, or "all".
        '';
      };
      cuda.driverCapabilities = mkOption {
        type = types.listOf types.str;
        default = [
          "compute"
          "utility"
        ];
        example = [
          "compute"
          "utility"
          "video"
        ];
        description = ''
          Driver capabilities passed to container runtimes via NVIDIA_DRIVER_CAPABILITIES.
        '';
      };
      cuda.driver.channel = mkOption {
        type = types.str;
        default = "dc_550";
        example = "legacy_470";
        description = ''
          NVIDIA driver package channel from boot.kernelPackages.nvidiaPackages.
          Common values include: stable, latest, beta, dc_550, legacy_470, legacy_390.
          Alias values: production -> stable, datacenter -> dc_550.
        '';
      };
      cuda.driver.package = mkOption {
        type = types.nullOr types.package;
        default = null;
        description = ''
          Explicit NVIDIA driver package override. When set, this takes precedence
          over cuda.driver.channel.
        '';
      };
    };
  };
  default = { };
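
An illustrative usage sketch of the new options from a container's NixOS configuration (assumptions: the athenix.hw.nix-lxc option path is taken from the error message in the next hunk; the hardware profile's own enable switch, which sits outside the visible diff context, and the Proxmox-side GPU passthrough are omitted):

    { ... }:
    {
      athenix.hw.nix-lxc = {
        cuda.enable = true;
        # Expose only the first GPU; the default "all" exposes every device.
        cuda.visibleDevices = [ "0" ];
        cuda.driverCapabilities = [
          "compute"
          "utility"
          "video"
        ];
        # "datacenter" resolves to "dc_550" via the alias map added below.
        cuda.driver.channel = "datacenter";
      };
    }
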
@@ -50,6 +101,53 @@ in
    # Set timezone to fix /etc/localtime for Docker containers
    time.timeZone = lib.mkDefault "America/New_York";

    # NVIDIA Container Toolkit for CUDA support (optional)
    hardware.nvidia = lib.mkIf cfg.cuda.enable {
      package =
        let
          nvidiaPackages = config.boot.kernelPackages.nvidiaPackages;
          driverAliasMap = {
            production = "stable";
            datacenter = "dc_550";
          };
          driverChannel = driverAliasMap.${cfg.cuda.driver.channel} or cfg.cuda.driver.channel;
        in
        if cfg.cuda.driver.package != null then
          cfg.cuda.driver.package
        else if builtins.hasAttr driverChannel nvidiaPackages then
          builtins.getAttr driverChannel nvidiaPackages
        else
          throw "athenix.hw.nix-lxc.cuda.driver.channel '${driverChannel}' not found in boot.kernelPackages.nvidiaPackages";
    };

    hardware.nvidia-container-toolkit.enable = lib.mkIf cfg.cuda.enable true;
    hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion = lib.mkIf cfg.cuda.enable true;

    environment.systemPackages = lib.mkIf cfg.cuda.enable [
      (pkgs.writeShellScriptBin "nvidia-smi" ''
        primary="${config.hardware.nvidia.package}/bin/nvidia-smi"
        secondary="${
          if builtins.hasAttr "bin" config.hardware.nvidia.package then
            config.hardware.nvidia.package.bin
          else
            config.hardware.nvidia.package
        }/bin/nvidia-smi"
        if [ -x "$primary" ]; then
          exec "$primary" "$@"
        fi
        if [ -x "$secondary" ]; then
          exec "$secondary" "$@"
        fi
        echo "nvidia-smi binary not found in configured NVIDIA package: ${config.hardware.nvidia.package}" >&2
        exit 127
      '')
    ];

    environment.variables = lib.mkIf cfg.cuda.enable {
      NVIDIA_VISIBLE_DEVICES = lib.concatStringsSep "," cfg.cuda.visibleDevices;
      NVIDIA_DRIVER_CAPABILITIES = lib.concatStringsSep "," cfg.cuda.driverCapabilities;
    };

    # Allow getty to work in containers
    systemd.services."getty@".unitConfig.ConditionPathExists = [
      ""