feat: Add nvidia compatibility option for LXC devices
All checks were successful
CI / Format Check (push) Successful in 2s
CI / Flake Check (push) Successful in 1m46s
CI / Evaluate Key Configurations (nix-builder) (push) Successful in 14s
CI / Evaluate Key Configurations (nix-desktop1) (push) Successful in 8s
CI / Evaluate Key Configurations (nix-laptop1) (push) Successful in 9s
CI / Evaluate Artifacts (installer-iso-nix-laptop1) (push) Successful in 21s
CI / Evaluate Artifacts (lxc-nix-builder) (push) Successful in 14s
CI / Build and Publish Documentation (push) Successful in 12s
All checks were successful
CI / Format Check (push) Successful in 2s
CI / Flake Check (push) Successful in 1m46s
CI / Evaluate Key Configurations (nix-builder) (push) Successful in 14s
CI / Evaluate Key Configurations (nix-desktop1) (push) Successful in 8s
CI / Evaluate Key Configurations (nix-laptop1) (push) Successful in 9s
CI / Evaluate Artifacts (installer-iso-nix-laptop1) (push) Successful in 21s
CI / Evaluate Artifacts (lxc-nix-builder) (push) Successful in 14s
CI / Build and Publish Documentation (push) Successful in 12s
This commit is contained in:
@@ -343,6 +343,34 @@ nix-lxc = {
|
||||
};
|
||||
```
|
||||
|
||||
### Proxmox LXC with NVIDIA (Dual P40 Example)
|
||||
|
||||
```nix
|
||||
nix-lxc = {
|
||||
devices = {
|
||||
"gpu-builder" = {
|
||||
athenix.host.useHostPrefix = false;
|
||||
athenix.sw.type = "headless";
|
||||
|
||||
# Optional NVIDIA containerization support
|
||||
athenix.hw.nix-lxc.cuda = {
|
||||
enable = true;
|
||||
# Expose both GPUs (index-based)
|
||||
visibleDevices = [ "0" "1" ];
|
||||
# Select a compatible driver branch for older cards when needed
|
||||
driver.channel = "legacy_470";
|
||||
driverCapabilities = [ "compute" "utility" ];
|
||||
};
|
||||
};
|
||||
};
|
||||
};
|
||||
```
|
||||
|
||||
Notes:
|
||||
- The Proxmox host must pass its `/dev/nvidia*` device nodes through to the container.
|
||||
- Use `driver.channel = "dc_550"` (default) for newer datacenter cards when supported.
|
||||
- Use `driver.package` only when you need an explicit package override.
|
||||
|
||||
### WSL Instances
|
||||
|
||||
```nix
|
||||
|
||||
@@ -5,6 +5,7 @@ All UGA Innovation Factory-specific options are in the `athenix` namespace to av
|
||||
## Table of Contents
|
||||
|
||||
- [Host Configuration (`athenix.host`)](#host-configuration-athenixhost)
|
||||
- [Hardware Type Configuration (`athenix.hw`)](#hardware-type-configuration-athenixhw)
|
||||
- [Software Configuration (`athenix.sw`)](#software-configuration-athenixsw)
|
||||
- [User Management (`athenix.users`)](#user-management-athenixusers)
|
||||
- [Convenience Options](#convenience-options)
|
||||
@@ -90,6 +91,69 @@ Default WSL user account (only for `nix-wsl` type).
|
||||
athenix.host.wsl.user = "myusername";
|
||||
```
|
||||
|
||||
## Hardware Type Configuration (`athenix.hw`)
|
||||
|
||||
Hardware-type-specific options. These are usually set in per-device config or fleet overrides.
|
||||
|
||||
### `athenix.hw.nix-lxc.cuda.enable`
|
||||
|
||||
Enable NVIDIA CUDA support inside Proxmox LXC containers. The Proxmox host must provide the NVIDIA GPU and its drivers.
|
||||
|
||||
**Type:** Boolean
|
||||
|
||||
**Default:** `false`
|
||||
|
||||
### `athenix.hw.nix-lxc.cuda.visibleDevices`
|
||||
|
||||
Selects which NVIDIA GPUs are exposed to containerized workloads. The list is joined with commas and exported as `NVIDIA_VISIBLE_DEVICES`.
|
||||
|
||||
**Type:** List of strings
|
||||
|
||||
**Default:** `[ "all" ]`
|
||||
|
||||
**Examples:**
|
||||
- `[ "all" ]`
|
||||
- `[ "0" "1" ]` (for dual-GPU systems)
|
||||
- `[ "GPU-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" ]`
|
||||
|
||||
### `athenix.hw.nix-lxc.cuda.driverCapabilities`
|
||||
|
||||
Sets the `NVIDIA_DRIVER_CAPABILITIES` environment variable for container runtimes. The list is joined with commas.
|
||||
|
||||
**Type:** List of strings
|
||||
|
||||
**Default:** `[ "compute" "utility" ]`
|
||||
|
||||
**Example:**
|
||||
```nix
|
||||
athenix.hw.nix-lxc.cuda.driverCapabilities = [ "compute" "utility" "video" ];
|
||||
```
|
||||
|
||||
### `athenix.hw.nix-lxc.cuda.driver.channel`
|
||||
|
||||
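Name of the driver package attribute to select from `boot.kernelPackages.nvidiaPackages`. Evaluation fails with an error if the (alias-resolved) name is not present in that set.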
|
||||
|
||||
**Type:** String
|
||||
|
||||
**Default:** `"dc_550"`
|
||||
|
||||
**Common values:** `"stable"`, `"latest"`, `"beta"`, `"dc_550"`, `"legacy_470"`, `"legacy_390"`
|
||||
|
||||
**Aliases:**
|
||||
- `"production"` → `"stable"`
|
||||
- `"datacenter"` → `"dc_550"`
|
||||
|
||||
### `athenix.hw.nix-lxc.cuda.driver.package`
|
||||
|
||||
Explicit package override for NVIDIA driver selection.
|
||||
|
||||
**Type:** Package or null
|
||||
|
||||
**Default:** `null`
|
||||
|
||||
**Description:**
|
||||
When set, this takes precedence over `athenix.hw.nix-lxc.cuda.driver.channel`.
|
||||
|
||||
## Software Configuration (`athenix.sw`)
|
||||
|
||||
System type, packages, and application configuration.
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
|
||||
@@ -24,6 +25,56 @@ in
|
||||
default = false;
|
||||
description = "Enable Proxmox LXC container hardware configuration.";
|
||||
};
|
||||
cuda.enable = mkOption {
|
||||
type = types.bool;
|
||||
default = false;
|
||||
description = "Enable CUDA support in LXC containers (requires NVIDIA GPU and drivers on host).";
|
||||
};
|
||||
cuda.visibleDevices = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [ "all" ];
|
||||
example = [
|
||||
"0"
|
||||
"1"
|
||||
];
|
||||
description = ''
|
||||
NVIDIA devices exposed to containerized workloads.
|
||||
Use indexes (e.g. "0", "1"), UUIDs, or "all".
|
||||
'';
|
||||
};
|
||||
cuda.driverCapabilities = mkOption {
|
||||
type = types.listOf types.str;
|
||||
default = [
|
||||
"compute"
|
||||
"utility"
|
||||
];
|
||||
example = [
|
||||
"compute"
|
||||
"utility"
|
||||
"video"
|
||||
];
|
||||
description = ''
|
||||
Value used for NVIDIA_DRIVER_CAPABILITIES for container runtimes.
|
||||
'';
|
||||
};
|
||||
cuda.driver.channel = mkOption {
|
||||
type = types.str;
|
||||
default = "dc_550";
|
||||
example = "legacy_470";
|
||||
description = ''
|
||||
NVIDIA driver package channel from boot.kernelPackages.nvidiaPackages.
|
||||
Common values include: stable, latest, beta, dc_550, legacy_470, legacy_390.
|
||||
Alias values: production -> stable, datacenter -> dc_550.
|
||||
'';
|
||||
};
|
||||
cuda.driver.package = mkOption {
|
||||
type = types.nullOr types.package;
|
||||
default = null;
|
||||
description = ''
|
||||
Explicit NVIDIA driver package override. When set, this takes precedence
|
||||
over cuda.driver.channel.
|
||||
'';
|
||||
};
|
||||
};
|
||||
};
|
||||
default = { };
|
||||
@@ -50,6 +101,53 @@ in
|
||||
# Set timezone to fix /etc/localtime for Docker containers
|
||||
time.timeZone = lib.mkDefault "America/New_York";
|
||||
|
||||
# NVIDIA Container Toolkit for CUDA support (optional)
|
||||
hardware.nvidia = lib.mkIf cfg.cuda.enable {
|
||||
package =
|
||||
let
|
||||
nvidiaPackages = config.boot.kernelPackages.nvidiaPackages;
|
||||
driverAliasMap = {
|
||||
production = "stable";
|
||||
datacenter = "dc_550";
|
||||
};
|
||||
driverChannel = driverAliasMap.${cfg.cuda.driver.channel} or cfg.cuda.driver.channel;
|
||||
in
|
||||
if cfg.cuda.driver.package != null then
|
||||
cfg.cuda.driver.package
|
||||
else if builtins.hasAttr driverChannel nvidiaPackages then
|
||||
builtins.getAttr driverChannel nvidiaPackages
|
||||
else
|
||||
throw "athenix.hw.nix-lxc.cuda.driver.channel '${driverChannel}' not found in boot.kernelPackages.nvidiaPackages";
|
||||
};
|
||||
hardware.nvidia-container-toolkit.enable = lib.mkIf cfg.cuda.enable true;
|
||||
hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion = lib.mkIf cfg.cuda.enable true;
|
||||
environment.systemPackages = lib.mkIf cfg.cuda.enable [
|
||||
(pkgs.writeShellScriptBin "nvidia-smi" ''
|
||||
primary="${config.hardware.nvidia.package}/bin/nvidia-smi"
|
||||
secondary="${
|
||||
if builtins.hasAttr "bin" config.hardware.nvidia.package then
|
||||
config.hardware.nvidia.package.bin
|
||||
else
|
||||
config.hardware.nvidia.package
|
||||
}/bin/nvidia-smi"
|
||||
|
||||
if [ -x "$primary" ]; then
|
||||
exec "$primary" "$@"
|
||||
fi
|
||||
|
||||
if [ -x "$secondary" ]; then
|
||||
exec "$secondary" "$@"
|
||||
fi
|
||||
|
||||
echo "nvidia-smi binary not found in configured NVIDIA package: ${config.hardware.nvidia.package}" >&2
|
||||
exit 127
|
||||
'')
|
||||
];
|
||||
environment.variables = lib.mkIf cfg.cuda.enable {
|
||||
NVIDIA_VISIBLE_DEVICES = lib.concatStringsSep "," cfg.cuda.visibleDevices;
|
||||
NVIDIA_DRIVER_CAPABILITIES = lib.concatStringsSep "," cfg.cuda.driverCapabilities;
|
||||
};
|
||||
|
||||
# Allow getty to work in containers
|
||||
systemd.services."getty@".unitConfig.ConditionPathExists = [
|
||||
""
|
||||
|
||||
@@ -127,6 +127,25 @@
|
||||
};
|
||||
};
|
||||
};
|
||||
"nix-big-cuda" = {
|
||||
athenix.sw.headless.enable = true;
|
||||
nixpkgs.config = {
|
||||
allowUnfree = true;
|
||||
nvidia.acceptLicense = true;
|
||||
};
|
||||
athenix.hw.nix-lxc.cuda = {
|
||||
enable = true;
|
||||
visibleDevices = [
|
||||
"0"
|
||||
"1"
|
||||
];
|
||||
driver.channel = "legacy_535";
|
||||
driverCapabilities = [
|
||||
"compute"
|
||||
"utility"
|
||||
];
|
||||
};
|
||||
};
|
||||
"usda-dash".external = {
|
||||
url = "https://git.factory.uga.edu/MODEL/usda-dash-config.git";
|
||||
rev = "ce2700b0196e106f7c013bbcee851a5f96b146a3";
|
||||
|
||||
10
users.nix
10
users.nix
@@ -48,6 +48,7 @@
|
||||
enable = true; # Default user, enabled everywhere
|
||||
};
|
||||
hdh20267 = {
|
||||
description = "Hunter Halloran";
|
||||
external = {
|
||||
url = "https://git.factory.uga.edu/hdh20267/hdh20267-nix";
|
||||
rev = "dbdf65c7bd59e646719f724a3acd2330e0c922ec";
|
||||
@@ -67,5 +68,14 @@
|
||||
shell = "zsh";
|
||||
# enable = false by default, set to true per-system
|
||||
};
|
||||
dj69594 = {
|
||||
description = "David Joy";
|
||||
extraGroups = [
|
||||
"networkmanager"
|
||||
"wheel"
|
||||
];
|
||||
shell = "zsh";
|
||||
# enable = false by default, set to true per-system
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user