-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
nvidia-mig: nvidia-mig implementation
- Loading branch information
1 parent
5902dc0
commit cf9e015
Showing
16 changed files
with
1,010 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
10 changes: 10 additions & 0 deletions
10
packages/nvidia-k8s-device-plugin/nvidia-k8s-device-plugin-mig-conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
[required-extensions] | ||
kubelet-device-plugins = "v1" | ||
std = { version = "v1", helpers = ["if_not_null", "toml_encode"]} | ||
+++ | ||
{{#if_not_null settings.kubelet-device-plugins.nvidia.device-partitioning-strategy}} | ||
device-partitioning-strategy = "{{{settings.kubelet-device-plugins.nvidia.device-partitioning-strategy}}}" | ||
{{/if_not_null}} | ||
{{#if_not_null settings.kubelet-device-plugins.nvidia.mig.profile}} | ||
profile = {{ toml_encode settings.kubelet-device-plugins.nvidia.mig.profile }} | ||
{{/if_not_null}} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[Service] | ||
ExecCondition=/usr/bin/nvidia-migmanager is-fabric-manager-compatible |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[Service] | ||
ExecStart=-/usr/bin/nvidia-migmanager reboot-if-required |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
d /etc/nvidia-migmanager 0750 root root - | ||
d /run/nvidia-migmanager 0755 root root - |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[Unit] | ||
Description=NVIDIA MIG manager service | ||
After=nvidia-fabricmanager.service nvidia-persistenced.service | ||
RefuseManualStart=true | ||
RefuseManualStop=true | ||
|
||
[Service] | ||
Type=oneshot | ||
ExecStart=/usr/bin/nvidia-migmanager apply-mig | ||
RemainAfterExit=true | ||
StandardError=journal+console | ||
SyslogIdentifier=nvidia-migmanager | ||
|
||
[Install] | ||
WantedBy=configured.target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,8 @@ members = [ | |
|
||
"netdog", | ||
|
||
"nvidia-migmanager", | ||
|
||
"cfsignal", | ||
|
||
"logdog", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
[package] | ||
name = "nvidia-migmanager" | ||
version = "0.1.0" | ||
authors = ["Piyush Jena <jepiyush@amazon.com>"] | ||
license = "Apache-2.0 OR MIT" | ||
edition = "2021" | ||
publish = false | ||
build = "build.rs" | ||
# Don't rebuild crate just because of changes to README. | ||
exclude = ["README.md"] | ||
|
||
[dependencies] | ||
argh.workspace = true | ||
base64.workspace = true | ||
constants.workspace = true | ||
log.workspace = true | ||
regex.workspace = true | ||
serde = { workspace = true, features = ["derive"] } | ||
serde_plain.workspace = true | ||
simplelog.workspace = true | ||
snafu.workspace = true | ||
toml.workspace = true | ||
|
||
[dev-dependencies] | ||
tempfile.workspace = true | ||
|
||
[build-dependencies] | ||
generate-readme.workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# nvidia-migmanager | ||
|
||
Current version: 0.1.0 | ||
|
||
## NVIDIA MIG Manager | ||
`nvidia-migmanager` ensures that MIG settings are applied to an instance that supports | ||
MIG. It is called by `nvidia-migmanager.service`. | ||
|
||
The binary first determines whether MIG is enabled by checking the value of | ||
`settings.kubelet-device-plugins.nvidia.device-partitioning-strategy`. Then, it activates | ||
MIG and applies the profile that matches the type of GPU present in the instance. | ||
|
||
NVIDIA MIG is currently supported only on A30, A100, H100, and H200 GPUs. | ||
|
||
### Example: | ||
```toml | ||
[settings.kubelet-device-plugins.nvidia] | ||
device-partitioning-strategy="mig" | ||
|
||
[settings.kubelet-device-plugins.nvidia.mig.profile] | ||
"a100.40gb"="2" | ||
"h100.80gb"="4" | ||
"h200.141gb"="3" | ||
``` | ||
This would partition the GPUs into 2 parts on an instance with A100 GPUs, into 4 parts on an | ||
instance with H100 GPUs, and into 3 parts on an instance with H200 GPUs. | ||
|
||
## Colophon | ||
|
||
This text was generated using [cargo-readme](https://crates.io/crates/cargo-readme), and includes the rustdoc from `src/main.rs`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# {{crate}} | ||
|
||
Current version: {{version}} | ||
|
||
{{readme}} | ||
|
||
## Colophon | ||
|
||
This text was generated using [cargo-readme](https://crates.io/crates/cargo-readme), and includes the rustdoc from `src/main.rs`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Build-script entry point: calls `generate_readme::from_main()` and unwraps the result, | ||
// so a failure from that call panics. | ||
// NOTE(review): presumably this regenerates README.md from the rustdoc in `src/main.rs` | ||
// (the README colophon says so) — confirm against the `generate-readme` crate. | ||
fn main() { | ||
generate_readme::from_main().unwrap(); | ||
} |
Oops, something went wrong.