todo(iluvatar): 适配天数 (adapt to Iluvatar CoreX GPUs)

Signed-off-by: YdrMaster <ydrml@hotmail.com>
InfiniTensor · Jan 2, 2025 · cf6d8fe
1 parent 5b69747
commit cf6d8fe
Show file tree

Hide file tree

Showing 6 changed files with 30 additions and 21 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -34,8 +34,9 @@ itertools = "0.13"
 env_logger = "0.11"
 build-script-cfg = "0.0"
 
-operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "807ea2b", default-features = false }
+operators = { git = "https://github.com/YdrMaster/operators-rs", rev = "6f51afa", default-features = false }
 
 search-cl-tools = { git = "https://github.com/InfiniTensor/clrt", rev = "9b6289d" }
 search-infini-tools = { git = "https://github.com/InfiniTensor/infini-rt", rev = "f40bcb5" }
-search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "5aec761" }
+search-cuda-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "b320cd9" }
+search-corex-tools = { git = "https://github.com/YdrMaster/cuda-driver", rev = "b320cd9" }
diff --git a/models/llama/nvidia-gpu/Cargo.toml b/models/llama/nvidia-gpu/Cargo.toml
@@ -10,11 +10,12 @@ authors = ["YdrMaster <ydrml@hotmail.com>"]
 llama.path = "../common"
 common.workspace = true
 log.workspace = true
-operators = { workspace = true, features = ["nvidia-gpu"] }
+operators = { workspace = true, features = ["nvidia-gpu", "iluvatar-gpu"] }
 
 [build-dependencies]
 build-script-cfg.workspace = true
 search-cuda-tools.workspace = true
+search-corex-tools.workspace = true
 
 [dev-dependencies]
 test-utils = { workspace = true, features = ["llama"] }

diff --git a/models/llama/nvidia-gpu/build.rs b/models/llama/nvidia-gpu/build.rs
@@ -1,13 +1,23 @@
 fn main() {
     use build_script_cfg::Cfg;
+    use search_corex_tools::find_corex;
     use search_cuda_tools::{find_cuda_root, find_nccl_root};
 
-    let driver = Cfg::new("driver_detected");
-    let nccl = Cfg::new("nccl_detected");
-    if find_cuda_root().is_some() {
-        driver.define();
+    let nvidia = Cfg::new("use_nvidia");
+    let iluvatar = Cfg::new("use_iluvatar");
+    let nccl = Cfg::new("use_nccl");
+
+    let nvidia_detected = find_cuda_root().is_some();
+    let iluvatar_detected = find_corex().is_some();
+
+    if nvidia_detected {
+        nvidia.define();
         if find_nccl_root().is_some() {
-            nccl.define();
+            nccl.define()
         }
     }
+
+    if iluvatar_detected {
+        iluvatar.define()
+    }
 }
diff --git a/models/llama/nvidia-gpu/src/infer.rs b/models/llama/nvidia-gpu/src/infer.rs
@@ -1,11 +1,10 @@
-use crate::{Operators, RandomSample, Weights};
+use crate::{Operators, RandomSample, Weights};
 use gguf::GGufModel;
 use llama::{
     ext::ggml_quants::f16, LlamaArgs, LlamaMeta, LlamaRequest, LlamaStorage, LlamaWorker, Tensor,
 };
 use operators::{
-    cuda::{self, memcpy_d2h, Device, NoDevice},
-    nvidia_gpu::{Config, Gpu},
+    cuda::{self, memcpy_d2h, Config, Device, Gpu, NoDevice},
     random_sample::{KVPair, SampleArgs},
 };
 use std::{slice::from_raw_parts_mut, time::Instant, usize};

diff --git a/models/llama/nvidia-gpu/src/lib.rs b/models/llama/nvidia-gpu/src/lib.rs
@@ -1,14 +1,13 @@
-#![cfg(driver_detected)]
+#![cfg(any(use_nvidia, use_iluvatar))]
 
 use common::{Contiguous, Slab};
 use llama::{BlkWeight, LlamaBlkStorage, LlamaStorage, Tensor, WeightLoader};
 use log::trace;
 use operators::{
     all_reduce::{AllReduce, NonAllReduce},
-    cuda::{memcpy_d2h, AsRaw, CurrentCtx, DevByte, DevMem, Event, HostMem, Stream},
-    nvidia_gpu::Gpu,
-    random_sample::nvidia_gpu::Operator as RandomSampleGpu,
-    rearrange::nvidia_gpu::Operator as Rearrange,
+    cuda::{memcpy_d2h, AsRaw, CurrentCtx, DevByte, DevMem, Event, Gpu, HostMem, Stream},
+    random_sample::cuda::Operator as RandomSampleGpu,
+    rearrange::cuda::Operator as Rearrange,
     ByteOf, QueueOf, TopoNode,
 };
 use std::{
@@ -119,7 +118,7 @@ impl Drop for WeightResult<'_, '_> {
 
 macro_rules! op {
     ($name:ident) => {
-        operators::$name::nvidia_gpu::Operator
+        operators::$name::cuda::Operator
     };
 }
 
@@ -378,5 +377,5 @@ impl<'ctx> WeightLoader for Weights<'ctx> {
 #[cfg(test)]
 mod infer;
 
-#[cfg(all(test, nccl_detected))]
+#[cfg(all(test, use_nccl))]
 mod nccl_parallel;
diff --git a/models/llama/nvidia-gpu/src/nccl_parallel.rs b/models/llama/nvidia-gpu/src/nccl_parallel.rs
@@ -1,12 +1,11 @@
-use crate::{Operators, RandomSample, Weights};
+use crate::{Operators, RandomSample, Weights};
 use gguf::GGufModel;
 use llama::{ext::ggml_quants::f16, LlamaRequest, LlamaStorage, LlamaWorker, Tensor};
 use log::info;
 use operators::{
     all_reduce::nccl::Operator as AllReduce,
-    cuda::{self, memcpy_d2h, NoDevice},
+    cuda::{self, memcpy_d2h, NcclNode, NoDevice},
     nccl::CommunicatorGroup,
-    nvidia_gpu::NcclNode,
     random_sample::{KVPair, SampleArgs},
     TopoNode,
 };