Add BaseTensor and Vision trait

jamjamjon · Jun 19, 2024 · 701b4b3 · 701b4b3
1 parent 6c72374
commit 701b4b3
Show file tree

Hide file tree

Showing 12 changed files with 326 additions and 90 deletions.
diff --git a/examples/db/main.rs b/examples/db/main.rs
@@ -1,4 +1,4 @@
-use usls::{models::DB, Annotator, DataLoader, Options};
+use usls::{models::DB, Annotator, DataLoader, Options, Vision};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
@@ -21,7 +21,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     ];
 
     // run
-    let y = model.run(&x)?;
+    // let y = model.run(&x)?;
+    let y = model.forward(&x, true)?;
 
     // annotate
     let annotator = Annotator::default()

diff --git a/examples/yolov8/main.rs b/examples/yolov8/main.rs
@@ -1,4 +1,4 @@
-use usls::{coco, models::YOLO, Annotator, DataLoader, Options};
+use usls::{coco, models::YOLO, Annotator, DataLoader, Options, Vision};
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     // build model
@@ -35,7 +35,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     // run & annotate
     for (xs, _paths) in dl {
-        let ys = model.run(&xs)?;
+        // let ys = model.run(&xs)?;
+        let ys = model.forward(&xs, true)?;
         annotator.annotate(&xs, &ys);
     }
 

diff --git a/rust-toolchain.toml b/rust-toolchain.toml
@@ -1,2 +1,2 @@
 [toolchain]
-channel = "1.75"
+channel = "1.79"
diff --git a/src/core/engine.rs b/src/core/engine.rs
@@ -10,7 +10,8 @@ use prost::Message;
 use std::collections::HashSet;
 
 use crate::{
-    home_dir, onnx, ops::make_divisible, Device, MinOptMax, Options, Ts, CHECK_MARK, CROSS_MARK,
+    home_dir, onnx, ops::make_divisible, BaseTensor, Device, MinOptMax, Options, Ts, CHECK_MARK,
+    CROSS_MARK,
 };
 
 /// Ort Tensor Attrs: name, data_type, dims
@@ -353,6 +354,160 @@ impl OrtEngine {
         Ok(ys)
     }
 
+    // pub fn execute(&mut self, xs: Vec<Array<f32, IxDyn>>) -> Result<Vec<Array<f32, IxDyn>>> {
+    //     // dtype alignment
+    //     let mut xs_: Vec<ort::SessionInputValue<'_>> = Vec::new();
+    //     let t_pre = std::time::Instant::now();
+    //     for (idtype, x) in self.inputs_attrs.dtypes.iter().zip(xs) {
+    //         let x_ = match &idtype {
+    //             TensorElementType::Float32 => ort::Value::from_array(x.view())?.into_dyn(),
+    //             TensorElementType::Float16 => {
+    //                 ort::Value::from_array(x.mapv(f16::from_f32).view())?.into_dyn()
+    //             }
+    //             TensorElementType::Int32 => {
+    //                 ort::Value::from_array(x.mapv(|x_| x_ as i32).view())?.into_dyn()
+    //             }
+    //             TensorElementType::Int64 => {
+    //                 ort::Value::from_array(x.mapv(|x_| x_ as i64).view())?.into_dyn()
+    //             }
+    //             _ => todo!(),
+    //         };
+    //         xs_.push(x_.into());
+    //     }
+    //     let t_pre = t_pre.elapsed();
+    //     self.ts.add_or_push(0, t_pre);
+
+    //     // inference
+    //     let t_run = std::time::Instant::now();
+    //     let outputs = self.session.run(&xs_[..])?;
+    //     let t_run = t_run.elapsed();
+    //     self.ts.add_or_push(1, t_run);
+
+    //     // output
+    //     let mut ys = Vec::new();
+    //     let t_post = std::time::Instant::now();
+    //     for (dtype, name) in self
+    //         .outputs_attrs
+    //         .dtypes
+    //         .iter()
+    //         .zip(self.outputs_attrs.names.iter())
+    //     {
+    //         let y = &outputs[name.as_str()];
+    //         let y_ = match &dtype {
+    //             TensorElementType::Float32 => y.try_extract_tensor::<f32>()?.view().into_owned(),
+    //             TensorElementType::Float16 => y
+    //                 .try_extract_tensor::<f16>()?
+    //                 .view()
+    //                 .mapv(f16::to_f32)
+    //                 .into_owned(),
+    //             TensorElementType::Int64 => y
+    //                 .try_extract_tensor::<i64>()?
+    //                 .view()
+    //                 .to_owned()
+    //                 .mapv(|x| x as f32)
+    //                 .into_owned(),
+    //             _ => todo!(),
+    //         };
+    //         ys.push(y_);
+    //     }
+    //     let t_post = t_post.elapsed();
+    //     self.ts.add_or_push(2, t_post);
+
+    //     if self.profile {
+    //         let len = 10usize;
+    //         let n = 4usize;
+    //         println!(
+    //             "[Profile] {:>len$.n$?} ({:>len$.n$?} avg) [alignment: {:>len$.n$?} ({:>len$.n$?} avg) | inference: {:>len$.n$?} ({:>len$.n$?} avg) | to_f32: {:>len$.n$?} ({:>len$.n$?} avg)]",
+    //             t_pre + t_run + t_post,
+    //             self.ts.avg(),
+    //             t_pre,
+    //             self.ts.avgi(0),
+    //             t_run,
+    //             self.ts.avgi(1),
+    //             t_post,
+    //             self.ts.avgi(2),
+    //         );
+    //     }
+    //     Ok(ys)
+    // }
+
+    /// Runs the session on `xs` and returns every declared output converted to `f32`.
+    ///
+    /// Inputs are paired, in order, with the element types in `self.inputs_attrs.dtypes`
+    /// and cast from `f32` to that type before the run; pairing stops at the shorter of
+    /// the two sequences. Outputs are looked up by the names in
+    /// `self.outputs_attrs.names` and converted back to `f32`.
+    ///
+    /// The three stages (dtype alignment, inference, output conversion) are timed and
+    /// recorded in `self.ts` under slots 0, 1 and 2. When `self.profile` is set, a
+    /// one-line timing summary is printed.
+    ///
+    /// # Errors
+    /// Propagates any error from building the input values, running the session, or
+    /// extracting an output tensor.
+    ///
+    /// # Panics
+    /// Reaches `todo!()` for input element types other than Float32/Float16/Int32/Int64
+    /// and for output element types other than Float32/Float16/Int64.
+    pub fn execute(&mut self, xs: Vec<BaseTensor>) -> Result<Vec<BaseTensor>> {
+        // dtype alignment: cast each f32 input to the element type the model declares
+        let mut xs_: Vec<ort::SessionInputValue<'_>> = Vec::with_capacity(xs.len());
+        let t_pre = std::time::Instant::now();
+        for (idtype, x) in self.inputs_attrs.dtypes.iter().zip(xs) {
+            let x_ = match &idtype {
+                TensorElementType::Float32 => ort::Value::from_array(x.0.view())?.into_dyn(),
+                TensorElementType::Float16 => {
+                    ort::Value::from_array(x.0.mapv(f16::from_f32).view())?.into_dyn()
+                }
+                TensorElementType::Int32 => {
+                    ort::Value::from_array(x.0.mapv(|x_| x_ as i32).view())?.into_dyn()
+                }
+                TensorElementType::Int64 => {
+                    ort::Value::from_array(x.0.mapv(|x_| x_ as i64).view())?.into_dyn()
+                }
+                _ => todo!(),
+            };
+            xs_.push(x_.into());
+        }
+        let t_pre = t_pre.elapsed();
+        self.ts.add_or_push(0, t_pre);
+
+        // inference
+        let t_run = std::time::Instant::now();
+        let outputs = self.session.run(&xs_[..])?;
+        let t_run = t_run.elapsed();
+        self.ts.add_or_push(1, t_run);
+
+        // output: fetch each declared output by name and convert it to f32
+        let mut ys = Vec::with_capacity(self.outputs_attrs.names.len());
+        let t_post = std::time::Instant::now();
+        for (dtype, name) in self
+            .outputs_attrs
+            .dtypes
+            .iter()
+            .zip(self.outputs_attrs.names.iter())
+        {
+            let y = &outputs[name.as_str()];
+            let y_ = match &dtype {
+                TensorElementType::Float32 => y.try_extract_tensor::<f32>()?.view().into_owned(),
+                // `mapv` already yields an owned array, so no extra copy is needed after it
+                TensorElementType::Float16 => y
+                    .try_extract_tensor::<f16>()?
+                    .view()
+                    .mapv(f16::to_f32),
+                TensorElementType::Int64 => y
+                    .try_extract_tensor::<i64>()?
+                    .view()
+                    .mapv(|x| x as f32),
+                _ => todo!(),
+            };
+            ys.push(BaseTensor::from(y_));
+        }
+        let t_post = t_post.elapsed();
+        self.ts.add_or_push(2, t_post);
+
+        if self.profile {
+            let len = 10usize;
+            let n = 4usize;
+            println!(
+                "[Profile] {:>len$.n$?} ({:>len$.n$?} avg) [alignment: {:>len$.n$?} ({:>len$.n$?} avg) | inference: {:>len$.n$?} ({:>len$.n$?} avg) | to_f32: {:>len$.n$?} ({:>len$.n$?} avg)]",
+                t_pre + t_run + t_post,
+                self.ts.avg(),
+                t_pre,
+                self.ts.avgi(0),
+                t_run,
+                self.ts.avgi(1),
+                t_post,
+                self.ts.avgi(2),
+            );
+        }
+        Ok(ys)
+    }
+
     pub fn _set_ixx(x: isize, ixx: &Option<MinOptMax>, i: usize, ii: usize) -> Option<MinOptMax> {
         match x {
             -1 => {

diff --git a/src/core/logits_sampler.rs b/src/core/logits_sampler.rs
@@ -23,7 +23,7 @@ impl LogitsSampler {
     }
 
     pub fn with_topp(mut self, p: f32) -> Self {
-        self.p = p.max(0.0).min(1.0);
+        self.p = p.clamp(0., 1.);
         self
     }
 

diff --git a/src/core/mod.rs b/src/core/mod.rs
@@ -9,8 +9,10 @@ mod min_opt_max;
 pub mod onnx;
 pub mod ops;
 mod options;
+mod tensor;
 mod tokenizer_stream;
 mod ts;
+mod vision;
 
 pub use annotator::Annotator;
 pub use dataloader::DataLoader;
@@ -21,5 +23,9 @@ pub use logits_sampler::LogitsSampler;
 pub use metric::Metric;
 pub use min_opt_max::MinOptMax;
 pub use options::Options;
+pub use tensor::BaseTensor;
 pub use tokenizer_stream::TokenizerStream;
 pub use ts::Ts;
+pub use vision::Vision;
+
+// pub type BaseTensor = ndarray::Array<f32, ndarray::IxDyn>;
diff --git a/src/core/preprocess.cu b/src/core/preprocess.cu
diff --git a/src/core/tensor.rs b/src/core/tensor.rs
@@ -0,0 +1,47 @@
+use ndarray::{Array, Dim, IxDyn, IxDynImpl};
+
+/// Dynamically-shaped `f32` tensor: a newtype over `ndarray::Array<f32, IxDyn>`.
+///
+/// The wrapped array is public, so callers may reach it directly through `.0`.
+#[derive(Clone, Debug)]
+pub struct BaseTensor(pub Array<f32, IxDyn>);
+
+/// Wraps an existing dynamic-dimensional `f32` array by moving it into the newtype.
+impl From<Array<f32, IxDyn>> for BaseTensor {
+    fn from(array: Array<f32, IxDyn>) -> Self {
+        BaseTensor(array)
+    }
+}
+
+// TODO: from_dynamic_image
+
+impl BaseTensor {
+    /// Creates a tensor of the given `shape` with every element set to zero.
+    pub fn zeros(shape: &[usize]) -> Self {
+        let dim = Dim(IxDynImpl::from(shape.to_vec()));
+        Self(Array::zeros(dim))
+    }
+
+    /// Borrows the wrapped array.
+    pub fn data(&self) -> &Array<f32, IxDyn> {
+        let Self(inner) = self;
+        inner
+    }
+
+    /// Returns the length of each axis.
+    pub fn shape(&self) -> &[usize] {
+        self.data().shape()
+    }
+
+    /// Returns the length of each axis; identical to [`Self::shape`].
+    pub fn dims(&self) -> &[usize] {
+        self.shape()
+    }
+
+    /// Maps every element `x` to `(x - min_) / (max_ - min_)`.
+    ///
+    /// No check is made for `max_ == min_`; the division is performed as written.
+    pub fn normalize(self, min_: f32, max_: f32) -> Self {
+        let Self(values) = self;
+        Self((values - min_) / (max_ - min_))
+    }
+
+    /// Subtracts `mean` and divides by `std`, both broadcast from shape `(1, len, 1, 1)`.
+    ///
+    /// NOTE(review): this presumably expects the channel axis at index 1 (e.g. NCHW);
+    /// confirm against callers.
+    pub fn standardize(self, mean: &[f32], std: &[f32]) -> Self {
+        // The element count always equals the requested shape's size, so `unwrap` cannot fail here.
+        let offset = Array::from_shape_vec((1, mean.len(), 1, 1), mean.to_vec()).unwrap();
+        let scale = Array::from_shape_vec((1, std.len(), 1, 1), std.to_vec()).unwrap();
+        let Self(values) = self;
+        Self((values - offset) / scale)
+    }
+
+    /// Placeholder for a tensor-to-image conversion; not implemented yet.
+    ///
+    /// # Panics
+    /// Always panics via `todo!()`.
+    pub fn into_image() {
+        todo!()
+    }
+}
diff --git a/src/core/vision.rs b/src/core/vision.rs
@@ -0,0 +1,47 @@
+use crate::{BaseTensor, Options, Y};
+
+/// Three-stage model pipeline: preprocess, inference, postprocess.
+///
+/// Implementors supply the stages; `run` and `forward` chain them in that order.
+pub trait Vision: Sized {
+    /// Element type of the raw inputs handed to the pipeline (noted as `DynamicImage`).
+    type Input;
+    // type TensorType; // TODO: make it fixed?
+
+    /// Builds a model instance from `options`.
+    fn new(options: Options) -> anyhow::Result<Self>;
+
+    /// Turns the raw inputs into the tensors fed to the model.
+    fn preprocess(&self, xs: &[Self::Input]) -> anyhow::Result<Vec<BaseTensor>>;
+
+    /// Runs the model on already-preprocessed tensors.
+    fn inference(&mut self, xs: Vec<BaseTensor>) -> anyhow::Result<Vec<BaseTensor>>;
+
+    /// Converts the model outputs `xs` into results; `xs0` are the original raw inputs.
+    fn postprocess(&self, xs: Vec<BaseTensor>, xs0: &[Self::Input]) -> anyhow::Result<Vec<Y>>;
+
+    /// Runs preprocess, inference and postprocess in sequence, stopping at the first error.
+    fn run(&mut self, xs: &[Self::Input]) -> anyhow::Result<Vec<Y>> {
+        let tensors = self.preprocess(xs)?;
+        let outputs = self.inference(tensors)?;
+        self.postprocess(outputs, xs)
+    }
+
+    /// Same pipeline as `run`, timing each stage; prints the timings when `profile` is true.
+    fn forward(&mut self, xs: &[Self::Input], profile: bool) -> anyhow::Result<Vec<Y>> {
+        let clock = std::time::Instant::now();
+        let tensors = self.preprocess(xs)?;
+        let t_pre = clock.elapsed();
+
+        let clock = std::time::Instant::now();
+        let outputs = self.inference(tensors)?;
+        let t_exe = clock.elapsed();
+
+        let clock = std::time::Instant::now();
+        let results = self.postprocess(outputs, xs)?;
+        let t_post = clock.elapsed();
+
+        if profile {
+            println!("> Pre: {t_pre:?} | Execution: {t_exe:?} | Post: {t_post:?}");
+        }
+
+        Ok(results)
+    }
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -23,7 +23,7 @@ impl LogitsSampler { @@
         }
         pub fn with_topp(mut self, p: f32) -> Self {
-            self.p = p.max(0.0).min(1.0);
+            self.p = p.clamp(0., 1.);
             self
         }
@@ Expand Down @@