diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a6f73e321..17555c58d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,7 +9,7 @@ ci: repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: end-of-file-fixer - id: trailing-whitespace @@ -23,7 +23,7 @@ repos: - id: detect-private-key - repo: https://github.com/psf/black - rev: "24.4.2" + rev: "24.10.0" hooks: - id: black - id: black-jupyter diff --git a/generation/2d_diffusion_autoencoder/2d_diffusion_autoencoder_tutorial.ipynb b/generation/2d_diffusion_autoencoder/2d_diffusion_autoencoder_tutorial.ipynb index b193e1023..67ecb5532 100644 --- a/generation/2d_diffusion_autoencoder/2d_diffusion_autoencoder_tutorial.ipynb +++ b/generation/2d_diffusion_autoencoder/2d_diffusion_autoencoder_tutorial.ipynb @@ -474,9 +474,8 @@ } ], "source": [ - "max_epochs = (\n", - " 1000 # training for longer (1e4 ~ 3h) helps a lot with reconstruction quality, even if the loss is already low\n", - ")\n", + "# training for longer (1e4 ~ 3h) helps a lot with reconstruction quality, even if the loss is already low\n", + "max_epochs = 1000\n", "val_interval = 100\n", "print_interval = 50\n", "iter_loss_list, val_iter_loss_list = [], []\n", diff --git a/generation/README.md b/generation/README.md index df50ed899..6c4fe96aa 100644 --- a/generation/README.md +++ b/generation/README.md @@ -43,7 +43,7 @@ Example shows the use cases of using MONAI to evaluate the performance of a gene ## [Training a 2D VQ-VAE + Autoregressive Transformers](./2d_vqvae_transformer/2d_vqvae_transformer_tutorial.ipynb): Example shows how to train a Vector-Quantized Variation Autoencoder + Transformers on the MedNIST dataset. -## Training VQ-VAEs and VQ-GANs: [2D VAE](./2d_vqvae/2d_vqvae_tutorial.ipynb), [3D VAE](./3d_vqvae/3d_vqvae_tutorial.ipynb) and [2D GAN](./3d_autoencoderkl/2d_vqgan_tutorial.ipynb) +## Training VQ-VAEs and VQ-GANs: [2D VAE](./2d_vqvae/2d_vqvae_tutorial.ipynb), [3D VAE](./3d_vqvae/3d_vqvae_tutorial.ipynb) and [2D GAN](./2d_vqgan/2d_vqgan_tutorial.ipynb) Examples show how to train Vector Quantized Variation Autoencoder on [2D](./2d_vqvae/2d_vqvae_tutorial.ipynb) and [3D](./3d_vqvae/3d_vqvae_tutorial.ipynb), and how to use the PatchDiscriminator class to train a [VQ-GAN](./2d_vqgan/2d_vqgan_tutorial.ipynb) and improve the quality of the generated images. ## [Training a 2D Denoising Diffusion Probabilistic Model](./2d_ddpm/2d_ddpm_tutorial.ipynb): diff --git a/generation/anomaly_detection/2d_classifierfree_guidance_anomalydetection_tutorial.ipynb b/generation/anomaly_detection/2d_classifierfree_guidance_anomalydetection_tutorial.ipynb index 1f8100faf..1a8fee83c 100644 --- a/generation/anomaly_detection/2d_classifierfree_guidance_anomalydetection_tutorial.ipynb +++ b/generation/anomaly_detection/2d_classifierfree_guidance_anomalydetection_tutorial.ipynb @@ -426,7 +426,7 @@ }, "source": [ "condition_dropout = 0.15\n", - "max_epochs = 2e4\n", + "max_epochs = 20000\n", "batch_size = 64\n", "val_interval = 100\n", "iter_loss_list = []\n", diff --git a/generation/maisi/README.md b/generation/maisi/README.md index 09cfb7dcb..e7e4e9e47 100644 --- a/generation/maisi/README.md +++ b/generation/maisi/README.md @@ -2,10 +2,22 @@ This example demonstrates the applications of training and validating NVIDIA MAISI, a 3D Latent Diffusion Model (LDM) capable of generating large CT images accompanied by corresponding segmentation masks. 
It supports variable volume size and voxel spacing and allows for the precise control of organ/tumor size.

## MAISI Model Highlight
-- A Foundation Variational Auto-Encoder (VAE) model for latent feature compression that works for both CT and MRI with flexible volume size and voxel size
+- A Foundation Variational Auto-Encoder (VAE) model for latent feature compression that works for both CT and MRI with flexible volume size and voxel size. Tensor parallelism is included to reduce GPU memory usage.
 - A Foundation Diffusion model that can generate large CT volumes up to 512 × 512 × 768 size, with flexible volume size and voxel size
 - A ControlNet to generate image/mask pairs that can improve downstream tasks, with controllable organ/tumor size
+
+More details can be found in our WACV 2025 paper:
+[Guo, P., Zhao, C., Yang, D., Xu, Z., Nath, V., Tang, Y., ... & Xu, D. (2024). MAISI: Medical AI for Synthetic Imaging. arXiv preprint arXiv:2409.11169](https://arxiv.org/pdf/2409.11169)
+
+You are welcome to try our GUI demo at [https://build.nvidia.com/nvidia/maisi](https://build.nvidia.com/nvidia/maisi).
+The GUI is a demo with toy examples only; this GitHub repo provides the full version.
+
+
+## Minimum GPU Requirement
+The GPU memory requirement depends on the image size. For example,
+- for an image size of 512x512x128, the minimum GPU memory requirement is 16G for both training and inference;
+- for an image size of 512x512x512, the minimum GPU memory requirement is 40G for training and 24G for inference.
+
 ## Example Results and Evaluation
 We retrained several state-of-the-art diffusion model-based methods using our dataset. The results in the table and figure below show that our method outperforms previous methods on an unseen dataset ([autoPET 2023](https://www.nature.com/articles/s41597-022-01718-3)). Our method shows superior performance to previous methods based on all [Fréchet Inception Distance (FID)](https://papers.nips.cc/paper/2017/hash/8a1d694707eb0fefe65871369074926d-Abstract.html) scores on different 2D planes. Here we compared the generated images with real images of size 512 × 512 × 512 and spacing 1.0 × 1.0 × 1.0 mm3.
@@ -31,6 +43,64 @@ We retrained several state-of-the-art diffusion model-based methods using our da
+| Dataset | Model | LPIPS ↓ | SSIM ↑ | PSNR ↑ | GPU ↓ |
+|-------------|-----------------|----------|--------|---------|--------|
+| MSD Task07 | MAISI VAE | **0.038**| **0.978**|**37.266**| **0h** |
+| | Dedicated VAE | 0.047 | 0.971 | 34.750 | 619h |
+| MSD Task08 | MAISI VAE | 0.046 | 0.970 | 36.559 | **0h** |
+| | Dedicated VAE | **0.041**|**0.973**|**37.110**| 669h |
+| Brats18 | MAISI VAE | **0.026**|**0.977**| **39.003**| **0h** |
+| | Dedicated VAE | 0.030 | 0.975 | 38.971 | 672h |
+
+**Table 2:** Performance comparison of the `MAISI VAE` model on out-of-distribution datasets (i.e., unseen during MAISI VAE training) versus `Dedicated VAE` models (i.e., trained from scratch on in-distribution data). The “GPU” column shows the additional GPU hours for training with one 32G V100 GPU. The MAISI VAE model achieves comparable results on unseen datasets without any additional GPU expenditure.
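+
+The reconstruction metrics reported in Table 2 (LPIPS, SSIM, PSNR) can be reproduced with standard image-quality metrics. The snippet below is a minimal, illustrative sketch using MONAI's metric classes on a real/reconstructed volume pair; the tensor names and the intensity normalization to [0, 1] are assumptions, not the exact evaluation script used for the table.
+
+```python
+import torch
+from monai.metrics import PSNRMetric, SSIMMetric
+
+# Hypothetical tensors of shape (batch, channel, H, W, D) with intensities scaled to [0, 1].
+real = torch.rand(1, 1, 128, 128, 128)
+recon = torch.rand(1, 1, 128, 128, 128)
+
+psnr = PSNRMetric(max_val=1.0)                      # peak signal-to-noise ratio
+ssim = SSIMMetric(spatial_dims=3, data_range=1.0)   # structural similarity for 3D volumes
+
+print("PSNR:", psnr(recon, real).mean().item())
+print("SSIM:", ssim(recon, real).mean().item())
+# LPIPS is usually computed with a pretrained perceptual network (e.g. monai.losses.PerceptualLoss)
+# and is omitted here because it requires downloading network weights.
+```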
+
+
+## Time Cost and GPU Memory Usage
+
+### Inference Time Cost and GPU Memory Usage
+| `output_size` | latent size | `autoencoder_sliding_window_infer_size` | `autoencoder_tp_num_splits` | Peak Memory | DM Time | VAE Time |
+|---------------|:--------------------------------------:|:--------------------------------------:|:---------------------------:|:-----------:|:-------:|:--------:|
+| [256x256x128](./configs/config_infer_16g_256x256x128.json) |4x64x64x32| >=[64,64,32], not used | 2 | 14G | 57s | 1s |
+| [256x256x256](./configs/config_infer_16g_256x256x256.json) |4x64x64x64| [48,48,64], 4 patches | 2 | 14G | 81s | 7s |
+| [512x512x128](./configs/config_infer_16g_512x512x128.json) |4x128x128x32| [64,64,32], 9 patches | 1 | 14G | 138s | 7s |
+| | | | | | |
+| [256x256x256](./configs/config_infer_24g_256x256x256.json) |4x64x64x64| >=[64,64,64], not used | 4 | 22G | 81s | 2s |
+| [512x512x128](./configs/config_infer_24g_512x512x128.json) |4x128x128x32| [80,80,32], 4 patches | 1 | 18G | 138s | 9s |
+| [512x512x512](./configs/config_infer_24g_512x512x512.json) |4x128x128x128| [64,64,48], 36 patches | 2 | 22G | 569s | 29s |
+| | | | | | |
+| [512x512x512](./configs/config_infer_32g_512x512x512.json) |4x128x128x128| [64,64,64], 27 patches | 2 | 26G | 569s | 40s |
+| | | | | | |
+| [512x512x128](./configs/config_infer_80g_512x512x128.json) |4x128x128x32| >=[128,128,32], not used | 4 | 37G | 138s | 140s |
+| [512x512x512](./configs/config_infer_80g_512x512x512.json) |4x128x128x128| [80,80,80], 8 patches | 2 | 44G | 569s | 30s |
+| [512x512x768](./configs/config_infer_80g_512x512x768.json) |4x128x128x192| [80,80,112], 8 patches | 4 | 55G | 904s | 48s |
+
+**Table 3:** Inference Time Cost and GPU Memory Usage. `DM Time` refers to the time required for diffusion model inference. `VAE Time` refers to the time required for VAE decoder inference. The total inference time is the sum of `DM Time` and `VAE Time`. The experiments were conducted on an A100 80G GPU.
+
+During inference, the peak GPU memory usage occurs while the VAE decodes the latent features.
+To reduce GPU memory usage, either increase `autoencoder_tp_num_splits` or reduce `autoencoder_sliding_window_infer_size`.
+Increasing `autoencoder_tp_num_splits` has little impact on the generated image quality, whereas reducing `autoencoder_sliding_window_infer_size` may introduce stitching artifacts and degrades image quality more noticeably.
+
+When `autoencoder_sliding_window_infer_size` is equal to or larger than the latent feature size, sliding window inference is not used, and increasing it further does not change the time or memory cost.
+
+
+### Training GPU Memory Usage
+The VAE is trained on patches and fits on a 16G GPU when the patch size is small, such as [64, 64, 64]. Users can adjust the patch size to fit the available GPU memory. For the released model, we initially trained the autoencoder on 16G V100 GPUs with a small patch size of [64, 64, 64], and then continued training on 32G V100 GPUs with a larger patch size of [128, 128, 128].
+
+The DM and ControlNet are trained on whole images rather than patches, so the GPU memory usage during training depends on the size of the input images.
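+
+As the table below shows, the VAE compresses each image into a latent with 4 channels and 4x downsampling along every spatial axis, so the latent size (and with it the training memory) grows with the number of voxels. A small sketch of this relationship (the helper function is ours, not part of the MAISI scripts):
+
+```python
+def latent_shape(output_size, channels=4, downsample=4):
+    """Latent feature shape for a given image size, assuming the MAISI VAE's
+    4 latent channels and 4x spatial compression per axis (as in the tables in this README)."""
+    return [channels] + [s // downsample for s in output_size]
+
+for size in ([256, 256, 128], [512, 512, 512], [512, 512, 768]):
+    print(size, "->", latent_shape(size))  # e.g. [512, 512, 768] -> [4, 128, 128, 192]
+```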
+ +| image size | latent size | Peak Memory | +|--------------|:------------- |:-----------:| +| 256x256x128 | 4x64x64x32 | 5G | +| 256x256x256 | 4x64x64x64 | 8G | +| 512x512x128 | 4x128x128x32 | 12G | +| 512x512x256 | 4x128x128x64 | 21G | +| 512x512x512 | 4x128x128x128 | 39G | +| 512x512x768 | 4x128x128x192 | 58G | + + + + + ## MAISI Model Workflow The training and inference workflows of MAISI are depicted in the figure below. It begins by training an autoencoder in pixel space to encode images into latent features. Following that, it trains a diffusion model in the latent space to denoise the noisy latent features. During inference, it first generates latent features from random noise by applying multiple denoising steps using the trained diffusion model. Finally, it decodes the denoised latent features into images using the trained autoencoder.
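+
+For intuition, the inference path described above boils down to the latent-diffusion sampling loop sketched below. This is a simplified sketch only: `diffusion_unet`, `noise_scheduler`, and `autoencoder` are assumed to be already-trained components, the scheduler is assumed to follow a MONAI-style `step()` API that returns the previous (less noisy) sample first, and the MAISI-specific conditioning inputs (body region, spacing, anatomy) are omitted. The actual implementation is in [scripts/sample.py](./scripts/sample.py).
+
+```python
+import torch
+
+
+@torch.no_grad()
+def generate_one_image(diffusion_unet, noise_scheduler, autoencoder, latent_shape, device="cuda"):
+    """Denoise random latent noise with the diffusion model, then decode with the VAE.
+
+    `latent_shape` is e.g. (1, 4, 128, 128, 32) for a 512x512x128 output image.
+    """
+    latents = torch.randn(latent_shape, device=device)  # start from pure noise in latent space
+    for t in noise_scheduler.timesteps:  # multiple denoising steps
+        noise_pred = diffusion_unet(latents, timesteps=torch.as_tensor((t,), device=device))
+        latents, _ = noise_scheduler.step(noise_pred, t, latents)  # one reverse-diffusion step
+    return autoencoder.decode(latents)  # decode the denoised latents into an image
+```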
@@ -55,26 +125,43 @@ Network definition is stored in [./configs/config_maisi.json](./configs/config_m ### 2. Model Inference #### Inference parameters: -The information for the inference input, like body region and anatomy to generate, is stored in [./configs/config_infer.json](./configs/config_infer.json). Please feel free to play with it. Here are the details of the parameters. +The information for the inference input, such as the body region and anatomy to generate, is stored in [./configs/config_infer.json](./configs/config_infer.json). Feel free to experiment with it. Below are the details of the parameters: -- `"num_output_samples"`: int, the number of output image/mask pairs it will generate. -- `"spacing"`: voxel size of generated images. E.g., if set to `[1.5, 1.5, 2.0]`, it will generate images with a resolution of 1.5x1.5x2.0 mm. -- `"output_size"`: volume size of generated images. E.g., if set to `[512, 512, 256]`, it will generate images with size of 512x512x256. They need to be divisible by 16. If you have a small GPU memory size, you should adjust it to small numbers. Note that `"spacing"` and `"output_size"` together decide the output field of view (FOV). For eample, if set them to `[1.5, 1.5, 2.0]`mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in x and y axis to be at least 256mm for head, and at least 384mm for other body regions like abdomen. There is no such restriction for z-axis. -- `"controllable_anatomy_size"`: a list of controllable anatomy and its size scale (0--1). E.g., if set to `[["liver", 0.5],["hepatic tumor", 0.3]]`, the generated image will contain liver that have a median size, with size around 50% percentile, and hepatic tumor that is relatively small, with around 30% percentile. The output will contain paired image and segmentation mask for the controllable anatomy. -- `"body_region"`: If "controllable_anatomy_size" is not specified, "body_region" will be used to constrain the region of generated images. It needs to be chosen from "head", "chest", "thorax", "abdomen", "pelvis", "lower". -- `"anatomy_list"`: If "controllable_anatomy_size" is not specified, the output will contain paired image and segmentation mask for the anatomy in "./configs/label_dict.json". -- `"autoencoder_sliding_window_infer_size"`: in order to save GPU memory, we use sliding window inference when decoding latents to image when `"output_size"` is large. This is the patch size of the sliding window. Small value will reduce GPU memory but increase time cost. They need to be divisible by 16. -- `"autoencoder_sliding_window_infer_overlap"`: float between 0 and 1. Large value will reduce the stitching artifacts when stitching patches during sliding window inference, but increase time cost. If you do not observe seam lines in the generated image result, you can use a smaller value to save inference time. +- `"num_output_samples"`: An integer specifying the number of output image/mask pairs to generate. +- `"spacing"`: The voxel size of the generated images. For example, if set to `[1.5, 1.5, 2.0]`, it generates images with a resolution of 1.5x1.5x2.0 mm. +- `"output_size"`: The volume size of the generated images. For example, if set to `[512, 512, 256]`, it generates images of size 512x512x256. The values must be divisible by 16. If GPU memory is limited, adjust these to smaller numbers. Note that `"spacing"` and `"output_size"` together determine the output field of view (FOV). 
For example, if set to `[1.5, 1.5, 2.0]` mm and `[512, 512, 256]`, the FOV is 768x768x512 mm. We recommend the FOV in the x and y axes to be at least 256 mm for the head and at least 384 mm for other body regions like the abdomen. There is no restriction for the z-axis.
+- `"controllable_anatomy_size"`: A list specifying controllable anatomy and their size scale (0–1). For example, if set to `[["liver", 0.5], ["hepatic tumor", 0.3]]`, the generated image will contain a liver of median size (around the 50th percentile) and a relatively small hepatic tumor (around the 30th percentile). The output will include paired images and segmentation masks for the controllable anatomy.
+- `"body_region"`: If `"controllable_anatomy_size"` is not specified, `"body_region"` will constrain the region of the generated images. It must be chosen from `"head"`, `"chest"`, `"thorax"`, `"abdomen"`, `"pelvis"`, or `"lower"`. Please choose a `"body_region"` that is consistent with the FOV determined by `"spacing"` and `"output_size"`. For example, if the FOV spans only 128 mm along the z-axis, `"body_region"` should not be expected to cover all of [`"head"`, `"chest"`, `"thorax"`, `"abdomen"`, `"pelvis"`, `"lower"`].
+- `"anatomy_list"`: If `"controllable_anatomy_size"` is not specified, the output will include paired images and segmentation masks for the anatomy listed in `"./configs/label_dict.json"`.
+- `"autoencoder_sliding_window_infer_size"`: To save GPU memory, sliding window inference is used when decoding latents into images if `"output_size"` is large. This parameter specifies the patch size of the sliding window. Smaller values reduce GPU memory usage but increase the time cost. The values must be divisible by 16. If GPU memory is sufficient, select a larger value for this parameter.
+- `"autoencoder_sliding_window_infer_overlap"`: A float between 0 and 1. Larger values reduce stitching artifacts when patches are stitched during sliding window inference but increase the time cost. If you do not observe seam lines in the generated image, you can use a smaller value to save inference time.
+- `"autoencoder_tp_num_splits"`: An integer chosen from `[1, 2, 4, 8, 16]`. Tensor parallelism is used in the autoencoder to save GPU memory. Larger values reduce GPU memory usage. If GPU memory is sufficient, select a smaller value for this parameter.

-To generate images with substantial dimensions, such as 512 × 512 × 512 or larger, using GPUs with 80GB of memory, it is advisable to configure the `"num_splits"` parameter in [the auto-encoder configuration](./configs/config_maisi.json#L11-L37) to 16. This adjustment is crucial to avoid out-of-memory issues during inference.

-#### Recommended spacing for different output sizes:
-|`output_size`| Recommended `"spacing"`|
-|:-----:|:-----:|
-[256, 256, 256] | [1.5, 1.5, 1.5] |
-[512, 512, 128] | [0.8, 0.8, 2.5] |
-[512, 512, 512] | [1.0, 1.0, 1.0] |

+#### Recommended spacing for different output sizes:
+Based on the statistics of the training data, we recommend the following input parameters for the body regions covered by the training data.
+The recommended `"output_size"` is the median size in the training data, and the recommended `"spacing"` is the median FOV of the training data (the product of its `"output_size"` and `"spacing"`) divided by the recommended `"output_size"`.
+|`"body_region"` |percentage of training data |Recommended `"output_size"`| Recommended `"spacing"` [mm]| +|:--------------------------------------|:--------------------------|:----------------------|---------------------------:| +| ['chest', 'abdomen'] | 58.55% | [512, 512, 128] | [0.781, 0.781, 2.981] | +| ['chest'] | 38.35% | [512, 512, 128] | [0.684, 0.684, 2.422] | +| ['chest', 'abdomen', 'lower'] | 1.42% | [512, 512, 256] | [0.793, 0.793, 1.826] | +| ['lower'] | 0.61% | [512, 512, 384] | [0.839, 0.839, 0.728] | +| ['abdomen', 'lower'] | 0.37% | [512, 512, 384] | [0.808, 0.808, 0.729] | +| ['head', 'chest', 'abdomen'] | 0.33% | [512, 512, 384] | [0.977, 0.977, 2.103] | +| ['abdomen'] | 0.13% | [512, 512, 128] | [0.723, 0.723, 1.182] | +| ['head', 'chest', 'abdomen', 'lower'] | 0.13% | [512, 512, 384] | [1.367, 1.367, 4.603] | +| ['head', 'chest'] | 0.10% | [512, 512, 128] | [0.645, 0.645, 2.219] | + +If users want to try different `"output_size"`, please adjust `"spacing"` to ensure a reasonable FOV, which is the product of `"output_size"` and `"spacing"`. +For example, + +|`"output_size"`| Recommended `"spacing"`| +|:--------------------------------------|:--------------------------| +|[256, 256, 256] | [1.5, 1.5, 1.5] | +|[512, 512, 128] | [0.8, 0.8, 2.5] | +|[512, 512, 512] | [1.0, 1.0, 1.0] | #### Execute Inference: To run the inference script, please run: diff --git a/generation/maisi/configs/config_infer_16g_256x256x128.json b/generation/maisi/configs/config_infer_16g_256x256x128.json new file mode 100644 index 000000000..72933304b --- /dev/null +++ b/generation/maisi/configs/config_infer_16g_256x256x128.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 256, + 256, + 128 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 1.5, + 1.5, + 4.0 + ], + "autoencoder_sliding_window_infer_size": [96,96,96], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 2 +} diff --git a/generation/maisi/configs/config_infer_16g_256x256x256.json b/generation/maisi/configs/config_infer_16g_256x256x256.json new file mode 100644 index 000000000..d4ec9e1a8 --- /dev/null +++ b/generation/maisi/configs/config_infer_16g_256x256x256.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 256, + 256, + 256 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 1.5, + 1.5, + 2.0 + ], + "autoencoder_sliding_window_infer_size": [48,48,64], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 2 +} diff --git a/generation/maisi/configs/config_infer_16g_512x512x128.json b/generation/maisi/configs/config_infer_16g_512x512x128.json new file mode 100644 index 000000000..5e067cd4b --- /dev/null +++ b/generation/maisi/configs/config_infer_16g_512x512x128.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 128 + ], + "image_output_ext": ".nii.gz", + 
"label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 4.0 + ], + "autoencoder_sliding_window_infer_size": [64,64,32], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 1 +} diff --git a/generation/maisi/configs/config_infer_24g_256x256x256.json b/generation/maisi/configs/config_infer_24g_256x256x256.json new file mode 100644 index 000000000..bb0806f63 --- /dev/null +++ b/generation/maisi/configs/config_infer_24g_256x256x256.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 256, + 256, + 256 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 1.5, + 1.5, + 2.0 + ], + "autoencoder_sliding_window_infer_size": [64,64,64], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 4 +} diff --git a/generation/maisi/configs/config_infer_24g_512x512x128.json b/generation/maisi/configs/config_infer_24g_512x512x128.json new file mode 100644 index 000000000..6d2b9d7ea --- /dev/null +++ b/generation/maisi/configs/config_infer_24g_512x512x128.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 128 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 4.0 + ], + "autoencoder_sliding_window_infer_size": [80,80,32], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 1 +} diff --git a/generation/maisi/configs/config_infer_24g_512x512x512.json b/generation/maisi/configs/config_infer_24g_512x512x512.json new file mode 100644 index 000000000..2cbfb9573 --- /dev/null +++ b/generation/maisi/configs/config_infer_24g_512x512x512.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 512 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 1.0 + ], + "autoencoder_sliding_window_infer_size": [64,64,48], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 2 +} diff --git a/generation/maisi/configs/config_infer_32g_512x512x512.json b/generation/maisi/configs/config_infer_32g_512x512x512.json new file mode 100644 index 000000000..5dcbcacbe --- /dev/null +++ b/generation/maisi/configs/config_infer_32g_512x512x512.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 512 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 1.0 + ], + "autoencoder_sliding_window_infer_size": [64,64,64], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 2 +} diff --git a/generation/maisi/configs/config_infer_80g_512x512x128.json b/generation/maisi/configs/config_infer_80g_512x512x128.json new file mode 100644 index 000000000..d20dbbc76 
--- /dev/null +++ b/generation/maisi/configs/config_infer_80g_512x512x128.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 128 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 4.0 + ], + "autoencoder_sliding_window_infer_size": [128,128,32], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 4 +} diff --git a/generation/maisi/configs/config_infer_80g_512x512x512.json b/generation/maisi/configs/config_infer_80g_512x512x512.json new file mode 100644 index 000000000..bfcd6b7dc --- /dev/null +++ b/generation/maisi/configs/config_infer_80g_512x512x512.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 512 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 1.0 + ], + "autoencoder_sliding_window_infer_size": [80,80,80], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 2 +} diff --git a/generation/maisi/configs/config_infer_80g_512x512x768.json b/generation/maisi/configs/config_infer_80g_512x512x768.json new file mode 100644 index 000000000..9cb7e61b6 --- /dev/null +++ b/generation/maisi/configs/config_infer_80g_512x512x768.json @@ -0,0 +1,23 @@ +{ + "num_output_samples": 1, + "body_region": ["abdomen"], + "anatomy_list": ["liver","hepatic tumor"], + "controllable_anatomy_size": [], + "num_inference_steps": 1000, + "mask_generation_num_inference_steps": 1000, + "output_size": [ + 512, + 512, + 768 + ], + "image_output_ext": ".nii.gz", + "label_output_ext": ".nii.gz", + "spacing": [ + 0.75, + 0.75, + 0.66667 + ], + "autoencoder_sliding_window_infer_size": [80,80,112], + "autoencoder_sliding_window_infer_overlap": 0.25, + "autoencoder_tp_num_splits": 4 +} diff --git a/generation/maisi/scripts/inference.py b/generation/maisi/scripts/inference.py index ffd96a5e8..8220f200c 100644 --- a/generation/maisi/scripts/inference.py +++ b/generation/maisi/scripts/inference.py @@ -132,6 +132,10 @@ def main(): # check the format of inference inputs config_infer_dict = json.load(open(args.inference_file, "r")) + # override num_split if asked + if "autoencoder_tp_num_splits" in config_infer_dict: + args.autoencoder_def["num_splits"] = config_infer_dict["autoencoder_tp_num_splits"] + args.mask_generation_autoencoder_def["num_splits"] = config_infer_dict["autoencoder_tp_num_splits"] for k, v in config_infer_dict.items(): setattr(args, k, v) print(f"{k}: {v}") @@ -225,4 +229,7 @@ def main(): format="[%(asctime)s.%(msecs)03d][%(levelname)5s](%(name)s) - %(message)s", datefmt="%Y-%m-%d %H:%M:%S", ) + torch.cuda.reset_peak_memory_stats() main() + peak_memory_gb = torch.cuda.max_memory_allocated() / (1024**3) # Convert to GB + print(f"Peak GPU memory usage: {peak_memory_gb:.2f} GB") diff --git a/generation/maisi/scripts/sample.py b/generation/maisi/scripts/sample.py index 49b5d591d..c1e2c8699 100644 --- a/generation/maisi/scripts/sample.py +++ b/generation/maisi/scripts/sample.py @@ -132,7 +132,7 @@ def ldm_conditional_sample_one_mask( conditioning=anatomy_size.to(device), ) # decode latents to synthesized 
masks - if math.prod(latent_shape[1:]) < math.prod(autoencoder_sliding_window_infer_size): + if math.prod(latent_shape[1:]) <= math.prod(autoencoder_sliding_window_infer_size): synthetic_mask = recon_model(latents).cpu().detach() else: synthetic_mask = ( @@ -274,15 +274,15 @@ def ldm_conditional_sample_one_image( # decode latents to synthesized images logging.info("---- Start decoding latent features into images... ----") start_time = time.time() - if math.prod(latent_shape[1:]) < math.prod(autoencoder_sliding_window_infer_size): + if math.prod(latent_shape[1:]) <= math.prod(autoencoder_sliding_window_infer_size): synthetic_images = recon_model(latents) else: synthetic_images = sliding_window_inference( inputs=latents, roi_size=( - min(output_size[0] // 4 // 4 * 3, autoencoder_sliding_window_infer_size[0]), - min(output_size[1] // 4 // 4 * 3, autoencoder_sliding_window_infer_size[1]), - min(output_size[2] // 4 // 4 * 3, autoencoder_sliding_window_infer_size[2]), + min(output_size[0] // 4, autoencoder_sliding_window_infer_size[0]), + min(output_size[1] // 4, autoencoder_sliding_window_infer_size[1]), + min(output_size[2] // 4, autoencoder_sliding_window_infer_size[2]), ), sw_batch_size=1, predictor=recon_model, diff --git a/modules/omniverse/Spleen.png b/modules/omniverse/Spleen.png new file mode 100644 index 000000000..b3b60adfa Binary files /dev/null and b/modules/omniverse/Spleen.png differ diff --git a/modules/omniverse/omniverse.png b/modules/omniverse/omniverse.png new file mode 100644 index 000000000..858cb137a Binary files /dev/null and b/modules/omniverse/omniverse.png differ diff --git a/modules/omniverse/omniverse_integration.ipynb b/modules/omniverse/omniverse_integration.ipynb new file mode 100644 index 000000000..8ba6082f5 --- /dev/null +++ b/modules/omniverse/omniverse_integration.ipynb @@ -0,0 +1,605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright (c) MONAI Consortium \n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); \n", + "you may not use this file except in compliance with the License. \n", + "You may obtain a copy of the License at \n", + " http://www.apache.org/licenses/LICENSE-2.0 \n", + "Unless required by applicable law or agreed to in writing, software \n", + "distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. \n", + "See the License for the specific language governing permissions and \n", + "limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# From 3D Segmentation to USD: A Complete Workflow for Hierarchical Mesh Conversion, USD Export, and NVIDIA Omniverse Integration\n", + "In this tutorial, we’ll cover:\n", + "\n", + "- **Download the MONAI Bundle**: First, we download a pre-trained model bundle from the MONAI Model Zoo. This model bundle contains the necessary models and configurations for medical image analysis, which can accelerate our development process.\n", + "\n", + "- **Run the Inference Workflow of the Bundle**: Using the downloaded model bundle, we run its built-in inference workflow to automatically segment or analyze the input medical imaging data and obtain the desired results.\n", + "\n", + "- **Convert NIfTI/DICOM to Mesh**: The inference results are usually in NIfTI or DICOM format. 
We need to convert this volumetric data into a 3D mesh model for visualization and further processing.\n", + "\n", + "- **Save the Mesh as OBJ and GLTF Formats**:\n", + " - Save Single Mesh as OBJ Format: For a single organ or structure mesh, we save it in OBJ format, which is convenient to open and view in various 3D software.\n", + " - Save Combined Mesh as GLTF Format: When we have meshes of multiple organs, we save them in GLTF format. This format preserves the hierarchical structure of the organs, making it easier to reflect the relationships between different organs during visualization.\n", + "\n", + "- **Visualization in the Omniverse**: Finally, we import the GLTF format mesh into NVIDIA Omniverse. In Omniverse, we can utilize its powerful rendering and interactive capabilities to perform high-quality 3D visualization of medical imaging data, exploring the structures and spatial relationships of organs.\n", + "\n", + "This end-to-end process enables efficient, high-quality visualization in NVIDIA Omniverse from raw segmentation data.\n", + "\n", + "\n", + "References:\n", + "\n", + "[1] https://developer.nvidia.com/blog/advancing-surgical-robotics-with-ai-driven-simulation-and-digital-twin-technology/" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!python -c \"import monai\" || pip install -q \"monai-weekly[nibabel]\"\n", + "!python -c \"import vtk\" || pip install -q vtk\n", + "!python -c \"import pxr\" || pip install -q usd-core\n", + "!python -c \"import trimesh\" || pip install -q trimesh\n", + "!python -c \"import ipyvtklink\" || pip install -q ipyvtklink\n", + "!apt update\n", + "!apt install -y libgl1-mesa-glx\n", + "!apt install libxrender1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MONAI version: 1.4.1rc1\n", + "Numpy version: 1.24.4\n", + "Pytorch version: 2.5.0a0+872d972e41.nv24.08\n", + "MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False\n", + "MONAI rev id: e604d1841fe60c0ffb6978ae4116535ca8d8f34f\n", + "MONAI __file__: /workspace/Code/MONAI/monai/__init__.py\n", + "\n", + "Optional dependencies:\n", + "Pytorch Ignite version: 0.4.11\n", + "ITK version: 5.4.0\n", + "Nibabel version: 5.3.2\n", + "scikit-image version: 0.24.0\n", + "scipy version: 1.14.0\n", + "Pillow version: 10.4.0\n", + "Tensorboard version: 2.16.2\n", + "gdown version: 5.2.0\n", + "TorchVision version: 0.20.0a0\n", + "tqdm version: 4.66.5\n", + "lmdb version: 1.5.1\n", + "psutil version: 6.0.0\n", + "pandas version: 2.2.2\n", + "einops version: 0.8.0\n", + "transformers version: 4.40.2\n", + "mlflow version: 2.17.2\n", + "pynrrd version: 1.1.1\n", + "clearml version: 1.16.5\n", + "\n", + "For details about installing the optional dependencies, please visit:\n", + " https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies\n", + "\n" + ] + } + ], + "source": [ + "import os\n", + "import tempfile\n", + "import numpy as np\n", + "\n", + "import vtk\n", + "import vtkmodules\n", + "\n", + "# from ipyvtklink.viewer import ViewInteractiveWidget\n", + "\n", + "from utility import convert_to_mesh, convert_mesh_to_usd\n", + "\n", + "from monai.config import print_config\n", + "from 
monai.bundle.scripts import create_workflow, download\n", + "from monai.transforms import LoadImaged, SaveImage, Compose, BorderPadd, SqueezeDimd\n", + "\n", + "print_config()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup data directory\n", + "\n", + "You can specify a directory with the `MONAI_DATA_DIRECTORY` environment variable. \n", + "This allows you to save results and reuse downloads. \n", + "If not specified a temporary directory will be used." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/workspace/Data\n" + ] + } + ], + "source": [ + "directory = os.environ.get(\"MONAI_DATA_DIRECTORY\")\n", + "if directory is not None:\n", + " os.makedirs(directory, exist_ok=True)\n", + "root_dir = tempfile.mkdtemp() if directory is None else directory\n", + "print(root_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Generate segmentation from MAISI\n", + "\n", + "### Download the MONAI Bundle\n", + "In this section, we download the MAISI bundle from monai model-zoo." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-12-11 11:31:31,904 - INFO - --- input summary of monai.bundle.scripts.download ---\n", + "2024-12-11 11:31:31,905 - INFO - > name: 'maisi_ct_generative'\n", + "2024-12-11 11:31:31,905 - INFO - > bundle_dir: '/workspace/Data'\n", + "2024-12-11 11:31:31,905 - INFO - > source: 'monaihosting'\n", + "2024-12-11 11:31:31,906 - INFO - > remove_prefix: 'monai_'\n", + "2024-12-11 11:31:31,906 - INFO - > progress: True\n", + "2024-12-11 11:31:31,906 - INFO - ---\n", + "\n", + "\n", + "2024-12-11 11:31:32,611 - INFO - Expected md5 is None, skip md5 check for file /workspace/Data/maisi_ct_generative_v0.4.5.zip.\n", + "2024-12-11 11:31:32,612 - INFO - File exists: /workspace/Data/maisi_ct_generative_v0.4.5.zip, skipped downloading.\n", + "2024-12-11 11:31:32,612 - INFO - Writing into directory: /workspace/Data.\n" + ] + } + ], + "source": [ + "download(name=\"maisi_ct_generative\", bundle_dir=root_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the Inference Workflow of the Bundle\n", + "We use the `create_workflow` API from MONAI to streamline the inference process directly from the bundle.\n", + "\n", + "Key input details for inference, such as the body region and target anatomy, are specified in [./configs/inference.json]. For a comprehensive explanation of the parameters, refer to [./docs/README.md] in the bundle directory. Additionally, we adjust the `output_size`, `spacing` and `num_splits` parameters to prevent out-of-memory issues during inference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-12-11 11:32:14,010 - INFO - Setting logging properties based on config: /workspace/Data/maisi_ct_generative/configs/logging.conf.\n", + "2024-12-11 11:32:14,011 - py.warnings - WARNING - Detected deprecated name 'optional_packages_version' in configuration file, replacing with 'required_packages_version'.\n", + "\n", + "2024-12-11 11:32:14,021 - INFO - --- input summary of monai.bundle.scripts.run ---\n", + "2024-12-11 11:32:14,022 - INFO - > workflow_type: 'inference'\n", + "2024-12-11 11:32:14,022 - INFO - > bundle_root: '/workspace/Data/maisi_ct_generative'\n", + "2024-12-11 11:32:14,022 - INFO - > output_size_xy: 256\n", + "2024-12-11 11:32:14,022 - INFO - > output_size_z: 256\n", + "2024-12-11 11:32:14,022 - INFO - > spacing_xy: 1.5\n", + "2024-12-11 11:32:14,023 - INFO - > spacing_z: 1.5\n", + "2024-12-11 11:32:14,023 - INFO - > autoencoder_def#num_splits: 16\n", + "2024-12-11 11:32:14,023 - INFO - > mask_generation_autoencoder_def#num_splits: 16\n", + "2024-12-11 11:32:14,023 - INFO - ---\n", + "\n", + "\n" + ] + } + ], + "source": [ + "bundle_root = os.path.join(root_dir, \"maisi_ct_generative\")\n", + "override = {\n", + " \"output_size_xy\": 256,\n", + " \"output_size_z\": 256,\n", + " \"spacing_xy\": 1.5,\n", + " \"spacing_z\": 1.5,\n", + " \"autoencoder_def#num_splits\": 16,\n", + " \"mask_generation_autoencoder_def#num_splits\": 16,\n", + "}\n", + "workflow = create_workflow(\n", + " config_file=os.path.join(bundle_root, \"configs/inference.json\"),\n", + " workflow_type=\"inference\",\n", + " bundle_root=bundle_root,\n", + " **override,\n", + ")\n", + "\n", + "# uncomment this line to run the inference workflow\n", + "# then you will get the generated CT images and paired masks which can be used for the following steps.\n", + "# In this tutorial, we just use the tested data (IntegrationTest-AbdomenCT.nii.gz) from bundle for demonstration.\n", + "# workflow.run()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert NIfTI/DICOM to Mesh and Save as OBJ/GLTF\n", + "\n", + "In this section, we convert the generated NII segmentation files into mesh format and save them as OBJ files. This process involves mapping labels to organs, exporting each organ as an individual mesh file, and generating a combined mesh file for all organs.\n", + "\n", + "We define a function `nii_to_mesh` to handle the conversion of NIfTI files to OBJ files. The workflow is as follows:\n", + "\n", + "- **Preprocessing**:\n", + "The function uses a series of transformations (`LoadImaged`, `BorderPadd`, and `SqueezeDimd`) to load and preprocess the input NIfTI file, ensuring it is ready for convertion.\n", + "\n", + "- **Organ Label Mapping**:\n", + "It iterates over a dictionary mapping organ names to their respective label values. For each organ:\n", + "A binary mask (single_organ) is created to isolate the organ by assigning its corresponding label value.\n", + "The segmented organ is saved as a NIfTI file.\n", + "\n", + "- **Mesh Conversion**:\n", + "Each segmented NIfTI file is converted into an OBJ file using the `convert_to_mesh` function.\n", + "\n", + "- **Combined Mesh**:\n", + "A combined segmentation file is created by merging all organ segmentations into a single NIfTI file. 
This file is then converted into a GLTF file, preserving the hierarchical structure of the organs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# 17 groupings that cover 101 segments/regions out of 140\n", + "labels = {\n", + " \"Liver\": 1,\n", + " \"Spleen\": 3,\n", + " \"Pancreas\": 4,\n", + " \"Heart\": 115,\n", + " \"Body\": 200,\n", + " \"Gallbladder\": 10,\n", + " \"Stomach\": 12,\n", + " \"Small_bowel\": 19,\n", + " \"Colon\": 62,\n", + " \"Kidney\": {\"right_kidney\": 5, \"left_kidney\": 14},\n", + " \"Veins\": {\n", + " \"aorta\": 6,\n", + " \"inferior_vena_cava\": 7,\n", + " \"portal_vein_and_splenic_vein\": 17,\n", + " \"left_iliac_artery\": 58,\n", + " \"right_iliac_artery\": 59,\n", + " \"left_iliac_vena\": 60,\n", + " \"right_iliac_vena\": 61,\n", + " \"pulmonary_vein\": 119,\n", + " \"left_subclavian_artery\": 123,\n", + " \"right_subclavian_artery\": 124,\n", + " \"superior_vena_cava\": 125,\n", + " \"brachiocephalic_trunk\": 109,\n", + " \"left_brachiocephalic_vein\": 110,\n", + " \"right_brachiocephalic_vein\": 111,\n", + " \"left_common_carotid_artery\": 112,\n", + " \"right_common_carotid_artery\": 113,\n", + " },\n", + " \"Lungs\": {\n", + " \"left_lung_upper_lobe\": 28,\n", + " \"left_lung_lower_lobe\": 29,\n", + " \"right_lung_upper_lobe\": 30,\n", + " \"right_lung_middle_lobe\": 31,\n", + " \"right_lung_lower_lobe\": 32,\n", + " },\n", + " \"Spine\": {\n", + " \"vertebrae_L6\": 131,\n", + " \"vertebrae_L5\": 33,\n", + " \"vertebrae_L4\": 34,\n", + " \"vertebrae_L3\": 35,\n", + " \"vertebrae_L2\": 36,\n", + " \"vertebrae_L1\": 37,\n", + " \"vertebrae_T12\": 38,\n", + " \"vertebrae_T11\": 39,\n", + " \"vertebrae_T10\": 40,\n", + " \"vertebrae_T9\": 41,\n", + " \"vertebrae_T8\": 42,\n", + " \"vertebrae_T7\": 43,\n", + " \"vertebrae_T6\": 44,\n", + " \"vertebrae_T5\": 45,\n", + " \"vertebrae_T4\": 46,\n", + " \"vertebrae_T3\": 47,\n", + " \"vertebrae_T2\": 48,\n", + " \"vertebrae_T1\": 49,\n", + " \"vertebrae_C7\": 50,\n", + " \"vertebrae_C6\": 51,\n", + " \"vertebrae_C5\": 52,\n", + " \"vertebrae_C4\": 53,\n", + " \"vertebrae_C3\": 54,\n", + " \"vertebrae_C2\": 55,\n", + " \"vertebrae_C1\": 56,\n", + " \"sacrum\": 97,\n", + " \"vertebrae_S1\": 127,\n", + " },\n", + " \"Ribs\": {\n", + " \"left_rib_1\": 63,\n", + " \"left_rib_2\": 64,\n", + " \"left_rib_3\": 65,\n", + " \"left_rib_4\": 66,\n", + " \"left_rib_5\": 67,\n", + " \"left_rib_6\": 68,\n", + " \"left_rib_7\": 69,\n", + " \"left_rib_8\": 70,\n", + " \"left_rib_9\": 71,\n", + " \"left_rib_10\": 72,\n", + " \"left_rib_11\": 73,\n", + " \"left_rib_12\": 74,\n", + " \"right_rib_1\": 75,\n", + " \"right_rib_2\": 76,\n", + " \"right_rib_3\": 77,\n", + " \"right_rib_4\": 78,\n", + " \"right_rib_5\": 79,\n", + " \"right_rib_6\": 80,\n", + " \"right_rib_7\": 81,\n", + " \"right_rib_8\": 82,\n", + " \"right_rib_9\": 83,\n", + " \"right_rib_10\": 84,\n", + " \"right_rib_11\": 85,\n", + " \"right_rib_12\": 86,\n", + " \"costal_cartilages\": 114,\n", + " \"sternum\": 122,\n", + " },\n", + " \"Shoulders\": {\"left_scapula\": 89, \"right_scapula\": 90, \"left_clavicula\": 91, \"right_clavicula\": 92},\n", + " \"Hips\": {\"left_hip\": 95, \"right_hip\": 96},\n", + " \"Back_muscles\": {\n", + " \"left_gluteus_maximus\": 98,\n", + " \"right_gluteus_maximus\": 99,\n", + " \"left_gluteus_medius\": 100,\n", + " \"right_gluteus_medius\": 101,\n", + " \"left_gluteus_minimus\": 102,\n", + " \"right_gluteus_minimus\": 103,\n", + " \"left_autochthon\": 104,\n", + " 
\"right_autochthon\": 105,\n", + " \"left_iliopsoas\": 106,\n", + " \"right_iliopsoas\": 107,\n", + " },\n", + "}\n", + "\n", + "\n", + "def nii_to_mesh(input_nii_path, output_nii_path, output_obj_path):\n", + " \"\"\"\n", + " This function converts each organ into a separate OBJ file and generates a GLTF file\n", + " containing all organs with hierarchical structure.\n", + " It processes the input NIfTI file and groups 140 labels into 17 categories.\n", + "\n", + " Args:\n", + " input_nii_path: path to the nii file\n", + " output_nii_path: path to save the obj files\n", + " output_obj_path: path to save the gltf file\n", + " \"\"\"\n", + " if not os.path.exists(output_nii_path):\n", + " os.makedirs(output_nii_path)\n", + " pre_trans = Compose(\n", + " [\n", + " LoadImaged(keys=\"label\", ensure_channel_first=True),\n", + " BorderPadd(keys=\"label\", spatial_border=2),\n", + " SqueezeDimd(keys=\"label\", dim=0),\n", + " ]\n", + " )\n", + " orig_seg = pre_trans({\"label\": input_nii_path})[\"label\"]\n", + " all_organ = np.zeros_like(orig_seg, dtype=np.uint8)\n", + " all_label_values = {}\n", + "\n", + " save_trans = SaveImage(output_ext=\"nii.gz\", output_dtype=np.uint8)\n", + " for j, (organ_name, label_val) in enumerate(labels.items(), start=1):\n", + " single_organ = np.zeros_like(orig_seg, dtype=np.uint8)\n", + " print(f\"Assigning index {j} to label {organ_name}\")\n", + " if isinstance(label_val, dict):\n", + " for _, i in label_val.items():\n", + " all_organ[orig_seg == i] = j\n", + " single_organ[orig_seg == i] = j\n", + " else:\n", + " all_organ[orig_seg == label_val] = j\n", + " single_organ[orig_seg == label_val] = j\n", + " organ_filename = os.path.join(output_nii_path, organ_name)\n", + " save_trans(single_organ[None], meta_data=orig_seg.meta, filename=organ_filename)\n", + " convert_to_mesh(\n", + " f\"{organ_filename}.nii.gz\",\n", + " output_obj_path,\n", + " f\"{organ_name}.obj\",\n", + " label_value=j,\n", + " smoothing_factor=0.5,\n", + " reduction_ratio=0.0,\n", + " )\n", + " all_label_values[j] = organ_name\n", + "\n", + " all_organ_filename = os.path.join(output_nii_path, \"all_organs\")\n", + " save_trans(all_organ[None], meta_data=orig_seg.meta, filename=all_organ_filename)\n", + " convert_to_mesh(\n", + " f\"{all_organ_filename}.nii.gz\",\n", + " output_obj_path,\n", + " \"all_organs.gltf\",\n", + " label_value=all_label_values,\n", + " smoothing_factor=0.6,\n", + " reduction_ratio=0.0,\n", + " )\n", + " print(f\"Saved whole segmentation {all_organ_filename}\")\n", + "\n", + "\n", + "input_nii_path = f\"{bundle_root}/datasets/IntegrationTest-AbdomenCT.nii.gz\"\n", + "output_nii_path = f\"{bundle_root}/datasets/monai/nii\"\n", + "output_obj_path = f\"{bundle_root}/datasets/monai/obj\"\n", + "out = nii_to_mesh(input_nii_path, output_nii_path, output_obj_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Convert 3D model contain all organs to USD format\n", + "\n", + "[Universal Scene Description (OpenUSD)](https://openusd.org/release/index.html) is an extensible ecosystem of file formats, compositors, renderers, and other plugins for comprehensive 3D scene description." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "USD file successfully exported to /workspace/Data/maisi_ct_generative/datasets/monai/obj/all_organs.usd\n" + ] + } + ], + "source": [ + "obj_filename = f\"{bundle_root}/datasets/monai/obj/all_organs.gltf\"\n", + "usd_filename = f\"{bundle_root}/datasets/monai/obj/all_organs.usd\"\n", + "\n", + "convert_mesh_to_usd(obj_filename, usd_filename)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize one single organ mesh\n", + "\n", + "Here we randomly select one organ to visualize the mesh using `ViewInteractiveWidget`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Step 1: Read the mesh\n", + "reader = vtk.vtkOBJReader()\n", + "reader.SetFileName(f\"{bundle_root}/datasets/monai/obj/Spleen.obj\")\n", + "reader.Update()\n", + "\n", + "# Step 2: Create a mapper\n", + "mapper = vtkmodules.vtkRenderingCore.vtkPolyDataMapper()\n", + "mapper.SetInputData(reader.GetOutput())\n", + "print(f\"Number of Points: {mapper.GetInput().GetNumberOfPoints()}\")\n", + "print(f\"Number of Cells: {mapper.GetInput().GetNumberOfCells()}\")\n", + "\n", + "# Step 3: Create an actor\n", + "actor = vtkmodules.vtkRenderingCore.vtkActor()\n", + "actor.SetMapper(mapper)\n", + "\n", + "# Step 4: Create a renderer\n", + "renderer = vtk.vtkRenderer()\n", + "renderer.AddActor(actor)\n", + "render_window = vtk.vtkRenderWindow()\n", + "render_window.AddRenderer(renderer)\n", + "render_window.SetSize(800, 600)\n", + "render_window.SetOffScreenRendering(1)\n", + "\n", + "# Step 5: Create a render window interactor\n", + "render_window_interactor = vtk.vtkRenderWindowInteractor()\n", + "render_window_interactor.SetRenderWindow(render_window)\n", + "interactor_style = vtk.vtkInteractorStyleTrackballCamera()\n", + "render_window_interactor.SetInteractorStyle(interactor_style)\n", + "\n", + "# Uncomment the following line to display the interactive widget\n", + "# render_window.Render()\n", + "# interactive_widget = ViewInteractiveWidget(render_window)\n", + "# interactive_widget" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Spleen](Spleen.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualization in the Omniverse\n", + "\n", + "Download the [NVIDIA Omniverse](https://www.nvidia.com/en-us/omniverse/) launcher to explore applications such as USD Composer for viewing and manipulating the OpenUSD output file.\n", + "\n", + "![omniverse](./omniverse.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualization of All Organs\n", + "\n", + "You can view the 3D models using online viewer such as https://3dviewer.net/#\n", + "\n", + "![all organs](result.png)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/modules/omniverse/result.png b/modules/omniverse/result.png new file mode 100644 index 000000000..14925c25e Binary files /dev/null and b/modules/omniverse/result.png differ diff --git 
a/modules/omniverse/utility.py b/modules/omniverse/utility.py new file mode 100644 index 000000000..96e18d95f --- /dev/null +++ b/modules/omniverse/utility.py @@ -0,0 +1,228 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import vtk +import json +from pxr import Usd, UsdGeom, Gf, Sdf +import trimesh +import numpy as np +import matplotlib.pyplot as plt + + +def convert_to_mesh( + segmentation_path, output_folder, filename, label_value=1, smoothing_factor=0.5, reduction_ratio=0.0 +): + """ + Function to perform segmentation-to-mesh conversion and smoothing + """ + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + # Generate 16 distinct colors using a colormap + colormap = plt.get_cmap("tab20") # Using tab20 for distinct colors + colors = [colormap(i) for i in np.linspace(0, 1, 16)] + + # Step 1: Load segmentation (binary labelmap, e.g., NRRD file) + reader = vtk.vtkNIFTIImageReader() + reader.SetFileName(segmentation_path) + reader.Update() + + label_values = {label_value: None} if isinstance(label_value, int) else label_value + if len(label_values.keys()) > 1: + renderer = vtk.vtkRenderer() + render_window = vtk.vtkRenderWindow() + render_window.AddRenderer(renderer) + actor_metadata = {} + for i, name in label_values.items(): + # Step 2: Create Closed Surface Representation using vtkDiscreteFlyingEdges3D + flying_edges = vtk.vtkDiscreteFlyingEdges3D() + flying_edges.SetInputConnection(reader.GetOutputPort()) + flying_edges.ComputeGradientsOff() + flying_edges.ComputeNormalsOff() + flying_edges.SetValue(0, i) + flying_edges.Update() + + if flying_edges.GetOutput().GetNumberOfPoints() == 0: + print(f"No points found for label {i}. 
Skipping...") + continue + + # Step 3: Decimate the mesh + if reduction_ratio > 0.0: + decimation_filter = vtk.vtkDecimatePro() + decimation_filter.SetInputConnection(flying_edges.GetOutputPort()) + decimation_filter.SetFeatureAngle(60) + decimation_filter.SplittingOff() + decimation_filter.PreserveTopologyOn() + decimation_filter.SetMaximumError(1) + decimation_filter.SetTargetReduction(reduction_ratio) + decimation_filter.Update() + + # Step 4: Smooth the resulting mesh + smoothing_filter = vtk.vtkWindowedSincPolyDataFilter() + numberOfIterations = int(20 + smoothing_factor * 40) + passBand = pow(10.0, -4.0 * smoothing_factor) + if reduction_ratio > 0.0: + smoothing_filter.SetInputConnection(decimation_filter.GetOutputPort()) + else: + smoothing_filter.SetInputConnection(flying_edges.GetOutputPort()) + smoothing_filter.SetNumberOfIterations(numberOfIterations) + smoothing_filter.SetPassBand(passBand) + smoothing_filter.BoundarySmoothingOff() + smoothing_filter.FeatureEdgeSmoothingOff() + smoothing_filter.NonManifoldSmoothingOn() + smoothing_filter.NormalizeCoordinatesOn() + smoothing_filter.Update() + + # Step 5: Decimate the mesh further + decimation = vtk.vtkQuadricDecimation() + decimation.SetInputConnection(smoothing_filter.GetOutputPort()) + decimation.SetTargetReduction(0.9) # 90% reduction, the same as slicer + decimation.VolumePreservationOn() + decimation.Update() + + # Step 6: Generate normals for better shading + decimatedNormals = vtk.vtkPolyDataNormals() + decimatedNormals.SetInputConnection(decimation.GetOutputPort()) + decimatedNormals.SplittingOff() + decimatedNormals.ConsistencyOn() + decimatedNormals.Update() + + # Step 7: convert to LPS + ras2lps = vtk.vtkMatrix4x4() + ras2lps.SetElement(0, 0, -1) + ras2lps.SetElement(1, 1, -1) + ras2lpsTransform = vtk.vtkTransform() + ras2lpsTransform.SetMatrix(ras2lps) + transformer = vtk.vtkTransformPolyDataFilter() + transformer.SetTransform(ras2lpsTransform) + transformer.SetInputConnection(decimatedNormals.GetOutputPort()) + transformer.Update() + + if len(label_values.keys()) > 1: + mapper = vtk.vtkPolyDataMapper() + mapper.SetInputData(transformer.GetOutput()) + actor = vtk.vtkActor() + actor.SetMapper(mapper) + colorRGB = colors[i - 1][:3] if i < 15 else colors[i - 2][:3] + colorHSV = [0, 0, 0] + vtk.vtkMath.RGBToHSV(colorRGB, colorHSV) + colorHSV[1] = min(colorHSV[1] * 1.5, 1.0) # increase saturation + colorHSV[2] = min(colorHSV[2] * 1.0, 1.0) # increase brightness + colorRGB = [0, 0, 0] + vtk.vtkMath.HSVToRGB(colorHSV, colorRGB) + actor.GetProperty().SetColor(colorRGB[0], colorRGB[1], colorRGB[2]) + actor.GetProperty().SetInterpolationToGouraud() + actor_metadata[actor] = name + renderer.AddActor(actor) + + output_filename = os.path.join(output_folder, filename) + if len(label_values.keys()) > 1: + exporter = vtk.vtkGLTFExporter() + exporter.SetFileName(output_filename) + exporter.SetRenderWindow(render_window) + exporter.SetInlineData(True) + exporter.Write() + else: + if flying_edges.GetOutput().GetNumberOfPoints() > 0: + polydata = transformer.GetOutput() + writer = vtk.vtkOBJWriter() + writer.SetFileName(output_filename) + writer.SetInputData(polydata) + writer.Write() + + print(f"Mesh successfully exported to {output_filename}") + + if len(label_values.keys()) > 1: + # Modify GLTF to include actor names + with open(output_filename, "r") as f: + gltf_data = json.load(f) + + # Iterate over actors and add names to GLTF nodes + actors = renderer.GetActors() + actors.InitTraversal() + + for i, node in 
enumerate(gltf_data.get("nodes", [])): + actor = actors.GetNextActor() + if actor in actor_metadata: + node["name"] = actor_metadata[actor] + + # Save the modified GLTF file + modified_output_filename = output_filename.replace(".gltf", "_modified.gltf") + with open(modified_output_filename, "w") as f: + json.dump(gltf_data, f, indent=2) + print(f"Modified GLTF successfully exported to {modified_output_filename}") + + +def convert_mesh_to_usd(input_file, output_file): + """ + convert a mesh file to USD format + """ + # Load the mesh + mesh = trimesh.load(input_file) + + # Create a new USD stage + stage = Usd.Stage.CreateNew(output_file) + + # If the mesh is a Scene, process each geometry + if isinstance(mesh, trimesh.Scene): + for name, geometry in mesh.geometry.items(): + # Create a unique path for each mesh + mesh_path = f"/{name}" + usd_mesh = UsdGeom.Mesh.Define(stage, mesh_path) + + # Set vertex positions + usd_mesh.GetPointsAttr().Set([Gf.Vec3f(*vertex) for vertex in geometry.vertices]) + + # Set face indices and counts + face_vertex_indices = geometry.faces.flatten().tolist() + face_vertex_counts = [len(face) for face in geometry.faces] + + usd_mesh.GetFaceVertexIndicesAttr().Set(face_vertex_indices) + usd_mesh.GetFaceVertexCountsAttr().Set(face_vertex_counts) + + # Optionally, set normals + if geometry.vertex_normals is not None: + usd_mesh.GetNormalsAttr().Set([Gf.Vec3f(*normal) for normal in geometry.vertex_normals]) + usd_mesh.SetNormalsInterpolation("vertex") + + # Handle materials and other attributes if needed + else: + # It's a single mesh, proceed as before + usd_mesh = UsdGeom.Mesh.Define(stage, "/Mesh") + + # Set vertex positions + usd_mesh.GetPointsAttr().Set([Gf.Vec3f(*vertex) for vertex in mesh.vertices]) + + # Set face indices and counts + face_vertex_indices = mesh.faces.flatten().tolist() + face_vertex_counts = [len(face) for face in mesh.faces] + + usd_mesh.GetFaceVertexIndicesAttr().Set(face_vertex_indices) + usd_mesh.GetFaceVertexCountsAttr().Set(face_vertex_counts) + + # Optionally, set normals + if mesh.vertex_normals is not None: + usd_mesh.GetNormalsAttr().Set([Gf.Vec3f(*normal) for normal in mesh.vertex_normals]) + usd_mesh.SetNormalsInterpolation("vertex") + + # Save the USD file + stage.GetRootLayer().Save() + print(f"USD file successfully exported to {output_file}") + + +if __name__ == "__main__": + input_file = "/workspace/Data/maisi_ct_generative/datasets/output_scene.gltf" # or "input.obj" + output_file = "/workspace/Code/tutorials/modules/omniverse/output_scene.usd" + + convert_mesh_to_usd(input_file, output_file) diff --git a/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb b/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb index 33b826838..25ec13eb8 100644 --- a/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb +++ b/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb @@ -159,7 +159,7 @@ "!nsys profile \\\n", " --trace nvtx,osrt,cudnn,cuda, \\\n", " --delay 5 \\\n", - " --duration 10 \\\n", + " --duration 20 \\\n", " --show-output true \\\n", " --force-overwrite true \\\n", " --output profile_report.nsys-rep \\\n", diff --git a/runner.sh b/runner.sh index e547e859d..07c9c07d7 100755 --- a/runner.sh +++ b/runner.sh @@ -81,6 +81,7 @@ doesnt_contain_max_epochs=("${doesnt_contain_max_epochs[@]}" bending_energy_diff doesnt_contain_max_epochs=("${doesnt_contain_max_epochs[@]}" mask_augmentation_example.ipynb) doesnt_contain_max_epochs=("${doesnt_contain_max_epochs[@]}" 
maisi_inference_tutorial.ipynb) doesnt_contain_max_epochs=("${doesnt_contain_max_epochs[@]}" realism_diversity_metrics.ipynb) +doesnt_contain_max_epochs=("${doesnt_contain_max_epochs[@]}" omniverse_integration.ipynb) # Execution of the notebook in these folders / with the filename cannot be automated skip_run_papermill=() @@ -127,6 +128,7 @@ skip_run_papermill=("${skip_run_papermill[@]}" .*nuclei_classification_infer.ipy skip_run_papermill=("${skip_run_papermill[@]}" .*nuclick_infer.ipynb*) # https://github.com/Project-MONAI/tutorials/issues/1542 skip_run_papermill=("${skip_run_papermill[@]}" .*unet_segmentation_3d_ignite_clearml.ipynb*) # https://github.com/Project-MONAI/tutorials/issues/1555 skip_run_papermill=("${skip_run_papermill[@]}" .*vista_2d_tutorial_monai.ipynb*) +skip_run_papermill=("${skip_run_papermill[@]}" .*learn2reg_oasis_unpaired_brain_mr.ipynb*) # output formatting separator=""