UWB-Biocomputing · AndrewBMadison · Jan 31, 2025 · Jan 31, 2025 · Feb 4, 2025 · Feb 7, 2025
diff --git a/.gitignore b/.gitignore
@@ -97,3 +97,6 @@ Testing/RegressionTesting/TestOutput/*.xml
 Testing/RegressionTesting/TestOutput/*.h5
 Testing/UnitTesting/TestOutput/*.xml
 Testing/UnitTesting/TestOutput/*.h5
+
+# Machine Specific build script
+build.sh
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -8,7 +8,7 @@ cmake_minimum_required(VERSION 3.12)
 #
 #You can also pass this flag when running cmake from the command line like this:
 #
-#cmake..- D ENABLE_CUDA = YES
+#cmake -D ENABLE_CUDA=YES ..
 #
 #"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable.
 ############################################################################################
@@ -21,17 +21,19 @@ if(NOT PERFORMANCE_METRICS)
         set(PERFORMANCE_METRICS NO)
 endif()
 
-#CONDITIONAL FLAG to turn on the Gprof profiler(                                                   \
-#   Gprof is a performance analysis tool for Unix applications)
-#Steps to run Gprof
-#Step 01 : set(GPROF YES) below
-#Step 02 : Compile and run the simulation on CPU or GPU as usual
-#Step 03 : Run the generated gmon.out file from the build directory and save the output in an txt  \
-#      file to improve readability                                                                  \
-#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt"
-#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt" 
-if(NOT GPROF)
-        set(GPROF NO)
+############################################################################################
+#CONDITIONAL FLAG to change target architecture for the GPU simulator from the default
+#
+#You can pass this flag when running cmake from the command line like this, setting TARGET_ARCH \
+#       to your desired architecture:                                                            \
+#
+#cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 ..
+#
+#"YES" / GPU choice only available if CUDA library is installed and the GPU is CUDA capable.
+#If TARGET_ARCH is not passed in, it defaults to 37 (compute capability 3.7, the Kepler architecture).
+############################################################################################
+# Fall back to the default architecture when TARGET_ARCH was not supplied.
+# An empty value (e.g. "-D TARGET_ARCH=") is treated the same as not passing the flag,
+# because it would otherwise produce a malformed -gencode string further down.
+if(NOT DEFINED TARGET_ARCH OR "${TARGET_ARCH}" STREQUAL "")
+        set(TARGET_ARCH 37)
 endif()
 
 #Creates the Graphitti project with the correct languages, depending on if using GPU or not
@@ -45,33 +47,109 @@ if(ENABLE_CUDA)
         add_compile_definitions(USE_GPU)
 #Specify the CUDA architecture / gencode that will be targeted
         ### Set gencode and architecture variables to the correct values for your specific NVIDIA hardware
-        set(CMAKE_CUDA_ARCHITECTURES 37)        
-        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_37,code=sm_37)
+        set(CMAKE_CUDA_ARCHITECTURES ${TARGET_ARCH})
+        set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode=arch=compute_${TARGET_ARCH},code=sm_${TARGET_ARCH})        
+        message(STATUS "Using CUDA architecture: ${TARGET_ARCH}")
 
 else()
         message("\n----Generating Makefile for Graphitti CPU version----")
         project(Graphitti LANGUAGES CXX C)
 endif()
 
+# -----------------------------------------------------------------------------
+# Build Type Configuration
+#
+# CMake supports different build types that control optimization, debugging and profiling:
+#
+#   - Debug         : No optimizations (`-O0`), includes debug symbols (`-g`).
+#   - Release       : Optimized build (`-O3`), removes debug symbols.
+#   - RelWithDebInfo: Optimized (`-O2`) but keeps debug symbols (`-g`) for profiling.
+#   - Profiling     : Custom build type (defined in this project) that enables:
+#                     - CPU profiling via `-pg` (GPROF)
+#                     - CUDA profiling via `-lineinfo` (for Nsight Compute)
+#
+# Selecting a Build Type:
+#   - By default, CMake does NOT set a build type for single-config generators.
+#   - If no build type is specified, this script defaults to "Release" for performance.
+#   - You can explicitly set the build type when configuring CMake:
+#
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Debug      # Debug mode
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Release    # Release mode
+#       cmake -S . -B build -DCMAKE_BUILD_TYPE=Profiling  # Profiling mode 
+#       
+#       If you don't want to pass in the build type flag, you can edit this file and add...
+#               set(CMAKE_BUILD_TYPE "Debug") or whichever build type you want
+# -----------------------------------------------------------------------------
+# CMAKE_CONFIGURATION_TYPES only has meaning for multi-config generators (Visual Studio,
+# Xcode, Ninja Multi-Config). Defining it for single-config generators makes code that
+# tests the variable to detect a multi-config generator take the wrong path, so the
+# custom "Profiling" configuration is only registered when the generator is multi-config.
+get_property(GRAPHITTI_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
+if(GRAPHITTI_IS_MULTI_CONFIG)
+    set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;Profiling" CACHE STRING "Supported build types" FORCE)
+elseif(NOT CMAKE_BUILD_TYPE)
+    # Ensure single-config generators use a valid default
+    set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the build type." FORCE)
+endif()
+
+# Set flags for all build types
+set(CMAKE_CXX_FLAGS_DEBUG "-g -O0")
+# We should consider using the -DNDEBUG flag for release code, it disables assert() calls and is higher performance
+set(CMAKE_CXX_FLAGS_RELEASE "-O3")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g")
+
+# Define a custom build type: "Profiling"
+set(CMAKE_CXX_FLAGS_PROFILING "-pg -O2")
+set(CMAKE_EXE_LINKER_FLAGS_PROFILING "-pg")
+# Kept in a Profiling-specific variable so that -pg is not added to shared-library
+# links in Debug/Release/RelWithDebInfo builds.
+set(CMAKE_SHARED_LINKER_FLAGS_PROFILING "-pg")
+
+# Apply the correct flags based on the selected build type.
+# The flags are appended rather than assigned so that anything the user supplied
+# (CXXFLAGS/LDFLAGS in the environment, -DCMAKE_CXX_FLAGS=... on the command line)
+# is kept instead of being silently discarded.
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_DEBUG}")
+    if(ENABLE_CUDA)
+        # -G: generate debug information for device code
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")
+    endif()
+elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELEASE}")
+    if(ENABLE_CUDA)
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O3")
+    endif()
+elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}")
+elseif(CMAKE_BUILD_TYPE STREQUAL "Profiling")
+    message(STATUS "Profiling build enabled: Adding -pg (GPROF)")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_PROFILING}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_PROFILING}")
+    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_PROFILING}")
+    if(ENABLE_CUDA)
+        # -lineinfo: keep source line information for device code (used by Nsight Compute)
+        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo")
+#       set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo -Xptxas=-v")   
+    endif()
+endif()
+
+
+#Gprof is a performance analysis tool for Unix applications.
+#Steps to run Gprof
+#Step 01 : Set the build configuration to Profiling:   -DCMAKE_BUILD_TYPE=Profiling
+#Step 02 : Compile and run the simulation on CPU or GPU as usual
+#Step 03 : Run gprof on the generated gmon.out file from the build directory and save the
+#      output to a text file to improve readability
+#If using CPU - "~/Graphitti/build$ gprof cgraphitti gmon.out > analysis_test.txt"
+#If using GPU - "~/Graphitti/build$ gprof ggraphitti gmon.out > analysis_test.txt"
+
+
+# Print the selected build type and the resulting C++ compiler flags so the
+# configuration can be verified from the cmake output.
+message(STATUS "Build Type: ${CMAKE_BUILD_TYPE}")
+message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
+
+# Report whether the GPU build was requested; the CUDA flags are only printed for GPU builds.
+message(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}")
+if(ENABLE_CUDA)
+        message(STATUS "CMAKE_CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
+endif()
+
+
 #Setting the base version to C++ 17
 set(CMAKE_CXX_STANDARD 17)
 
-#set(DEBUG_MODE YES) for debugging, no optimization
-#set(DEBUG_MODE NO) for production code, -O3 optimization enabled
-set(DEBUG_MODE NO)
-
 if(PERFORMANCE_METRICS)
         message("-- Setting PEREFORMANCE_METRICS: ON")
         add_definitions(-DPERFORMANCE_METRICS)
 endif()
 
-if(GPROF)
-        message("-- Setting GPROF: ON")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg")
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg")
-        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -pg")
-endif()
-
 #HDF5 Support, finds HDF5 package for C and C++ and links the hdf5 libraries to the executable     \
 #                  later in the file.
 find_package(HDF5 COMPONENTS C CXX)
@@ -116,11 +194,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
 #Set extra warning flags
 #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
 
-if (NOT DEBUG_MODE)
-        message("-- Setting Optimization flag: O3")
-        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
-endif()
-
 #define TIXML_USE_STL as a preproccersser macro to use the C++ standard library with TinyXML
 add_compile_definitions(TIXML_USE_STL)
 message("-- Setting Compile Definition: TIMXL_USE_STL")
@@ -282,9 +355,33 @@ add_library(RNG STATIC ${RNG_Source})
 
 
 # Create Utils library
-file(GLOB Utils_Source  Simulator/Utils/*.cpp Simulator/Utils/*.h)
+file(GLOB Utils_Source Simulator/Utils/*.cpp Simulator/Utils/*.h)
 list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/Factory.cpp")
-add_library(Utils  ${Utils_Source})
+
+# NvtxHelper.cpp includes CUDA and NVTX headers, so it can only be compiled for
+# Profiling builds of the GPU simulator where the NVTX library is available.
+# In every other configuration (including CPU-only Profiling builds) it is removed
+# from the source list and NvtxHelper.h supplies empty inline stand-ins instead.
+set(GRAPHITTI_USE_NVTX NO)
+if(ENABLE_CUDA AND CMAKE_BUILD_TYPE STREQUAL "Profiling")
+        # Find NVTX Library
+        find_library(NVTX_LIBRARY nvToolsExt)
+        if(NVTX_LIBRARY)
+                message(STATUS "Found NVTX: ${NVTX_LIBRARY} included in Profiling")
+                add_compile_definitions(ENABLE_NVTX)
+                set(GRAPHITTI_USE_NVTX YES)
+        else()
+                message(STATUS "NVTX library not found! Not included in Profiling.")
+        endif()
+endif()
+
+if(NOT GRAPHITTI_USE_NVTX)
+        list(REMOVE_ITEM Utils_Source "${CMAKE_CURRENT_SOURCE_DIR}/Simulator/Utils/NvtxHelper.cpp")
+endif()
+
+# Always create the Utils library (even if NVTX and CUDA are missing)
+add_library(Utils ${Utils_Source})
+
+# Only link NVTX when NvtxHelper.cpp is actually part of the library
+if(GRAPHITTI_USE_NVTX)
+        target_link_libraries(Utils PRIVATE ${NVTX_LIBRARY})
+endif()
 
 
 # Used to locate and run other CMakeLists.txt files from Third Party resources for further compilation of the project.
@@ -352,6 +449,9 @@ endif()
 # ------ TESTS EXECUTABLE ------
 # Add the file that contains main (RunTests.cpp) and all test files. GoogleTest will only recognize them if they are
 # included in the executable.
+# Keep GCC's maybe-uninitialized diagnostic from being promoted to an error when building
+# the GoogleTest targets. The diagnostic is GCC-specific, so the flag is only passed to
+# GCC; other compilers would report it as an unknown warning option.
+foreach(gtest_target IN ITEMS gtest gtest_main)
+        target_compile_options(${gtest_target} PRIVATE
+                $<$<CXX_COMPILER_ID:GNU>:-Wno-error=maybe-uninitialized>)
+endforeach()
+
 add_executable(tests
         Testing/RunTests.cpp
         Testing/UnitTesting/OperationManagerTests.cpp
@@ -426,3 +526,6 @@ target_link_libraries(serialSecondHalfTest combinedLib)
+# Remove the command-line options from the CMake cache so that a value passed with -D
+# is not carried over to the next configure run; each option has to be passed again.
 unset(ENABLE_CUDA CACHE)
 unset(PERFORMANCE_METRICS CACHE)
+# NOTE(review): GPROF no longer appears to be read in the visible parts of this file — confirm and remove.
 unset(GPROF CACHE)
+unset(CMAKE_BUILD_TYPE CACHE)
+unset(NVTX_LIBRARY CACHE)
+unset(TARGET_ARCH CACHE)
diff --git a/Simulator/Utils/NvtxHelper.cpp b/Simulator/Utils/NvtxHelper.cpp
@@ -0,0 +1,31 @@
+/**
+ * @file NvtxHelper.cpp
+ * 
+ * @ingroup Simulator/Utils
+ * 
+ * @brief Helper functions to enable NVTX profiling.
+ * When ENABLE_NVTX is not defined, NvtxHelper.h replaces these functions with empty inline functions that the compiler can optimize away.
+ * This file is only meant to be compiled into the Utils library for Profiling builds with ENABLE_CUDA=YES where the NVTX library was found.
+ */
+
+#include "NvtxHelper.h"
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+// Opens an NVTX range with the given label and color.
+// Each call should be matched by a later call to nvtxPop().
+//
+// name   - ASCII label attached to the range
+// pColor - color of the range, as an ARGB value from the Color enum
+void nvtxPushColor(const std::string &name, Color pColor)
+{
+   // Zero-initialize so every attribute that is not set below keeps its default
+   nvtxEventAttributes_t eventAttrib = {};
+   eventAttrib.version = NVTX_VERSION;
+   eventAttrib.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+   eventAttrib.colorType = NVTX_COLOR_ARGB;
+   // Color's underlying type is std::uint32_t, so the cast passes the ARGB value through unchanged
+   eventAttrib.color = static_cast<std::uint32_t>(pColor);
+   eventAttrib.messageType = NVTX_MESSAGE_TYPE_ASCII;
+   eventAttrib.message.ascii = name.c_str();
+
+   nvtxRangePushEx(&eventAttrib);
+}
+
+// Closes the most recently opened NVTX range by forwarding to nvtxRangePop().
+void nvtxPop()
+{
+   nvtxRangePop();
+}
diff --git a/Simulator/Utils/NvtxHelper.h b/Simulator/Utils/NvtxHelper.h
@@ -0,0 +1,45 @@
+/**
+ * @file NvtxHelper.h
+ * 
+ * @ingroup Simulator/Utils
+ * 
+ * @brief Helper functions to enable NVTX profiling.
+ * When ENABLE_NVTX is not defined, the functions are replaced with empty inline functions that the compiler can optimize away.
+ */
+
+#ifndef NVTX_HELPER_H
+#define NVTX_HELPER_H
+
+#include <cstdint>
+#include <string>
+
+// Named colors for NVTX ranges, stored as 32-bit ARGB values (0xAARRGGBB, alpha always 0xFF)
+enum class Color : std::uint32_t {
+   RED = 0xFFFF0000,
+   GREEN = 0xFF00FF00,
+   BLUE = 0xFF0000FF,
+   YELLOW = 0xFFFFFF00,
+   ORANGE = 0xFFFFA500,
+   PURPLE = 0xFF800080
+};
+
+#ifdef ENABLE_NVTX
+
+// Opens an NVTX range labeled `name` and colored `pColor`; defined in NvtxHelper.cpp
+void nvtxPushColor(const std::string &name, Color pColor);
+
+// Closes the most recently opened NVTX range; defined in NvtxHelper.cpp
+void nvtxPop();
+
+#else
+// ENABLE_NVTX is not defined: provide empty stand-ins with the same signatures so that
+// calling code compiles unchanged when NVTX profiling is not part of the build.
+inline void nvtxPushColor(const std::string &, Color)
+{
+}
+inline void nvtxPop()
+{
+}
+
+#endif   // ENABLE_NVTX
+
+
+#endif   // NVTX_HELPER_H
diff --git a/docs/Developer/StudentSetup.md b/docs/Developer/StudentSetup.md
@@ -93,6 +93,34 @@ The following options are available:
 high-performance GPU version has been compiled (`ggraphitti`).
 - `-v, --version`: Outputs the current git commit ID and exits.
 
+## Running the GPU version of the simulator (ggraphitti)
+1. To run:
+
+    ```
+    $ cd build
+
+    $ cmake -D ENABLE_CUDA=YES .. 
+    ```
+
+    This will generate a makefile. Then type:
+
+    ```
+    $ make
+    ```
+
+    You can then run a selection of sample tests to ensure the simulator is able to run:
+
+    ```
+    $ ./tests
+    ```
+
+    You can run ggraphitti on various test files:
+
+    ```
+    $ ./ggraphitti -c ../configfiles/test-small-connected.xml
+    ```
+
+
 ## Using Visual Studio Code 
 1. Install the [C/C++ extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode.cpptools) for IntelliSense and debugging. We recommend setting the IntelliSense Cache Size setting to 10 MB. The default size is 5120 MB, which can result in VSC consuming all of your allotted space on the CSS Linux Machines.
 

diff --git a/docs/Developer/codingConventions.md b/docs/Developer/codingConventions.md
@@ -80,6 +80,23 @@ clang-format -i fileName
 
 * If you want to make changes to the clang-format file options themselves, then visit the [clang-format options online documentation](https://clang.llvm.org/docs/ClangFormatStyleOptions.html)
 
+### Running clang-format inside VSCode
+
+If you want to set clang-format as the default formatter in VSCode and automatically run it on files as they're saved, you can add this to your `settings.json`:
+
+```json
+{
+    "editor.defaultFormatter": "xaver.clang-format",
+    "clang-format.style": "file",
+    "clang-format.executable": "/usr/bin/clang-format", 
+    "editor.formatOnSave": true,
+    "files.associations": {
+        "ostream": "cpp"
+    }
+}
+```
+
+Change the `clang-format.executable` path to wherever clang-format is installed on your machine.
 
 ---------
 [<< Go back to the Developer Documentation page](index.md)

diff --git a/docs/User/quickstart.md b/docs/User/quickstart.md
@@ -16,11 +16,16 @@ As a quick start and sanity test, let's run a small, prepackaged simulation to m
    $ make
    $ ./tests
    ```
-   To compile the GPU version set the variable `ENABLE_CUDA` to `YES` in the `CMakeLists.txt`
+   To compile the GPU version, use the cmake conditional flag `ENABLE_CUDA`, setting it to `YES`:
    ```shell
-   set(ENABLE_CUDA YES)
+   cmake -D ENABLE_CUDA=YES ..
    ```
 
+   By default, the target CUDA architecture is set to 37, which is the Kepler architecture.
+   To target a different architecture, use the cmake conditional flag `TARGET_ARCH`, setting it to your desired architecture:
+   ```shell
+   cmake -D ENABLE_CUDA=YES -D TARGET_ARCH=70 ..
+   ```
 3. Unless you have the necessary **HDF5** libraries installed please only use XML recorders only.
 
    - HDF5 is useful for making the data analysis easier for Matlab, which has native HDF5 support, after a simulation - especially a very long one; but it is fine to use the default XML output.