From c7228bd05a0d8de3c89809c59f47384efd34dffc Mon Sep 17 00:00:00 2001 From: Jan Vogelsang Date: Thu, 12 Dec 2024 18:02:35 +0100 Subject: [PATCH] Minor changes --- CMakeLists.txt | 2 + cmake/ProcessOptions.cmake | 7 + .../guidelines/coding_guidelines_cpp.rst | 4 +- doc/htmldoc/installation/cmake_options.rst | 3 + doc/htmldoc/nest_behavior/built-in_timers.rst | 4 + nestkernel/connection_manager.cpp | 4 +- nestkernel/event_delivery_manager.cpp | 34 ++-- nestkernel/event_delivery_manager.h | 1 - nestkernel/kernel_manager.cpp | 12 +- nestkernel/kernel_manager.h | 15 +- nestkernel/nest_names.cpp | 3 +- nestkernel/nest_names.h | 3 +- nestkernel/per_thread_bool_indicator.cpp | 16 +- nestkernel/simulation_manager.cpp | 66 ++++--- nestkernel/simulation_manager.h | 4 +- nestkernel/source_table.cpp | 4 +- nestkernel/stopwatch.h | 167 +++++++----------- nestkernel/stopwatch_impl.h | 22 --- 18 files changed, 164 insertions(+), 207 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 542f233df2..137a952507 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -68,6 +68,7 @@ set( with-models OFF CACHE STRING "The models to include as a semicolon-separate set( tics_per_ms "1000.0" CACHE STRING "Specify elementary unit of time [default=1000 tics per ms]." ) set( tics_per_step "100" CACHE STRING "Specify resolution [default=100 tics per step]." ) set( with-detailed-timers OFF CACHE STRING "Build with detailed internal time measurements [default=OFF]. Detailed timers can affect the performance." ) +set( with-mpi-sync-timer OFF CACHE STRING "Build with mpi synchronization barrier and timer [default=OFF]. Can affect the performance." ) set( with-threaded-timers ON CACHE STRING "Build with one internal timer per thread [default=ON]. Multi-threaded timers can affect the performance." ) set( target-bits-split "standard" CACHE STRING "Split of the 64-bit target neuron identifier type [default='standard']. 'standard' is recommended for most users. 
If running on more than 262144 MPI processes or more than 512 threads, change to 'hpc'." ) @@ -145,6 +146,7 @@ nest_process_with_openmp() nest_process_with_mpi() nest_process_with_detailed_timers() nest_process_with_threaded_timers() +nest_process_with_mpi_sync_timer() nest_process_with_libneurosim() nest_process_with_music() nest_process_with_sionlib() diff --git a/cmake/ProcessOptions.cmake b/cmake/ProcessOptions.cmake index c5170a3940..fad5dfdc1f 100644 --- a/cmake/ProcessOptions.cmake +++ b/cmake/ProcessOptions.cmake @@ -469,6 +469,13 @@ function( NEST_PROCESS_WITH_THREADED_TIMERS ) endif () endfunction() +function( NEST_PROCESS_WITH_MPI_SYNC_TIMER ) + set( MPI_SYNC_TIMER OFF PARENT_SCOPE ) + if ( ${with-mpi-sync-timer} STREQUAL "ON" ) + set( MPI_SYNC_TIMER ON PARENT_SCOPE ) + endif () +endfunction() + function( NEST_PROCESS_WITH_LIBNEUROSIM ) # Find libneurosim set( HAVE_LIBNEUROSIM OFF PARENT_SCOPE ) diff --git a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst index 68f2acd1b5..f6dceba360 100644 --- a/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst +++ b/doc/htmldoc/developer_space/guidelines/coding_guidelines_cpp.rst @@ -556,7 +556,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::elapsed_timestamp() const + nest::Stopwatch::elapsed_us() const { #ifndef DISABLE_TIMING if ( isRunning() ) @@ -622,7 +622,7 @@ For example, the ``stopwatch.h`` file could look like: } inline nest::Stopwatch::timestamp_t - nest::Stopwatch::get_timestamp() + nest::Stopwatch::get_current_time() { // works with: // * hambach (Linux 2.6.32 x86_64) diff --git a/doc/htmldoc/installation/cmake_options.rst b/doc/htmldoc/installation/cmake_options.rst index e118910d01..323d3c6127 100644 --- a/doc/htmldoc/installation/cmake_options.rst +++ b/doc/htmldoc/installation/cmake_options.rst @@ -202,6 +202,9 @@ NEST properties 
+-----------------------------------------------+----------------------------------------------------------------+ | ``-Dwith-detailed-timers=[OFF|ON]`` | Build with detailed internal time measurements [default=OFF]. | | | Detailed timers can affect the performance. | ++-----------------------------------------------+----------------------------------------------------------------+ +| ``-Dwith-mpi-sync-timer=[OFF|ON]`` | Build with mpi synchronization barrier and timer [default=OFF].| +| | Can affect the performance. | +-----------------------------------------------+----------------------------------------------------------------+ | ``-Dtarget-bits-split=['standard'|'hpc']`` | Split of the 64-bit target neuron identifier type | | | [default='standard']. 'standard' is recommended for most users.| diff --git a/doc/htmldoc/nest_behavior/built-in_timers.rst b/doc/htmldoc/nest_behavior/built-in_timers.rst index 97ab79e383..b57e0bda31 100644 --- a/doc/htmldoc/nest_behavior/built-in_timers.rst +++ b/doc/htmldoc/nest_behavior/built-in_timers.rst @@ -107,3 +107,7 @@ available as kernel attributes: | |buffers of the corresponding | | | |postsynaptic neurons | | +--------------------------------+----------------------------------+----------------------------------+ + + +The MPI synchronization barrier and its timer are only built with ``-Dwith-mpi-sync-timer=ON`` (default ``OFF``). +One internal timer per thread is used unless NEST is built with ``-Dwith-threaded-timers=OFF`` (default ``ON``). diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index b67e2cb5df..4001867308 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -1801,9 +1801,9 @@ nest::ConnectionManager::collect_compressed_spike_data( const size_t tid ) } // of omp single; implicit barrier source_table_.collect_compressible_sources( tid ); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { 
source_table_.fill_compressed_spike_data( compressed_spike_data_ ); diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index bd17cd1aae..383903f56f 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -416,12 +416,12 @@ EventDeliveryManager::gather_spike_data_( std::vector< SpikeDataT >& send_buffer set_end_marker_( send_buffer_position, send_buffer, local_max_spikes_per_rank ); sw_collocate_spike_data_.stop(); -#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) + sw_communicate_spike_data_.start(); +#ifdef MPI_SYNC_TIMER kernel().get_mpi_synchronization_stopwatch().start(); kernel().mpi_manager.synchronize(); kernel().get_mpi_synchronization_stopwatch().stop(); #endif - sw_communicate_spike_data_.start(); // Given that we templatize by plain vs offgrid, this if should not be necessary, but ... if ( off_grid_spiking_ ) @@ -797,9 +797,9 @@ EventDeliveryManager::gather_target_data( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; (no barrier) - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.restore_source_table_entry_point( tid ); @@ -814,18 +814,13 @@ EventDeliveryManager::gather_target_data( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } kernel().connection_manager.save_source_table_entry_point( tid ); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); kernel().connection_manager.clean_source_table( tid ); #pragma omp master { -#if defined( 
HAVE_MPI ) && defined( TIMER_DETAILED ) - kernel().get_mpi_synchronization_stopwatch().start(); - kernel().mpi_manager.synchronize(); - kernel().get_mpi_synchronization_stopwatch().stop(); -#endif sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); sw_communicate_target_data_.stop(); @@ -874,9 +869,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) resize_send_recv_buffers_target_data(); } } // of omp master; no barrier - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); TargetSendBufferPosition send_buffer_position( assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); @@ -891,17 +886,12 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) set_complete_marker_target_data_( assigned_ranks, send_buffer_position ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp master { -#if defined( HAVE_MPI ) && defined( TIMER_DETAILED ) - kernel().get_mpi_synchronization_stopwatch().start(); - kernel().mpi_manager.synchronize(); - kernel().get_mpi_synchronization_stopwatch().stop(); -#endif sw_communicate_target_data_.start(); kernel().mpi_manager.communicate_target_data_Alltoall( send_buffer_target_data_, recv_buffer_target_data_ ); sw_communicate_target_data_.stop(); @@ -921,9 +911,9 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) { buffer_size_target_data_has_changed_ = kernel().mpi_manager.increase_buffer_size_target_data(); } // of omp 
master (no barrier) - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); } } // of while diff --git a/nestkernel/event_delivery_manager.h b/nestkernel/event_delivery_manager.h index 9df0cc5a3e..59e3681646 100644 --- a/nestkernel/event_delivery_manager.h +++ b/nestkernel/event_delivery_manager.h @@ -468,7 +468,6 @@ class EventDeliveryManager : public ManagerInterface // private stop watches for benchmarking purposes // (intended for internal core developers, not for use in the public API) - // TODO JV: Make sure DETAILED_TIMERS is only ever used in stopwatch.h Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_collocate_spike_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_spike_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_communicate_target_data_; diff --git a/nestkernel/kernel_manager.cpp b/nestkernel/kernel_manager.cpp index 846123ed2b..79009cc39d 100644 --- a/nestkernel/kernel_manager.cpp +++ b/nestkernel/kernel_manager.cpp @@ -93,7 +93,8 @@ nest::KernelManager::initialize() manager->initialize( /* adjust_number_of_threads_or_rng_only */ false ); } - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); ++fingerprint_; @@ -110,7 +111,8 @@ nest::KernelManager::prepare() manager->prepare(); } - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); } @@ -177,7 +179,8 @@ nest::KernelManager::change_number_of_threads( size_t new_num_threads ) kernel().event_delivery_manager.reset_timers_for_preparation(); 
kernel().event_delivery_manager.reset_timers_for_dynamics(); - sw_omp_synchronization_.reset(); + sw_omp_synchronization_construction_.reset(); + sw_omp_synchronization_simulation_.reset(); sw_mpi_synchronization_.reset(); } @@ -202,7 +205,8 @@ nest::KernelManager::get_status( DictionaryDatum& dict ) manager->get_status( dict ); } - sw_omp_synchronization_.output_timer( dict, names::time_omp_synchronization ); + sw_omp_synchronization_construction_.output_timer( dict, names::time_omp_synchronization_construction ); + sw_omp_synchronization_simulation_.output_timer( dict, names::time_omp_synchronization_simulation ); sw_mpi_synchronization_.output_timer( dict, names::time_mpi_synchronization ); } diff --git a/nestkernel/kernel_manager.h b/nestkernel/kernel_manager.h index e06af9a9af..a055ba0e8b 100644 --- a/nestkernel/kernel_manager.h +++ b/nestkernel/kernel_manager.h @@ -288,10 +288,18 @@ class KernelManager NodeManager node_manager; /**@}*/ + //! Get the stopwatch to measure the time each thread is idle during network construction. Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& - get_omp_synchronization_stopwatch() + get_omp_synchronization_construction_stopwatch() { - return sw_omp_synchronization_; + return sw_omp_synchronization_construction_; + } + + //! Get the stopwatch to measure the time each thread is idle during simulation. 
+ Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded >& + get_omp_synchronization_simulation_stopwatch() + { + return sw_omp_synchronization_simulation_; } Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly >& @@ -307,7 +315,8 @@ class KernelManager bool initialized_; //!< true if the kernel is initialized std::ofstream dump_; //!< for FULL_LOGGING output - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_construction_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_omp_synchronization_simulation_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_mpi_synchronization_; }; diff --git a/nestkernel/nest_names.cpp b/nestkernel/nest_names.cpp index 4d10766098..f2acfb8bf4 100644 --- a/nestkernel/nest_names.cpp +++ b/nestkernel/nest_names.cpp @@ -606,7 +606,8 @@ const Name time_deliver_spike_data( "time_deliver_spike_data" ); const Name time_gather_secondary_data( "time_gather_secondary_data" ); const Name time_gather_spike_data( "time_gather_spike_data" ); const Name time_gather_target_data( "time_gather_target_data" ); -const Name time_omp_synchronization( "time_omp_synchronization" ); +const Name time_omp_synchronization_construction( "time_omp_synchronization_construction" ); +const Name time_omp_synchronization_simulation( "time_omp_synchronization_simulation" ); const Name time_mpi_synchronization( "time_mpi_synchronization" ); const Name time_in_steps( "time_in_steps" ); const Name time_simulate( "time_simulate" ); diff --git a/nestkernel/nest_names.h b/nestkernel/nest_names.h index 701bcb538b..1acfddb70a 100644 --- a/nestkernel/nest_names.h +++ b/nestkernel/nest_names.h @@ -634,7 +634,8 @@ extern const Name time_deliver_spike_data; extern const Name time_gather_secondary_data; extern const Name time_gather_spike_data; extern const Name 
time_gather_target_data; -extern const Name time_omp_synchronization; +extern const Name time_omp_synchronization_construction; +extern const Name time_omp_synchronization_simulation; extern const Name time_mpi_synchronization; extern const Name time_in_steps; extern const Name time_simulate; diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index c7f158261d..4032a7320f 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -65,7 +65,7 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status bool PerThreadBoolIndicator::all_false() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier @@ -74,42 +74,42 @@ PerThreadBoolIndicator::all_false() const bool ret = ( are_true_ == 0 ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::all_true() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ == size_ ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool PerThreadBoolIndicator::any_false() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ < size_ ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } bool 
PerThreadBoolIndicator::any_true() const { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier bool ret = ( are_true_ > 0 ); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); return ret; } diff --git a/nestkernel/simulation_manager.cpp b/nestkernel/simulation_manager.cpp index 95cc0a3a18..a756104e55 100644 --- a/nestkernel/simulation_manager.cpp +++ b/nestkernel/simulation_manager.cpp @@ -733,9 +733,9 @@ nest::SimulationManager::call_update_() void nest::SimulationManager::update_connection_infrastructure( const size_t tid ) { - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); sw_communicate_prepare_.start(); @@ -745,9 +745,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().connection_manager.collect_compressed_spike_data( tid ); sw_gather_target_data_.stop(); - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier // wait for all threads to finish sorting - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { @@ -762,11 +762,16 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) if ( kernel().connection_manager.secondary_connections_exist() ) { + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier + kernel().get_omp_synchronization_construction_stopwatch().stop(); + kernel().connection_manager.compute_compressed_secondary_recv_buffer_positions( tid ); - 
kernel().get_omp_synchronization_stopwatch().start(); + + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); + #pragma omp single { kernel().mpi_manager.communicate_recv_counts_secondary_events(); @@ -799,9 +804,9 @@ nest::SimulationManager::update_connection_infrastructure( const size_t tid ) kernel().connection_manager.compress_secondary_send_buffer_pos( tid ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { kernel().connection_manager.clear_compressed_spike_data_map(); @@ -879,9 +884,9 @@ nest::SimulationManager::update_() // MUSIC *before* MUSIC time is advanced // wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block #pragma omp master @@ -944,9 +949,9 @@ nest::SimulationManager::update_() done.push_back( done_p ); } // parallel section ends, wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by a single thread // the other threads wait at the end of the block @@ -1006,9 +1011,9 @@ 
nest::SimulationManager::update_() Node* node = i->get_node(); node->update_synaptic_elements( Time( Time::step( clock_.get_steps() + from_step_ ) ).get_ms() ); } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #pragma omp single { kernel().sp_manager.update_structural_plasticity(); @@ -1045,38 +1050,31 @@ nest::SimulationManager::update_() sw_update_.stop(); // parallel section ends, wait until all threads are done -> synchronize - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); // the following block is executed by the master thread only // the other threads are enforced to wait at the end of the block - - // gather and deliver only at end of slice, i.e., end of min_delay step - if ( to_step_ == kernel().connection_manager.get_min_delay() ) +#pragma omp master { - if ( kernel().connection_manager.has_primary_connections() ) + // gather and deliver only at end of slice, i.e., end of min_delay step + if ( to_step_ == kernel().connection_manager.get_min_delay() ) { - sw_gather_spike_data_.start(); -#pragma omp master + if ( kernel().connection_manager.has_primary_connections() ) { + sw_gather_spike_data_.start(); kernel().event_delivery_manager.gather_spike_data(); + sw_gather_spike_data_.stop(); } - sw_gather_spike_data_.stop(); - } - if ( kernel().connection_manager.secondary_connections_exist() ) - { - sw_gather_secondary_data_.start(); -#pragma omp master + if ( kernel().connection_manager.secondary_connections_exist() ) { + sw_gather_secondary_data_.start(); kernel().event_delivery_manager.gather_secondary_events( true ); + 
sw_gather_secondary_data_.stop(); } - sw_gather_secondary_data_.stop(); } - } -#pragma omp master - { advance_time_(); if ( print_time_ ) @@ -1100,9 +1098,9 @@ nest::SimulationManager::update_() #ifdef HAVE_SIONLIB kernel().io_manager.post_step_hook(); // enforce synchronization after post-step activities of the recording backends - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_simulation_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_simulation_stopwatch().stop(); #endif const double end_current_update = sw_simulate_.elapsed(); diff --git a/nestkernel/simulation_manager.h b/nestkernel/simulation_manager.h index c701dd0f74..f446aa9e86 100644 --- a/nestkernel/simulation_manager.h +++ b/nestkernel/simulation_manager.h @@ -231,8 +231,8 @@ class SimulationManager : public ManagerInterface Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > sw_simulate_; Stopwatch< StopwatchVerbosity::Normal, StopwatchType::Threaded > sw_communicate_prepare_; // intended for internal core developers, not for use in the public API - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_spike_data_; - Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_secondary_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_spike_data_; + Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly > sw_gather_secondary_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_update_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_gather_target_data_; Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::Threaded > sw_deliver_spike_data_; diff --git a/nestkernel/source_table.cpp b/nestkernel/source_table.cpp index 0e3d4bc072..d9fcd7af1c 100644 --- a/nestkernel/source_table.cpp +++ b/nestkernel/source_table.cpp @@ -230,9 +230,9 @@ 
nest::SourceTable::compute_buffer_pos_for_unique_secondary_sources( const size_t } } } - kernel().get_omp_synchronization_stopwatch().start(); + kernel().get_omp_synchronization_construction_stopwatch().start(); #pragma omp barrier - kernel().get_omp_synchronization_stopwatch().stop(); + kernel().get_omp_synchronization_construction_stopwatch().stop(); #pragma omp single { diff --git a/nestkernel/stopwatch.h b/nestkernel/stopwatch.h index ef691573af..72bc35fdb7 100644 --- a/nestkernel/stopwatch.h +++ b/nestkernel/stopwatch.h @@ -32,46 +32,51 @@ #include "dictutils.h" #include #include +#include #include #include namespace nest { -constexpr bool use_detailed_timers = TIMER_DETAILED; -constexpr bool use_threaded_timers = THREADED_TIMERS; - -// TODO JV: Update docs -/*********************************************************************** - * StopwatchBase * - * Accumulates time between start and stop, and provides * - * the elapsed time with different time units. * - * * - * Partly inspired by com.google.common.base.Stopwatch.java * - * Not thread-safe: - Do not share stopwatches among threads. * - * - Let each thread have its own stopwatch. * - * * - * Usage example: * - * StopwatchBase x; * - * x.start(); * - * // ... do computations for 15.34 sec * - * x.stop(); // only pauses stopwatch * - * x.print("Time needed "); // > Time needed 15.34 sec. * - * x.start(); // resumes stopwatch * - * // ... next computations for 11.22 sec * - * x.stop(); * - * x.print("Time needed "); // > Time needed 26,56 sec. * - * x.reset(); // reset to default values * - * x.start(); // starts the stopwatch from 0 * - * // ... computation 5.7 sec * - * x.print("Time "); // > Time 5.7 sec. * - * // ^ intermediate timing without stopping the stopwatch * - * // ... more computations 1.7643 min * - * x.stop(); * - * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * - * // > Time needed 1,8593 min. 
(on cerr) * - * // other units and output streams possible * - ***********************************************************************/ +// TODO JV: Set this variable via cmake instead +#ifdef TIMER_DETAILED +constexpr bool use_detailed_timers = true; +#else +constexpr bool use_detailed_timers = false; +#endif +#ifdef THREADED_TIMERS +constexpr bool use_threaded_timers = true; +#else +constexpr bool use_threaded_timers = false; +#endif + +/******************************************************************************** + * Stopwatch * + * Accumulates time between start and stop, and provides the elapsed time * + * with different time units. Either runs multi-threaded or only on master. * + * * + * Usage example: * + * Stopwatch< StopwatchVerbosity::Normal, StopwatchType::MasterOnly > x; * + * x.start(); * + * // ... do computations for 15.34 sec * + * x.stop(); // only pauses stopwatch * + * x.print("Time needed "); // > Time needed 15.34 sec. * + * x.start(); // resumes stopwatch * + * // ... next computations for 11.22 sec * + * x.stop(); * + * x.print("Time needed "); // > Time needed 26,56 sec. * + * x.reset(); // reset to default values * + * x.start(); // starts the stopwatch from 0 * + * // ... computation 5.7 sec * + * x.print("Time "); // > Time 5.7 sec. * + * // ^ intermediate timing without stopping the stopwatch * + * // ... more computations 1.7643 min * + * x.stop(); * + * x.print("Time needed ", StopwatchBase::MINUTES, std::cerr); * + * // > Time needed 1,8593 min. (on cerr) * + * // other units and output streams possible * + ********************************************************************************/ class StopwatchBase { public: @@ -111,27 +116,13 @@ class StopwatchBase bool isRunning() const; /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. 
If you want only the last measurement, you - * have to reset the timer, before stating the measurement. + * Returns the time elapsed between the start and stop of the stopwatch in the given unit. If it is running, it + * returns the time from start until now. If the stopwatch was run previously, the previous runtime is added. If you + * want only the last measurement, you have to reset the timer, before starting the measurement. * Does not change the running state. */ double elapsed( timeunit_t timeunit = SECONDS ) const; - /** - * Returns the time elapsed between the start and stop of the - * stopwatch. If it is running, it returns the time from start - * until now. If the stopwatch is run previously, the previous - * runtime is added. If you want only the last measurement, you - * have to reset the timer, before stating the measurement. - * Does not change the running state. - * In contrast to StopwatchBase::elapsed(), only the timestamp is returned, - * that is the number if microseconds as an integer. - */ - timestamp_t elapsed_timestamp() const; - /** * Resets the stopwatch. */ @@ -158,36 +149,36 @@ class StopwatchBase /** * Returns current time in microseconds since EPOCH. */ - static timestamp_t get_timestamp(); + static size_t get_current_time(); }; inline void -nest::StopwatchBase::start() +StopwatchBase::start() { #ifndef DISABLE_TIMING if ( not isRunning() ) { - _prev_elapsed += _end - _beg; // store prev. time, if we resume - _end = _beg = get_timestamp(); // invariant: _end >= _beg - _running = true; // we start running + _prev_elapsed += _end - _beg; // store prev. 
time, if we resume + _end = _beg = get_current_time(); // invariant: _end >= _beg + _running = true; // we start running } #endif } inline void -nest::StopwatchBase::stop() +StopwatchBase::stop() { #ifndef DISABLE_TIMING if ( isRunning() ) { - _end = get_timestamp(); // invariant: _end >= _beg - _running = false; // we stopped running + _end = get_current_time(); // invariant: _end >= _beg + _running = false; // we stopped running } #endif } inline bool -nest::StopwatchBase::isRunning() const +StopwatchBase::isRunning() const { #ifndef DISABLE_TIMING return _running; @@ -197,36 +188,28 @@ nest::StopwatchBase::isRunning() const } inline double -nest::StopwatchBase::elapsed( timeunit_t timeunit ) const -{ -#ifndef DISABLE_TIMING - return 1.0 * elapsed_timestamp() / timeunit; -#else - return 0.0; -#endif -} - -inline nest::StopwatchBase::timestamp_t -nest::StopwatchBase::elapsed_timestamp() const +StopwatchBase::elapsed( timeunit_t timeunit ) const { #ifndef DISABLE_TIMING + size_t time_elapsed; if ( isRunning() ) { // get intermediate elapsed time; do not change _end, to be const - return get_timestamp() - _beg + _prev_elapsed; + time_elapsed = get_current_time() - _beg + _prev_elapsed; } else { // stopped before, get time of current measurement + last measurements - return _end - _beg + _prev_elapsed; + time_elapsed = _end - _beg + _prev_elapsed; } + return static_cast< double >( time_elapsed ) / timeunit; #else - return static_cast< timestamp_t >( 0 ); + return 0.; #endif } inline void -nest::StopwatchBase::reset() +StopwatchBase::reset() { #ifndef DISABLE_TIMING _beg = 0; // invariant: _end >= _beg @@ -237,7 +220,7 @@ nest::StopwatchBase::reset() } inline void -nest::StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const +StopwatchBase::print( const char* msg, timeunit_t timeunit, std::ostream& os ) const { #ifndef DISABLE_TIMING double e = elapsed( timeunit ); @@ -271,13 +254,14 @@ nest::StopwatchBase::print( const char* msg, timeunit_t timeunit, 
std::ostream& #endif } -inline nest::StopwatchBase::timestamp_t -nest::StopwatchBase::get_timestamp() +inline size_t +StopwatchBase::get_current_time() { + // NOTE: gettimeofday() reports wall-clock time, which is not monotonic; clock adjustments (e.g. NTP) can distort the measurement. struct timeval now; gettimeofday( &now, static_cast< struct timezone* >( nullptr ) ); - return ( nest::StopwatchBase::timestamp_t ) now.tv_usec - + ( nest::StopwatchBase::timestamp_t ) now.tv_sec * nest::StopwatchBase::SECONDS; + return ( StopwatchBase::timestamp_t ) now.tv_usec + + ( StopwatchBase::timestamp_t ) now.tv_sec * StopwatchBase::SECONDS; } inline std::ostream& @@ -351,17 +335,6 @@ class Stopwatch return elapsed; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - StopwatchBase::timestamp_t elapsed = 0; -#pragma omp master - { - elapsed = timer_.elapsed_timestamp(); - }; - return elapsed; - } - void reset() { @@ -406,11 +379,6 @@ class Stopwatch< StopwatchVerbosity::Detailed, StopwatchType::MasterOnly, std::e { return 0; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - return 0; - } void reset() { @@ -450,11 +418,6 @@ class Stopwatch< detailed_timer, { return 0; } - StopwatchBase::timestamp_t - elapsed_timestamp() const - { - return 0; - } void reset() { @@ -489,8 +452,6 @@ class Stopwatch< detailed_timer, double elapsed( StopwatchBase::timeunit_t timeunit = StopwatchBase::timeunit_t::SECONDS ) const; - StopwatchBase::timestamp_t elapsed_timestamp() const; - void reset(); void print( const char* msg = "", diff --git a/nestkernel/stopwatch_impl.h b/nestkernel/stopwatch_impl.h index 0b6a873a58..339a073643 100644 --- a/nestkernel/stopwatch_impl.h +++ b/nestkernel/stopwatch_impl.h @@ -67,28 +67,6 @@ Stopwatch< detailed_timer, return timers_[ kernel().vp_manager.get_thread_id() ].elapsed( timeunit ); } -template < StopwatchVerbosity detailed_timer > -StopwatchBase::timestamp_t -Stopwatch< detailed_timer, - StopwatchType::Threaded, - std::enable_if_t< 
use_threaded_timers - and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::elapsed_timestamp() const -{ - return timers_[ kernel().vp_manager.get_thread_id() ].elapsed_timestamp(); -} - -template < StopwatchVerbosity detailed_timer > -void -Stopwatch< detailed_timer, - StopwatchType::Threaded, - std::enable_if_t< use_threaded_timers - and ( detailed_timer == StopwatchVerbosity::Normal or use_detailed_timers ) > >::print( const char* msg, - StopwatchBase::timeunit_t timeunit, - std::ostream& os ) const -{ - timers_[ kernel().vp_manager.get_thread_id() ].print( msg, timeunit, os ); -} - template < StopwatchVerbosity detailed_timer > void Stopwatch< detailed_timer,