Commit: finalize feelpp
prudhomm committed Oct 15, 2024
1 parent 8b45670 commit 984da7f
Showing 7 changed files with 180 additions and 114 deletions.
8 changes: 5 additions & 3 deletions exa-ma-d7.1.tex
@@ -55,7 +55,8 @@
\definecolor{CustomBlue}{rgb}{0.25, 0.41, 0.88} % RoyalBlue
\hypersetup{
pdftitle={Benchmarking analysis report},
pdfauthor={[Christophe Prud'homme (UNISTRA), Pierre Alliez (INRIA), Vincent Chabannes (UNISTRA), Rudy Chocat (CEA), Emmanuel Franck (INRIA), Vincent Fraucher (CEA), Floriant Faucher (INRIA), Clément Gauchy (CEA), Christos Georgiadis (INRIA), Luc Giraud (INRIA), Frédéric Hecht (SU), Pierre Jolivet (CNRS), Pierre Ledac (CEA), Gilles Marait (INRIA), Victor Michel-Dansac (INRIA), Frédéric Nataf (SU), Lucas Palazzolo (INRIA), Yannick Privat (UL), Thomas Saigre-Tardif (UNISTRA), Christophe Trophime (CNRS), Pierre Henri Tournier (SU), Céline Van Landeghem (UNISTRA), Raphael Zanella (SU)]},
pdfauthor={[Christophe Prud'homme (UNISTRA), Pierre Alliez (INRIA), Vincent Chabannes (UNISTRA), Rudy Chocat (CEA), Emmanuel Franck (INRIA), Vincent Faucher (CEA), Florian Faucher (INRIA), Clément Gauchy (CEA), Christos Georgiadis (INRIA), Luc Giraud (INRIA), Frédéric Hecht (SU), Guillaume Helbecque (U Luxembourg), Pierre Jolivet (CNRS), Olivier Jamond (CEA), Pierre Ledac (CEA), Nouredine Melab (U Lille), Victor Michel-Dansac (INRIA), Frédéric Nataf (SU), Lucas Palazzolo (INRIA), Yannick Privat (UL), Thomas Saigre-Tardif (UNISTRA), El-Ghazali Talbi (U Lille), Pierre-Henri Tournier (SU), Christophe Trophime (CNRS), Céline Van Landeghem (UNISTRA), Raphael Zanella (SU)]},
pdfkeywords={HPC, Exascale, Benchmarking},
bookmarksnumbered,linktocpage,
colorlinks=true,
@@ -125,7 +126,7 @@
\delivResponsible{UNISTRA}

% Deliverable Version, Contractual and Actual Date, Dissemination Level, Type
\delivVersion{v0.2.0}
\delivVersion{v1.0.0}
\ContractualDate{15/10/2024}
\ActualDate{\today}
\delivDissLevel{PU} % PU, PP, RE, CO
@@ -135,7 +136,7 @@
\delivAuthor{[Christophe Prud'homme (UNISTRA)]}

% List of Co-Authors (all other co-authors should be listed here)
\delivFPAuthor{[Pierre Alliez (INRIA), Vincent Chabannes (UNISTRA), Rudy Chocat (CEA), Emmanuel Franck (INRIA), Vincent Fraucher (CEA), Floriant Faucher (INRIA), Clément Gauchy (CEA), Christos Georgiadis (INRIA), Luc Giraud (INRIA), Frédéric Hecht (SU), Olivier Jamond (CEA), Pierre Jolivet (CNRS), Pierre Ledac (CEA), Gilles Marait (INRIA), Victor Michel-Dansac (INRIA), Frédéric Nataf (SU), Lucas Palazzolo (INRIA), Yannick Privat (UL), Thomas Saigre-Tardif (UNISTRA), Christophe Trophime (CNRS), Pierre Henri Tournier (SU), Céline Van Landeghem (UNISTRA), Raphael Zanella (SU) ]}
\delivFPAuthor{[Pierre Alliez (INRIA), Vincent Chabannes (UNISTRA), Rudy Chocat (CEA), Emmanuel Franck (INRIA), Vincent Faucher (CEA), Florian Faucher (INRIA), Clément Gauchy (CEA), Christos Georgiadis (INRIA), Luc Giraud (INRIA), Frédéric Hecht (SU), Guillaume Helbecque (U Luxembourg), Pierre Jolivet (CNRS), Olivier Jamond (CEA), Pierre Ledac (CEA), Nouredine Melab (U Lille), Victor Michel-Dansac (INRIA), Frédéric Nataf (SU), Lucas Palazzolo (INRIA), Yannick Privat (UL), Thomas Saigre-Tardif (UNISTRA), El-Ghazali Talbi (U Lille), Pierre-Henri Tournier (SU), Christophe Trophime (CNRS), Céline Van Landeghem (UNISTRA), Raphael Zanella (SU)]}

% Provision of Keywords (about 5-10)
\delivKeywords{HPC, Exascale, Benchmarking, Software}
@@ -147,6 +148,7 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% Change Log
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\istChange{15/10/2024}{v1.0.0}{\href{https://github.com/numpex/exa-ma-d7.1/graphs/contributors}{26 Contributors}}{Finalized contributions and reviews}
\istChange{11/10/2024}{v0.2.0}{\href{https://github.com/numpex/exa-ma-d7.1/graphs/contributors}{+14 Contributors}}{Initial contributions}
\istChange{30/09/2024}{v0.1.0}{Prud'homme Christophe (UNISTRA)}{setup architecture of D7.1, update profiling tools in ToC, updates in methodology chapter, add information stored in Excel sheet to the report, update benchmark methodology}
\istChange{27/09/2024}{v0.0.6}{Prud'homme Christophe (UNISTRA)}{In the methodology chapter, link the deliverable to the bottlenecks identified in the Exa-MA scientific document. Add resilience stats and methodology; benchmark mmg and parmmg indirectly.}
30 changes: 30 additions & 0 deletions references.bib
@@ -1,5 +1,35 @@


@misc{cardosi_specx_2023,
title = {Specx: a {C}++ task-based runtime system for heterogeneous distributed architectures},
copyright = {Creative Commons Attribution 4.0 International},
shorttitle = {Specx},
url = {https://arxiv.org/abs/2308.15964},
doi = {10.48550/ARXIV.2308.15964},
abstract = {Parallelization is needed everywhere, from laptops and mobile phones to supercomputers. Among parallel programming models, task-based programming has demonstrated a powerful potential and is widely used in high-performance scientific computing. Not only does it allow for efficient parallelization across distributed heterogeneous computing nodes, but it also allows for elegant source code structuring by describing hardware-independent algorithms. In this paper, we present Specx, a task-based runtime system written in modern C++. Specx supports distributed heterogeneous computing by simultaneously exploiting CPUs and GPUs (CUDA/HIP) and incorporating communication into the task graph. We describe the specificities of Specx and demonstrate its potential by running parallel applications.},
urldate = {2024-10-15},
publisher = {arXiv},
author = {Cardosi, Paul and Bramas, Bérenger},
year = {2023},
note = {Version Number: 1},
keywords = {Distributed, Parallel, and Cluster Computing (cs.DC), FOS: Computer and information sciences, Software Engineering (cs.SE)},
}

@misc{Palazollo_Feel_Shape_Optimization,
title = {Feel++ shape optimization toolbox},
copyright = {LGPL-3.0-or-later},
url = {https://github.com/feelpp/feelpp-shapo},
author = {Palazzolo, Lucas and Prud'homme, Christophe},
year = {2024},
}

@book{britain_standard_1990,
title = {The {Standard} {NAFEMS} {Benchmarks}},
publisher = {NAFEMS},
author = {{National Agency for Finite Element Methods \& Standards (Great Britain)}},
year = {1990},
}

@article{nguessan_high_2021,
series = {Numerical {Solution} of {Differential} and {Differential}-{Algebraic} {Equations}. {Selected} {Papers} from {NUMDIFF}-15},
title = {High order time integration and mesh adaptation with error control for incompressible {Navier}–{Stokes} and scalar transport resolution on dual grids},
127 changes: 70 additions & 57 deletions software/feelpp/WP1/WP1.tex
@@ -95,34 +95,43 @@ \subsection{Parallel Capabilities}
\label{sec:WP1:Feelpp:performances}


\begin{itemize}
\item describe the parallel programming environment : MPI, OpenMP, CUDA, OpenACC, etc.
\item describe the parallel computation environment: type of architecture and super computer used.
\item describe the parallel capabilities of the software
\item \textbf{Scalability:} Describe the general scalability properties of the software
\item \textbf{Integration with Other Systems:} Describe how the software integrates with other numerical libraries in the Exa-MA framework.
\end{itemize}
\begin{description}
\item[Parallel Programming Environment:] MPI; multithreading in HDG methods when static condensation is enabled; task-based parallelism using Specx~\cite{cardosi_specx_2023}.
\item[Supercomputers:] Gaya (\cref{sec:arch:gaya}) and Discoverer (\cref{sec:arch:eurohpc-ju}).
\item[Parallel Capabilities:] All data structures are distributed using MPI; GPU support for some of them is under development. HDF5 and MPI-IO are used for I/O. A minimal illustrative sketch follows this list.
\item[Integration with Other Systems:] Within WP1, \Feelpp{} is interfaced with Specx, MMG/ParMMG, and Gmsh.
\end{description}
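
As a reading aid, the following minimal C++/MPI sketch illustrates the distributed-data pattern described above: each rank owns a block of degrees of freedom and global quantities are obtained by a reduction. It is an illustrative sketch (typeset here with an assumed \texttt{lstlisting} environment), not \Feelpp{} code.

\begin{lstlisting}[language=C++]
// Illustrative sketch (not Feel++ code): each MPI rank owns a contiguous
// block of degrees of freedom; global quantities come from a reduction.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    const long nGlobal = 1000000;               // global problem size
    long nLocal = nGlobal / size + (rank < nGlobal % size ? 1 : 0);

    std::vector<double> u(nLocal, 1.0);         // rank-local dof block
    double localSum = 0.0;
    for (double v : u) localSum += v;

    double globalSum = 0.0;                     // global reduction
    MPI_Allreduce(&localSum, &globalSum, 1, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD);

    if (rank == 0) std::printf("global sum = %g\n", globalSum);
    MPI_Finalize();
    return 0;
}
\end{lstlisting}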

% \begin{itemize}
% \item describe the parallel programming environment : MPI and
% \item describe the parallel computation environment: type of architecture and super computer used.
% \item describe the parallel capabilities of the software
% \item \textbf{Scalability:} Describe the general scalability properties of the software
% \item \textbf{Integration with Other Systems:} Describe how the software integrates with other numerical libraries in the Exa-MA framework.
% \end{itemize}


\subsection{Initial Performance Metrics}
\label{sec:WP1:Feelpp:metrics}

This section provides a summary of initial performance benchmarks performed in the context of WP1. It ensures reproducibility by detailing input/output datasets, benchmarking tools, and the results. All data should be publicly available, ideally with a DOI for future reference.

\begin{itemize}
\item \textbf{Overall Performance:} Summarize the software's computational performance, energy efficiency, and scalability results across different architectures (e.g., CPU, GPU, hybrid systems).
\item \textbf{Input/Output Dataset:} Provide a detailed description of the dataset used for the benchmark, including:
\begin{itemize}
\item Input dataset size, structure, and format (e.g., CSV, HDF5, NetCDF).
\item Output dataset format and key results.
\item Location of the dataset (e.g., GitHub repository, institutional repository, or open access platform).
\item DOI or permanent link for accessing the dataset.
\end{itemize}
\item \textbf{open-data Access:} Indicate whether the datasets used for the benchmark are open access, and provide a DOI or a direct link for download. Where applicable, highlight any licensing constraints.
\item \textbf{Challenges:} Identify any significant bottlenecks or challenges observed during the benchmarking process, including data handling and computational performance.
\item \textbf{Future Improvements:} Outline areas for optimization, including dataset handling, memory usage, or algorithmic efficiency, to address identified challenges.
\end{itemize}

This section provides a summary of initial performance benchmarks performed in the context of WP1.
It ensures reproducibility by detailing input/output datasets, benchmarking tools, and the results.
The input data are publicly available; some already have a Zenodo DOI (\cref{sec:arch:zenodo}), while others are stored on Girder (\cref{sec:arch:girder:unistra}) or in the software repository.

% \begin{itemize}
% \item \textbf{Overall Performance:} Summarize the software's computational performance, energy efficiency, and scalability results across different architectures (e.g., CPU, GPU, hybrid systems).
% \item \textbf{Input/Output Dataset:} Provide a detailed description of the dataset used for the benchmark, including:
% \begin{itemize}
% \item Input dataset size, structure, and format (e.g., CSV, HDF5, NetCDF).
% \item Output dataset format and key results.
% \item Location of the dataset (e.g., GitHub repository, institutional repository, or open access platform).
% \item DOI or permanent link for accessing the dataset.
% \end{itemize}
% \item \textbf{open-data Access:} Indicate whether the datasets used for the benchmark are open access, and provide a DOI or a direct link for download. Where applicable, highlight any licensing constraints.
% \item \textbf{Challenges:} Identify any significant bottlenecks or challenges observed during the benchmarking process, including data handling and computational performance.
% \item \textbf{Future Improvements:} Outline areas for optimization, including dataset handling, memory usage, or algorithmic efficiency, to address identified challenges.
% \end{itemize}
%
% % create latex counter
% \newcounter{feelppWP1benchcounter}
% % set the counter to 1
@@ -166,38 +175,40 @@ \subsubsection{Benchmark \#1: Compute Distance Function}
\begin{itemize}
\item \textbf{Input Data:} The input consists of a 3D uniform grid representing the box geometry, with approximately 1 million vertices. The level set function and ray tracing boundaries are initialized for the distance computation. The input data is stored in JSON format, and it can be accessed via DOI: \texttt{[Insert DOI]}.

\item \textbf{Output Data:} The output includes the computed distance function values at all vertices for both methods, stored in CSV format. Additionally, runtime performance logs and accuracy comparisons between the methods are included.

\item \textbf{Data Repository:} Input and output datasets, along with performance logs, are stored in a Zenodo repository and can be accessed via DOI: \texttt{[Insert DOI]}.
\end{itemize}

\paragraph{Results Summary}
The performance comparison between the two methods is summarized as follows:

RESULTS here.
\item \textbf{Output Data:} The output includes the computed distance function values at all vertices for both methods, stored in HDF5 format, with statistics (performance and errors) in CSV format. Additionally, runtime performance logs and accuracy comparisons between the methods are included; a minimal write sketch follows this list.

\paragraph{Challenges Identified}
The following challenges were encountered during the benchmarking process:
\begin{itemize}
\item \textbf{Ray Tracing Bottlenecks:}
\item \textbf{Parallelization Issues:}
\item \textbf{Memory Usage:}
\item \textbf{Data Repository:} Input and output datasets, along with performance logs, will be stored in a Zenodo repository (\cref{sec:arch:zenodo}).
\end{itemize}
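
As an illustration of the output layout above, the following minimal sketch writes a distance field to HDF5 using the HDF5 C API; the file and dataset names are assumptions, not those of the \Feelpp{} exporter.

\begin{lstlisting}[language=C++]
// Illustrative sketch (assumed names, not the Feel++ exporter): write a
// distance field to HDF5 with the C API.
#include <hdf5.h>
#include <vector>

int main()
{
    std::vector<double> dist(1000, 0.0);   // computed distance values
    hsize_t dims[1] = { dist.size() };

    hid_t file  = H5Fcreate("distance.h5", H5F_ACC_TRUNC,
                            H5P_DEFAULT, H5P_DEFAULT);
    hid_t space = H5Screate_simple(1, dims, nullptr);
    hid_t dset  = H5Dcreate2(file, "/distance", H5T_NATIVE_DOUBLE, space,
                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
    H5Dwrite(dset, H5T_NATIVE_DOUBLE, H5S_ALL, H5S_ALL, H5P_DEFAULT,
             dist.data());

    H5Dclose(dset); H5Sclose(space); H5Fclose(file);
    return 0;
}
\end{lstlisting}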

Final analysis and persectives here.

\begin{itemize}
\item \textbf{Description:} Briefly describe the benchmark case, including the problem size, target architecture (e.g., CPU, GPU), and the input data. Mention the specific goals of the benchmark (e.g., testing scalability, energy efficiency).
\item \textbf{Benchmarking Tools Used:} List the tools used for performance analysis, such as Extrae, Score-P, TAU, Vampir, or Nsight, and specify what metrics were measured (e.g., execution time, FLOPS, energy consumption).
\item \textbf{Input/Output Dataset Description:}
\begin{itemize}
\item \textbf{Input Data:} Describe the input dataset (size, format, data type) and provide a DOI or link to access it.
\item \textbf{Output Data:} Specify the structure of the results (e.g., memory usage, runtime logs) and how they can be accessed or replicated.
\item \textbf{Data Repository:} Indicate where the data is stored (e.g., Zenodo, institutional repository) and provide a DOI or URL for accessing the data.
\end{itemize}
\item \textbf{Results Summary:} Include a summary of key metrics (execution time, memory usage, FLOPS) and their comparison across architectures (e.g., CPU, GPU).
\item \textbf{Challenges Identified:} Describe any bottlenecks encountered (e.g., memory usage, parallelization inefficiencies) and how they impacted the benchmark.
\end{itemize}
This benchmark will provide CPU and GPU benchmarking results; they are not yet available, but will be published once the GPU implementation is finalized.
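
To make the benchmarked operation concrete, the sketch below shows the brute-force principle of a vertex-wise distance computation to a sampled boundary. It is only a reading aid: the actual \Feelpp{} benchmark relies on level-set and ray-tracing methods, which are not reproduced here.

\begin{lstlisting}[language=C++]
// Illustrative sketch: unsigned distance from grid vertices to boundary
// samples by brute force; the Feel++ benchmark itself uses level-set and
// ray-tracing methods.
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdio>
#include <limits>
#include <vector>

using Point = std::array<double, 3>;

double distance(const Point& a, const Point& b)
{
    double s = 0.0;
    for (int d = 0; d < 3; ++d) s += (a[d] - b[d]) * (a[d] - b[d]);
    return std::sqrt(s);
}

// For each vertex, keep the minimum distance to any boundary sample.
std::vector<double> distanceField(const std::vector<Point>& vertices,
                                  const std::vector<Point>& boundary)
{
    std::vector<double> dist(vertices.size(),
                             std::numeric_limits<double>::max());
    for (std::size_t i = 0; i < vertices.size(); ++i)
        for (const Point& b : boundary)
            dist[i] = std::min(dist[i], distance(vertices[i], b));
    return dist;
}

int main()
{
    std::vector<Point> vertices = { {0, 0, 0}, {0.5, 0.5, 0.5} };
    std::vector<Point> boundary = { {1, 0, 0}, {0, 1, 0}, {0, 0, 1} };
    for (double d : distanceField(vertices, boundary))
        std::printf("%g\n", d);
    return 0;
}
\end{lstlisting}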

% \paragraph{Results Summary}
% The performance comparison between the two methods is summarized as follows:
%
% RESULTS here.
%
% \paragraph{Challenges Identified}
% The following challenges were encountered during the benchmarking process:
% \begin{itemize}
% \item \textbf{Ray Tracing Bottlenecks:}
% \item \textbf{Parallelization Issues:}
% \item \textbf{Memory Usage:}
% \end{itemize}
%
% Final analysis and perspectives here.
%
% \begin{itemize}
% \item \textbf{Description:} Briefly describe the benchmark case, including the problem size, target architecture (e.g., CPU, GPU), and the input data. Mention the specific goals of the benchmark (e.g., testing scalability, energy efficiency).
% \item \textbf{Benchmarking Tools Used:} List the tools used for performance analysis, such as Extrae, Score-P, TAU, Vampir, or Nsight, and specify what metrics were measured (e.g., execution time, FLOPS, energy consumption).
% \item \textbf{Input/Output Dataset Description:}
% \begin{itemize}
% \item \textbf{Input Data:} Describe the input dataset (size, format, data type) and provide a DOI or link to access it.
% \item \textbf{Output Data:} Specify the structure of the results (e.g., memory usage, runtime logs) and how they can be accessed or replicated.
% \item \textbf{Data Repository:} Indicate where the data is stored (e.g., Zenodo, institutional repository) and provide a DOI or URL for accessing the data.
% \end{itemize}
% \item \textbf{Results Summary:} Include a summary of key metrics (execution time, memory usage, FLOPS) and their comparison across architectures (e.g., CPU, GPU).
% \item \textbf{Challenges Identified:} Describe any bottlenecks encountered (e.g., memory usage, parallelization inefficiencies) and how they impacted the benchmark.
% \end{itemize}

\subsubsection{Benchmark \#2: Elliptic linear PDE: Thermal bridges}
\label{sec:WP1:Feelpp:benchmark:thermal_bridges}
@@ -603,6 +614,7 @@ \subsubsection{Benchmark \#2: Elliptic linear PDE: Thermal bridges}




\subsubsection{Benchmark \#3: Linear elasticity: NAFEMS LE10}
\label{sec:WP1:Feelpp:benchmark:nafems-le10}

@@ -889,6 +901,7 @@ \subsubsection{Benchmark \#3: Linear elasticity: NAFEMS LE10}
\item Go to extreme HPC scale : Partitioning and IO issues
\end{itemize}

\iffalse
\subsubsection{Benchmark \#2: Assemble Stiffness and Linear Elasticity Matrix}

\paragraph{Description}
@@ -935,11 +948,12 @@ \subsubsection{Benchmark \#2: Assemble Stiffness and Linear Elasticity Matrix}
\end{itemize}

add extra analysis and conclusion here.
\fi

\subsubsection{Benchmark \#4: Thermo-Electric Coupling}
\label{sec:WP1:Feelpp:benchmark:hl-31}

Thermo Electric coupling in a complex geometry.
%Thermo Electric coupling in a complex geometry.

\paragraph{Description}

@@ -1488,11 +1502,10 @@ \subsubsection{Benchmark \#5: HeatFluid Coupling}


\paragraph{Challenges Identified}
Several challenges were encountered during the benchmarking process: \textbf{??}
Several challenges were encountered during the benchmarking process:
\begin{itemize}
\item \textbf{Memory Usage:}
\item \textbf{Parallelization Inefficiencies:}
\item \textbf{Cache and Memory Bottlenecks:}
\item \textbf{Memory Usage:} Memory usage should be better monitored, especially when creating the solver and preconditioner objects; a minimal monitoring sketch follows this list.
\item \textbf{Parallelization Inefficiencies:} We need to test larger configurations, in terms of mesh size and number of cores, and increase the polynomial order, in order to identify potential parallelization and I/O bottlenecks.
\end{itemize}
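
As a possible first step for the memory-usage point above, the following Linux-only sketch reads the resident set size from \texttt{/proc/self/status} around solver setup; this is an assumed monitoring approach, not an existing \Feelpp{} facility.

\begin{lstlisting}[language=C++]
// Illustrative Linux-only sketch (assumed approach, not a Feel++ facility):
// read the resident set size (VmRSS) to monitor memory around solver setup.
#include <cstdio>
#include <fstream>
#include <string>

long vmRSSKiB()
{
    std::ifstream status("/proc/self/status");
    std::string line;
    while (std::getline(status, line))
        if (line.rfind("VmRSS:", 0) == 0)   // line looks like "VmRSS:  1234 kB"
            return std::stol(line.substr(6));
    return -1;                              // entry not found (non-Linux system)
}

int main()
{
    long before = vmRSSKiB();
    // ... create solver and preconditioner objects here ...
    long after = vmRSSKiB();
    std::printf("RSS before = %ld KiB, after = %ld KiB\n", before, after);
    return 0;
}
\end{lstlisting}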


