Set # of threads to use performant cores (#2352)
Summary:
Pull Request resolved: #2352

When using all cores, the slower ones drag performance down by blocking the
large cores. Once we have uarch-specific implementations we may no longer
need this, but the tool is useful in general until a better API is available.

//unrelated failures
bypass-github-export-checks

ghstack-source-id: 218918368
exported-using-ghexport

Reviewed By: digantdesai, kirklandsign

Differential Revision: D54766071

fbshipit-source-id: f49afc2afeb16f73ebc10b43c97d1f4a97e3d191
kimishpatel authored and facebook-github-bot committed Mar 16, 2024
1 parent 86b9ff7 commit 391c498
Showing 2 changed files with 15 additions and 0 deletions.
13 changes: 13 additions & 0 deletions examples/models/llama2/main.cpp
@@ -10,6 +10,11 @@

 #include <executorch/examples/models/llama2/runner/runner.h>

+#if defined(ET_USE_THREADPOOL)
+#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
+#include <executorch/backends/xnnpack/threadpool/threadpool.h>
+#endif
+
 DEFINE_string(
     model_path,
     "llama2.pte",
@@ -45,6 +50,14 @@ int32_t main(int32_t argc, char** argv) {

   int32_t seq_len = FLAGS_seq_len;

+#if defined(ET_USE_THREADPOOL)
+  uint32_t num_performant_cores =
+      torch::executorch::cpuinfo::get_num_performant_cores();
+  ET_LOG(
+      Info, "Resetting threadpool with num threads = %d", num_performant_cores);
+  torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
+      num_performant_cores);
+#endif
   // create llama runner
   ::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
2 changes: 2 additions & 0 deletions examples/models/llama2/targets.bzl
@@ -16,6 +16,8 @@ def define_common_targets():
         deps = [
             "//executorch/examples/models/llama2/runner:runner" + aten_suffix,
             "//executorch/extension/evalue_util:print_evalue",
+            "//executorch/backends/xnnpack/threadpool:threadpool",
+            "//executorch/backends/xnnpack/threadpool:cpuinfo_utils",
         ],
         external_deps = [
             "gflags",
