From 5119a711d7743a38a53b7a8ba76c06cb4724b369 Mon Sep 17 00:00:00 2001 From: jeaye Date: Tue, 2 Jan 2024 22:41:02 -0800 Subject: [PATCH] Don't share substrings with too big a size diff --- include/cpp/jank/native_persistent_string.hpp | 9 +- src/cpp/main.cpp | 350 +----------------- 2 files changed, 14 insertions(+), 345 deletions(-) diff --git a/include/cpp/jank/native_persistent_string.hpp b/include/cpp/jank/native_persistent_string.hpp index 584cec846..c25565c8d 100644 --- a/include/cpp/jank/native_persistent_string.hpp +++ b/include/cpp/jank/native_persistent_string.hpp @@ -128,11 +128,17 @@ namespace jank if(count <= max_small_size) { init_small(s.data() + pos, count); } + /* If the size difference between our substring and its original string is too great, it's + * not worth keeping the original string alive just to share the substring. In that case, + * we deep copy. This prevents relatively small (yet still categorically large) substrings + * from a large file keeping that whole file in memory as long as the substrings live. */ + else if((s_length - count) > max_shared_difference) + { init_large_owned(s.store.large.data + pos, count); } else { /* NOTE: Not necessarily null-terminated! */ const_cast(s).store.large.set_category(category::large_shared); - init_large_shared(s.data() + pos, count); + init_large_shared(s.store.large.data + pos, count); } } @@ -436,6 +442,7 @@ namespace jank static constexpr uint8_t last_char_index{ sizeof(large_storage) - 1 }; static constexpr uint8_t max_small_size{ last_char_index / sizeof(value_type) }; + static constexpr uint16_t max_shared_difference{ 512 }; /* The size is shifted to/from storage, to account for the 2 extra data bits. */ static constexpr uint8_t small_shift{ is_little_endian ? 0 : 2 }; static constexpr uint8_t category_extraction_mask{ is_little_endian ?
0b11000000 : 0b00000011 }; diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp index b6a40f5fc..83d13aa0a 100644 --- a/src/cpp/main.cpp +++ b/src/cpp/main.cpp @@ -28,352 +28,14 @@ namespace jank { void run(util::cli::options const &opts, runtime::context &rt_ctx) { - static_cast(opts); - static_cast(rt_ctx); - //{ - // profile::timer timer{ "require clojure.core" }; - // rt_ctx.load_module("/clojure.core").expect_ok(); - //} - - //{ - // profile::timer timer{ "eval user code" }; - // std::cout << runtime::detail::to_string(rt_ctx.eval_file(opts.target_file)) << std::endl; - //} - { - ankerl::nanobench::Config config; - config.mMinEpochIterations = 10000000; - config.mOut = &std::cout; - config.mWarmup = 10000; - - auto small("foo"); - auto medium("p0aeoka13scfq4ufg27xlse0y07gjg9v29nonktptjd36jnmlfzpze4qaxztkewq8v36hivq7ieuecvjhp9myn52ubvplrq7ip62oj7qo0n2s8xqgaxc38n70jo3cwdq"); - - using std_string = std::basic_string, native_allocator>; - - std_string small_std{ small }; - native_persistent_string small_persistent{ small }; - folly::fbstring small_folly{ small }; - std_string medium_std{ medium }; - native_persistent_string medium_persistent{ medium }; - folly::fbstring medium_folly{ medium }; - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string small allocation", - [&] - { - native_persistent_string ret{ small }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string small allocation", - [&] - { - std_string ret{ small }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly::string small allocation", - [&] - { - folly::fbstring ret{ small }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium allocation", - [&] - { - native_persistent_string ret{ medium }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - 
ankerl::nanobench::Bench().config(config).run - ( - "std_string medium allocation", - [&] - { - std_string ret{ medium }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly::string medium allocation", - [&] - { - folly::fbstring ret{ medium }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - /*** Copy ctor ***/ - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string small copy ctor", - [&] - { - native_persistent_string ret{ small_persistent }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string small copy ctor", - [&] - { - std_string ret{ small_std }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly::string small copy ctor", - [&] - { - folly::fbstring ret{ small_folly }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium copy ctor", - [&] - { - native_persistent_string ret{ medium_persistent }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string medium copy ctor", - [&] - { - std_string ret{ medium_std }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly::string medium copy ctor", - [&] - { - folly::fbstring ret{ medium_folly }; - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - /*** Find ***/ - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string small find", - [&] - { - auto const ret(small_persistent.find("fo")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string small find", - [&] - { - auto const ret(small_std.find("fo")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - 
ankerl::nanobench::Bench().config(config).run - ( - "folly::string small find", - [&] - { - auto const ret(small_folly.find("fo")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium find", - [&] - { - auto const ret(medium_persistent.find("kewq")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string medium find", - [&] - { - auto const ret(medium_std.find("kewq")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly::string medium find", - [&] - { - auto const ret(medium_folly.find("kewq")); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - /*** Substrings. ***/ - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string small substr", - [&] - { - auto const ret(small_persistent.substr(1)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string small substr", - [&] - { - auto const ret(small_std.substr(1)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly_string small substr", - [&] - { - auto const ret(small_folly.substr(1)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium substr", - [&] - { - auto const ret(medium_persistent.substr(4, 100)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string medium substr", - [&] - { - auto const ret(medium_std.substr(4, 100)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly_string medium substr", - [&] - { - auto const ret(medium_folly.substr(4, 100)); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - 
/*** Comparisons. ***/ - - std_string another_medium_std{ medium }; - native_persistent_string another_medium_persistent{ medium }; - folly::fbstring another_medium_folly{ medium }; - - auto different_medium("p0aeoka13scfq4ufg27xlse0y07gjg9v29nonktptjd36jnmlfzpze4qaxztkewq8v36hivq7ieuecvjhp9myn52ubvplrq7ip62oj7qo0n2s8xqgaxc38nXXXXXXXXX"); - std_string different_medium_std{ different_medium }; - native_persistent_string different_medium_persistent{ different_medium }; - folly::fbstring different_medium_folly{ different_medium }; - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium compare same", - [&] - { - auto const ret(medium_persistent == another_medium_persistent); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string medium compare same", - [&] - { - auto const ret(medium_std == another_medium_std); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "folly_string medium compare same", - [&] - { - auto const ret(medium_folly == another_medium_folly); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "native_persistent_string medium compare different", - [&] - { - auto const ret(medium_persistent == different_medium_persistent); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); - - ankerl::nanobench::Bench().config(config).run - ( - "std_string medium compare different", - [&] - { - auto const ret(medium_std == different_medium_std); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); + profile::timer timer{ "require clojure.core" }; + rt_ctx.load_module("/clojure.core").expect_ok(); + } - ankerl::nanobench::Bench().config(config).run - ( - "folly_string medium compare different", - [&] - { - auto const ret(medium_folly == different_medium_folly); - ankerl::nanobench::doNotOptimizeAway(ret); - } - ); + { + profile::timer timer{ "eval 
user code" }; + std::cout << runtime::detail::to_string(rt_ctx.eval_file(opts.target_file)) << std::endl; } }