Skip to content

Commit

Permalink
Don't share substrings with too big a size diff
Browse files Browse the repository at this point in the history
  • Loading branch information
jeaye committed Jan 3, 2024
1 parent 66c2b2c commit 5119a71
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 345 deletions.
9 changes: 8 additions & 1 deletion include/cpp/jank/native_persistent_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,17 @@ namespace jank

if(count <= max_small_size)
{ init_small(s.data() + pos, count); }
/* If the size difference between our substring and its original string is too great, it's
* not worth keeping the original string alive just to share the substring. In that case,
* we deep copy. This prevents relatively small (yet still categorically large) substrings
* from a large file keeping that whole file in memory as long as the substrings live. */
else if((s_length - count) > max_shared_difference)
{ init_large_owned(s.store.large.data + pos, count); }
else
{
/* NOTE: Not necessarily null-terminated! */
const_cast<native_persistent_string&>(s).store.large.set_category(category::large_shared);
init_large_shared(s.data() + pos, count);
init_large_shared(s.store.large.data + pos, count);
}
}

Expand Down Expand Up @@ -436,6 +442,7 @@ namespace jank

/* Index of the final byte of a large_storage-sized buffer. */
static constexpr uint8_t last_char_index{ sizeof(large_storage) - 1 };
/* Number of value_type elements which fit in the bytes preceding that final byte. Substrings
 * with a count at or below this are built via init_small. */
static constexpr uint8_t max_small_size{ last_char_index / sizeof(value_type) };
/* When taking a substring, if the source string's length exceeds the substring's count by
 * more than this, the substring is built via init_large_owned rather than init_large_shared,
 * so a small slice doesn't keep a much larger source string alive. */
static constexpr uint16_t max_shared_difference{ 512 };
/* The size is shifted to/from storage, to account for the 2 extra data bits. */
static constexpr uint8_t small_shift{ is_little_endian ? 0 : 2 };
/* Selects two bits of a byte: the top two on little endian, the bottom two otherwise.
 * NOTE(review): presumably these are the 2 extra data bits holding the category; confirm
 * against the storage layout. */
static constexpr uint8_t category_extraction_mask{ is_little_endian ? 0b11000000 : 0b00000011 };
Expand Down
350 changes: 6 additions & 344 deletions src/cpp/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,352 +28,14 @@ namespace jank
{
void run(util::cli::options const &opts, runtime::context &rt_ctx)
{
static_cast<void>(opts);
static_cast<void>(rt_ctx);
//{
// profile::timer timer{ "require clojure.core" };
// rt_ctx.load_module("/clojure.core").expect_ok();
//}

//{
// profile::timer timer{ "eval user code" };
// std::cout << runtime::detail::to_string(rt_ctx.eval_file(opts.target_file)) << std::endl;
//}

{
ankerl::nanobench::Config config;
config.mMinEpochIterations = 10000000;
config.mOut = &std::cout;
config.mWarmup = 10000;

auto small("foo");
auto medium("p0aeoka13scfq4ufg27xlse0y07gjg9v29nonktptjd36jnmlfzpze4qaxztkewq8v36hivq7ieuecvjhp9myn52ubvplrq7ip62oj7qo0n2s8xqgaxc38n70jo3cwdq");

using std_string = std::basic_string<char, std::char_traits<char>, native_allocator<char>>;

std_string small_std{ small };
native_persistent_string small_persistent{ small };
folly::fbstring small_folly{ small };
std_string medium_std{ medium };
native_persistent_string medium_persistent{ medium };
folly::fbstring medium_folly{ medium };

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string small allocation",
[&]
{
native_persistent_string ret{ small };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string small allocation",
[&]
{
std_string ret{ small };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string small allocation",
[&]
{
folly::fbstring ret{ small };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium allocation",
[&]
{
native_persistent_string ret{ medium };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium allocation",
[&]
{
std_string ret{ medium };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string medium allocation",
[&]
{
folly::fbstring ret{ medium };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

/*** Copy ctor ***/

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string small copy ctor",
[&]
{
native_persistent_string ret{ small_persistent };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string small copy ctor",
[&]
{
std_string ret{ small_std };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string small copy ctor",
[&]
{
folly::fbstring ret{ small_folly };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium copy ctor",
[&]
{
native_persistent_string ret{ medium_persistent };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium copy ctor",
[&]
{
std_string ret{ medium_std };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string medium copy ctor",
[&]
{
folly::fbstring ret{ medium_folly };
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

/*** Find ***/

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string small find",
[&]
{
auto const ret(small_persistent.find("fo"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string small find",
[&]
{
auto const ret(small_std.find("fo"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string small find",
[&]
{
auto const ret(small_folly.find("fo"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium find",
[&]
{
auto const ret(medium_persistent.find("kewq"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium find",
[&]
{
auto const ret(medium_std.find("kewq"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly::string medium find",
[&]
{
auto const ret(medium_folly.find("kewq"));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

/*** Substrings. ***/

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string small substr",
[&]
{
auto const ret(small_persistent.substr(1));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string small substr",
[&]
{
auto const ret(small_std.substr(1));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly_string small substr",
[&]
{
auto const ret(small_folly.substr(1));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium substr",
[&]
{
auto const ret(medium_persistent.substr(4, 100));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium substr",
[&]
{
auto const ret(medium_std.substr(4, 100));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly_string medium substr",
[&]
{
auto const ret(medium_folly.substr(4, 100));
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

/*** Comparisons. ***/

std_string another_medium_std{ medium };
native_persistent_string another_medium_persistent{ medium };
folly::fbstring another_medium_folly{ medium };

auto different_medium("p0aeoka13scfq4ufg27xlse0y07gjg9v29nonktptjd36jnmlfzpze4qaxztkewq8v36hivq7ieuecvjhp9myn52ubvplrq7ip62oj7qo0n2s8xqgaxc38nXXXXXXXXX");
std_string different_medium_std{ different_medium };
native_persistent_string different_medium_persistent{ different_medium };
folly::fbstring different_medium_folly{ different_medium };

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium compare same",
[&]
{
auto const ret(medium_persistent == another_medium_persistent);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium compare same",
[&]
{
auto const ret(medium_std == another_medium_std);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"folly_string medium compare same",
[&]
{
auto const ret(medium_folly == another_medium_folly);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"native_persistent_string medium compare different",
[&]
{
auto const ret(medium_persistent == different_medium_persistent);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);

ankerl::nanobench::Bench().config(config).run
(
"std_string medium compare different",
[&]
{
auto const ret(medium_std == different_medium_std);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);
profile::timer timer{ "require clojure.core" };
rt_ctx.load_module("/clojure.core").expect_ok();
}

ankerl::nanobench::Bench().config(config).run
(
"folly_string medium compare different",
[&]
{
auto const ret(medium_folly == different_medium_folly);
ankerl::nanobench::doNotOptimizeAway(ret);
}
);
{
profile::timer timer{ "eval user code" };
std::cout << runtime::detail::to_string(rt_ctx.eval_file(opts.target_file)) << std::endl;
}
}

Expand Down

0 comments on commit 5119a71

Please sign in to comment.