From 4d6929e0e059c7735b5023095a94120a3d833dd6 Mon Sep 17 00:00:00 2001 From: David Fang Date: Tue, 30 Jun 2020 10:29:49 -0700 Subject: [PATCH] Refactor out SplitLines. Avoid repeating logic surrounding absl::StrSplit, in particular handling around presence/absence of terminating '\n'. PiperOrigin-RevId: 319048981 --- common/strings/BUILD | 2 ++ common/strings/split.cc | 35 +++++++++++++++++++++++ common/strings/split.h | 6 ++++ common/strings/split_test.cc | 54 ++++++++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) create mode 100644 common/strings/split.cc diff --git a/common/strings/BUILD b/common/strings/BUILD index 0e066c35d..cd9ee9394 100644 --- a/common/strings/BUILD +++ b/common/strings/BUILD @@ -193,6 +193,7 @@ cc_test( cc_library( name = "split", + srcs = ["split.cc"], hdrs = ["split.h"], deps = [ "@com_google_absl//absl/strings", @@ -203,6 +204,7 @@ cc_test( name = "split_test", srcs = ["split_test.cc"], deps = [ + ":range", ":split", "//common/util:range", "@com_google_googletest//:gtest_main", diff --git a/common/strings/split.cc b/common/strings/split.cc new file mode 100644 index 000000000..cc8ee931e --- /dev/null +++ b/common/strings/split.cc @@ -0,0 +1,35 @@ +// Copyright 2017-2020 The Verible Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "common/strings/split.h" + +#include + +#include "absl/strings/str_split.h" + +namespace verible { + +std::vector SplitLines(absl::string_view text) { + if (text.empty()) return std::vector(); + std::vector lines( + absl::StrSplit(text, absl::ByChar('\n'))); + // If text ends cleanly with a \n, omit the last blank split, + // otherwise treat it as if the trailing text ends with a \n. + if (text.back() == '\n') { + lines.pop_back(); + } + return lines; +} + +} // namespace verible diff --git a/common/strings/split.h b/common/strings/split.h index b15bcdf2e..7c6153a10 100644 --- a/common/strings/split.h +++ b/common/strings/split.h @@ -16,6 +16,7 @@ #define VERIBLE_COMMON_STRINGS_SPLIT_H_ #include +#include #include "absl/strings/string_view.h" @@ -108,6 +109,11 @@ std::function MakeStringSpliterator( return [=]() mutable /* splitter */ { return splitter(delimiter); }; } +// Returns line-based view of original text. +// If original text did not terminate with a \n, interpret the final partial +// line as a whole line. +std::vector SplitLines(absl::string_view text); + } // namespace verible #endif // VERIBLE_COMMON_STRINGS_SPLIT_H_ diff --git a/common/strings/split_test.cc b/common/strings/split_test.cc index 9e59e58ed..c327f20ea 100644 --- a/common/strings/split_test.cc +++ b/common/strings/split_test.cc @@ -16,11 +16,14 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "common/strings/range.h" #include "common/util/range.h" namespace verible { namespace { +using ::testing::ElementsAre; + static void AcceptFunctionChar(std::function func) {} static void AcceptFunctionStringView( std::function func) {} @@ -115,5 +118,56 @@ TEST(StringSpliteratorTest, CommaBabyCommaOverBaby) { EXPECT_TRUE(BoundsEqual(gen(), csv_row.substr(10, 2))); } +using IntPair = std::pair; + +// For testing purposes, directly compare the substring indices, +// which is a stronger check than string contents comparison. +static std::vector SplitLinesToOffsets(absl::string_view text) { + std::vector offsets; + for (const auto& line : SplitLines(text)) { + offsets.push_back(SubstringOffsets(line, text)); + } + return offsets; +} + +TEST(SplitLinesTest, Empty) { + constexpr absl::string_view text(""); + const auto lines = SplitLines(text); + EXPECT_TRUE(lines.empty()); +} + +TEST(SplitLinesTest, OneSpace) { + constexpr absl::string_view text(" "); + EXPECT_THAT(SplitLines(text), ElementsAre(" ")); + EXPECT_THAT(SplitLinesToOffsets(text), ElementsAre(IntPair(0, 1))); +} + +TEST(SplitLinesTest, OneBlankLine) { + constexpr absl::string_view text("\n"); + EXPECT_THAT(SplitLines(text), ElementsAre("")); + EXPECT_THAT(SplitLinesToOffsets(text), ElementsAre(IntPair(0, 0))); +} + +TEST(SplitLinesTest, BlankLines) { + constexpr absl::string_view text("\n\n\n"); + EXPECT_THAT(SplitLines(text), ElementsAre("", "", "")); + EXPECT_THAT(SplitLinesToOffsets(text), + ElementsAre(IntPair(0, 0), IntPair(1, 1), IntPair(2, 2))); +} + +TEST(SplitLinesTest, NonBlankLines) { + constexpr absl::string_view text("a\nbc\ndef\n"); + EXPECT_THAT(SplitLines(text), ElementsAre("a", "bc", "def")); + EXPECT_THAT(SplitLinesToOffsets(text), + ElementsAre(IntPair(0, 1), IntPair(2, 4), IntPair(5, 8))); +} + +TEST(SplitLinesTest, NonBlankLinesUnterminated) { + constexpr absl::string_view text("abc\nde\nf"); // no \n at the end + EXPECT_THAT(SplitLines(text), ElementsAre("abc", "de", "f")); + EXPECT_THAT(SplitLinesToOffsets(text), + ElementsAre(IntPair(0, 3), IntPair(4, 6), IntPair(7, 8))); +} + } // namespace } // namespace verible