From c84e8ff5de2e97532a8c20bd24a080375f80c60a Mon Sep 17 00:00:00 2001 From: captainurist <73941350+captainurist@users.noreply.github.com> Date: Tue, 17 Oct 2023 13:02:18 +0100 Subject: [PATCH] Added Utility/Unicode --- src/Utility/CMakeLists.txt | 9 ++- src/Utility/Tests/Unicode_ut.cpp | 116 +++++++++++++++++++++++++++++++ src/Utility/Unicode.cpp | 13 ++++ src/Utility/Unicode.h | 11 +++ 4 files changed, 146 insertions(+), 3 deletions(-) create mode 100644 src/Utility/Tests/Unicode_ut.cpp create mode 100644 src/Utility/Unicode.cpp create mode 100644 src/Utility/Unicode.h diff --git a/src/Utility/CMakeLists.txt b/src/Utility/CMakeLists.txt index de6f54229fd6..1233e2364d6f 100644 --- a/src/Utility/CMakeLists.txt +++ b/src/Utility/CMakeLists.txt @@ -14,7 +14,8 @@ set(UTILITY_SOURCES Streams/MemoryInputStream.cpp Streams/StringOutputStream.cpp Streams/TempFileOutputStream.cpp - String.cpp) + String.cpp + Unicode.cpp) set(UTILITY_HEADERS DataPath.h @@ -48,7 +49,8 @@ set(UTILITY_HEADERS Streams/StringOutputStream.h Streams/TempFileOutputStream.h String.h - Unaligned.h) + Unaligned.h + Unicode.h) add_library(utility STATIC ${UTILITY_SOURCES} ${UTILITY_HEADERS}) target_check_style(utility) @@ -66,7 +68,8 @@ if(OE_BUILD_TESTS) Tests/IndexedArray_ut.cpp Tests/IndexedBitset_ut.cpp Tests/Segment_ut.cpp - Tests/String_ut.cpp) + Tests/String_ut.cpp + Tests/Unicode_ut.cpp) add_library(test_utility OBJECT ${TEST_UTILITY_SOURCES}) target_link_libraries(test_utility PUBLIC testing_unit utility) diff --git a/src/Utility/Tests/Unicode_ut.cpp b/src/Utility/Tests/Unicode_ut.cpp new file mode 100644 index 000000000000..fdc608f5f383 --- /dev/null +++ b/src/Utility/Tests/Unicode_ut.cpp @@ -0,0 +1,116 @@ +#include +#include +#include + +#include "Testing/Unit/UnitTest.h" + +#include "Utility/Streams/FileOutputStream.h" +#include "Utility/Unicode.h" + +static const char8_t *u8prefix = u8"\u0444\u0430\u0439\u043B"; // "File" in Russian. 
+static const char16_t *u16prefix = u"\u0444\u0430\u0439\u043B"; // "File" in Russian, again.
+
+UNIT_TEST(Unicode, fopen) {
+    const char *data = "data";
+    const size_t dataSize = 4;
+
+    useUtf8Locale();
+
+    std::u8string u8path = std::u8string(u8prefix) + u8"_fopen";
+    std::string path = reinterpret_cast<const char *>(u8path.c_str());
+
+    FILE *f1 = fopen(path.c_str(), "w");
+    ASSERT_NE(f1, nullptr); // Fatal on failure: f1 is handed to fwrite/fclose below.
+
+    size_t written = fwrite(data, dataSize, 1, f1);
+    EXPECT_EQ(written, 1);
+
+    int status = fclose(f1);
+    EXPECT_EQ(status, 0);
+
+#ifdef _WINDOWS
+    static_assert(sizeof(wchar_t) == sizeof(char16_t));
+    std::u16string u16path = std::u16string(u16prefix) + u"_fopen";
+    std::wstring wpath = reinterpret_cast<const wchar_t *>(u16path.c_str());
+
+    FILE *f2 = _wfopen(wpath.c_str(), L"r");
+    ASSERT_NE(f2, nullptr); // Fatal on failure: f2 is handed to fread/fclose below.
+
+    char buffer[10] = {};
+    size_t read = fread(buffer, dataSize, 1, f2);
+    EXPECT_EQ(read, 1);
+    EXPECT_EQ(std::string_view(buffer), std::string_view(data));
+
+    int status2 = fclose(f2);
+    EXPECT_EQ(status2, 0);
+#endif
+
+    // Note: using UTF-8 api directly here.
+    EXPECT_TRUE(std::filesystem::exists(u8path));
+    EXPECT_TRUE(std::filesystem::remove(u8path));
+    EXPECT_FALSE(std::filesystem::exists(u8path));
+}
+
+UNIT_TEST(Unicode, filesystem_exists_remove) {
+    useUtf8Locale();
+
+    std::u8string u8path = std::u8string(u8prefix) + u8"_exists";
+    std::string path = reinterpret_cast<const char *>(u8path.c_str());
+
+    FileOutputStream s(path);
+    s.write("something");
+    s.close();
+
+    // Note: using char * api here, expecting it to be handled as UTF-8.
+    EXPECT_TRUE(std::filesystem::exists(path));
+    EXPECT_TRUE(std::filesystem::remove(path));
+    EXPECT_FALSE(std::filesystem::exists(path));
+}
+
+UNIT_TEST(Unicode, filesystem_rename) {
+    useUtf8Locale();
+
+    std::u8string u8path = std::u8string(u8prefix) + u8"_rename";
+    std::string path = reinterpret_cast<const char *>(u8path.c_str());
+    std::string path2 = path + "2";
+
+    FileOutputStream s(path);
+    s.write("something_else");
+    s.close();
+
+    // Note: using char * api here, expecting it to be handled as UTF-8.
+    EXPECT_TRUE(std::filesystem::exists(path));
+    std::filesystem::rename(path, path2);
+    EXPECT_FALSE(std::filesystem::exists(path));
+    EXPECT_TRUE(std::filesystem::exists(path2));
+    EXPECT_TRUE(std::filesystem::remove(path2));
+    EXPECT_FALSE(std::filesystem::exists(path2));
+}
+
+UNIT_TEST(Unicode, fstreams) {
+    useUtf8Locale();
+
+    std::u8string u8path = std::u8string(u8prefix) + u8"_fstreams";
+    std::string path = reinterpret_cast<const char *>(u8path.c_str());
+
+    const char *data = "data";
+    size_t dataSize = 4;
+
+    std::ofstream f1;
+    f1.open(path);
+    f1.write(data, dataSize);
+    f1.close();
+
+    std::ifstream f2;
+    f2.open(path);
+    char buffer[10] = {};
+    f2.read(buffer, dataSize);
+    f2.close();
+
+    EXPECT_EQ(std::string_view(buffer), std::string_view(data));
+
+    // Note: using UTF-8 api directly here.
+    EXPECT_TRUE(std::filesystem::exists(u8path));
+    EXPECT_TRUE(std::filesystem::remove(u8path));
+    EXPECT_FALSE(std::filesystem::exists(u8path));
+}
diff --git a/src/Utility/Unicode.cpp b/src/Utility/Unicode.cpp
new file mode 100644
index 000000000000..8af04f110db4
--- /dev/null
+++ b/src/Utility/Unicode.cpp
@@ -0,0 +1,13 @@
+#include "Unicode.h"
+
+#ifdef _WINDOWS
+#   include <cassert>
+#   include <clocale>
+#endif
+
+void useUtf8Locale() {
+#ifdef _WINDOWS
+    [[maybe_unused]] const char *localeString = setlocale(LC_ALL, ".UTF8"); // Only read by assert() below.
+    assert(localeString);
+#endif
+}
diff --git a/src/Utility/Unicode.h b/src/Utility/Unicode.h
new file mode 100644
index 000000000000..616f28c0b8f0
--- /dev/null
+++ b/src/Utility/Unicode.h
@@ -0,0 +1,11 @@
+#pragma once
+
+/**
+ * Switches to UTF-8 locale on Windows, does nothing on POSIX.
+ *
+ * Note that for this to work on older Windows versions, CRT should be statically linked. This is how OE releases
+ * are built right now.
+ *
+ * @see https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale?view=msvc-170#utf-8-support
+ */
+void useUtf8Locale();