Skip to content

Commit

Permalink
Added Utility/Unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
captainurist committed Oct 17, 2023
1 parent b2a6a41 commit 6d4a4be
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/Utility/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ set(UTILITY_SOURCES
Streams/MemoryInputStream.cpp
Streams/StringOutputStream.cpp
Streams/TempFileOutputStream.cpp
String.cpp)
String.cpp
Unicode.cpp)

set(UTILITY_HEADERS
DataPath.h
Expand Down Expand Up @@ -48,7 +49,8 @@ set(UTILITY_HEADERS
Streams/StringOutputStream.h
Streams/TempFileOutputStream.h
String.h
Unaligned.h)
Unaligned.h
Unicode.h)

add_library(utility STATIC ${UTILITY_SOURCES} ${UTILITY_HEADERS})
target_check_style(utility)
Expand All @@ -66,7 +68,8 @@ if(OE_BUILD_TESTS)
Tests/IndexedArray_ut.cpp
Tests/IndexedBitset_ut.cpp
Tests/Segment_ut.cpp
Tests/String_ut.cpp)
Tests/String_ut.cpp
Tests/Unicode_ut.cpp)

add_library(test_utility OBJECT ${TEST_UTILITY_SOURCES})
target_link_libraries(test_utility PUBLIC testing_unit utility)
Expand Down
116 changes: 116 additions & 0 deletions src/Utility/Tests/Unicode_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include "Testing/Unit/UnitTest.h"

#include <cstdio>
#include <filesystem>
#include <fstream>

#include "Utility/Streams/FileOutputStream.h"
#include "Utility/Unicode.h"

// Base name shared by the files the tests below create: the Russian word for "file", written with Cyrillic
// (non-ASCII) code points. The same four code points are provided in two encodings.
static const char8_t *prefix = u8"\u0444\u0430\u0439\u043B"; // "File" in Russian, as a UTF-8 (u8) literal.
static const char16_t *wprefix = u"\u0444\u0430\u0439\u043B"; // "File" in Russian, again, as a UTF-16 (u) literal.

/**
 * Writes a small file through narrow-character `fopen` using a UTF-8 encoded, non-ASCII file name, after calling
 * `useUtf8Locale`.
 *
 * On Windows builds (`_WINDOWS` defined) the file is then reopened through the wide-character `_wfopen` using the
 * UTF-16 spelling of the same name, and its contents are compared with what was written. Succeeding there shows that
 * the narrow `fopen` call interpreted the name as UTF-8.
 *
 * Finally the file is checked for and removed through `std::filesystem`, passing the same UTF-8 string.
 */
UNIT_TEST(Unicode, fopen) {
    useUtf8Locale();

    std::string filename = reinterpret_cast<const char *>(prefix);
    filename += "_fopen";

    const char *data = "data";
    size_t dataSize = 4;

    FILE *f1 = fopen(filename.c_str(), "w");
    EXPECT_NE(f1, nullptr);

    // Passing a null FILE* to fwrite/fclose is undefined behavior, so only touch the stream if it was opened.
    if (f1 != nullptr) {
        size_t written = fwrite(data, dataSize, 1, f1);
        EXPECT_EQ(written, 1);

        int writeStatus = fclose(f1);
        EXPECT_EQ(writeStatus, 0);
    }

#ifdef _WINDOWS
    // The reinterpret_cast below is only meaningful if wchar_t is a 16-bit code unit, as it is on Windows.
    static_assert(sizeof(wchar_t) == sizeof(char16_t));
    std::wstring wfilename = reinterpret_cast<const wchar_t *>(wprefix);
    wfilename += L"_fopen";

    // _wfopen takes both the path and the mode as wide strings.
    FILE *f2 = _wfopen(wfilename.c_str(), L"r");
    EXPECT_NE(f2, nullptr);

    if (f2 != nullptr) {
        char buffer[10] = {}; // Zero-initialized, so the 4 bytes read below stay null-terminated.
        size_t read = fread(buffer, dataSize, 1, f2);
        EXPECT_EQ(read, 1);
        EXPECT_EQ(std::string_view(buffer), std::string_view(data));

        int readStatus = fclose(f2);
        EXPECT_EQ(readStatus, 0);
    }
#endif

    // Note: using UTF-8 api directly here.
    EXPECT_TRUE(std::filesystem::exists(filename));
    EXPECT_TRUE(std::filesystem::remove(filename));
    EXPECT_FALSE(std::filesystem::exists(filename));
}

/**
 * Creates a file with a non-ASCII, UTF-8 encoded name via `FileOutputStream`, then verifies that
 * `std::filesystem::exists` and `std::filesystem::remove` find and delete it when handed the same narrow string.
 */
UNIT_TEST(Unicode, filesystem_exists_remove) {
    useUtf8Locale();

    std::string path(reinterpret_cast<const char *>(prefix));
    path.append("_exists");

    FileOutputStream output(path);
    output.write("something");
    output.close();

    // Note: the narrow-string overloads are used on purpose, the path is expected to be treated as UTF-8.
    const auto present = [&path] { return std::filesystem::exists(path); };

    EXPECT_TRUE(present());
    EXPECT_TRUE(std::filesystem::remove(path));
    EXPECT_FALSE(present());
}

/**
 * Creates a file with a non-ASCII, UTF-8 encoded name via `FileOutputStream`, renames it with
 * `std::filesystem::rename`, and verifies that only the new name exists afterwards. The renamed file is removed at
 * the end.
 */
UNIT_TEST(Unicode, filesystem_rename) {
    useUtf8Locale();

    std::string sourcePath(reinterpret_cast<const char *>(prefix));
    sourcePath.append("_rename");
    std::string targetPath = sourcePath;
    targetPath.append("2");

    FileOutputStream output(sourcePath);
    output.write("something_else");
    output.close();

    // Note: the narrow-string overloads are used on purpose, the paths are expected to be treated as UTF-8.
    const auto present = [](const std::string &path) { return std::filesystem::exists(path); };

    EXPECT_TRUE(present(sourcePath));
    std::filesystem::rename(sourcePath, targetPath);
    EXPECT_FALSE(present(sourcePath));
    EXPECT_TRUE(present(targetPath));
    EXPECT_TRUE(std::filesystem::remove(targetPath));
    EXPECT_FALSE(present(targetPath));
}

/**
 * Round-trips a few bytes through `std::ofstream` / `std::ifstream` opened with a non-ASCII, UTF-8 encoded file
 * name, then checks for and removes the file through `std::filesystem`.
 */
UNIT_TEST(Unicode, fstreams) {
    useUtf8Locale();

    std::string path(reinterpret_cast<const char *>(prefix));
    path.append("_fstreams");

    const char *payload = "data";
    size_t payloadSize = 4;

    std::ofstream writer(path);
    writer.write(payload, payloadSize);
    writer.close();

    char readBack[10] = {}; // Zero-filled, so the bytes read below stay null-terminated.
    std::ifstream reader(path);
    reader.read(readBack, payloadSize);
    reader.close();

    EXPECT_EQ(std::string_view(readBack), std::string_view(payload));

    // Note: using UTF-8 api directly here.
    EXPECT_TRUE(std::filesystem::exists(path));
    EXPECT_TRUE(std::filesystem::remove(path));
    EXPECT_FALSE(std::filesystem::exists(path));
}
13 changes: 13 additions & 0 deletions src/Utility/Unicode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "Unicode.h"

#ifdef _WINDOWS
# include <clocale>
# include <cassert>
#endif

/**
 * Requests the ".UTF8" locale for all categories when built with `_WINDOWS` defined; the body is empty otherwise.
 *
 * Failure to switch the locale is only detected through `assert`, i.e. in builds without `NDEBUG`.
 */
void useUtf8Locale() {
#ifdef _WINDOWS
    // With NDEBUG the assert below expands to nothing, which would leave this variable unused and trigger
    // an unused-variable warning, hence the attribute.
    [[maybe_unused]] const char *localeString = setlocale(LC_ALL, ".UTF8");
    assert(localeString); // setlocale returns a null pointer when the requested locale can't be set.
#endif
}
11 changes: 11 additions & 0 deletions src/Utility/Unicode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

/**
 * Switches to UTF-8 locale on Windows, does nothing on POSIX.
 *
 * Takes no arguments and returns nothing, so a failure to switch the locale cannot be observed through this
 * interface.
 *
 * Note that for this to work on older Windows versions, the CRT should be statically linked. This is how OE releases
 * are built right now.
 *
 * @see https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale?view=msvc-170#utf-8-support
 */
void useUtf8Locale();

0 comments on commit 6d4a4be

Please sign in to comment.