Skip to content

Commit

Permalink
Added Utility/Unicode
Browse files Browse the repository at this point in the history
  • Loading branch information
captainurist committed Oct 17, 2023
1 parent b2a6a41 commit 239513b
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 3 deletions.
9 changes: 6 additions & 3 deletions src/Utility/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ set(UTILITY_SOURCES
Streams/MemoryInputStream.cpp
Streams/StringOutputStream.cpp
Streams/TempFileOutputStream.cpp
String.cpp)
String.cpp
Unicode.cpp)

set(UTILITY_HEADERS
DataPath.h
Expand Down Expand Up @@ -48,7 +49,8 @@ set(UTILITY_HEADERS
Streams/StringOutputStream.h
Streams/TempFileOutputStream.h
String.h
Unaligned.h)
Unaligned.h
Unicode.h)

add_library(utility STATIC ${UTILITY_SOURCES} ${UTILITY_HEADERS})
target_check_style(utility)
Expand All @@ -66,7 +68,8 @@ if(OE_BUILD_TESTS)
Tests/IndexedArray_ut.cpp
Tests/IndexedBitset_ut.cpp
Tests/Segment_ut.cpp
Tests/String_ut.cpp)
Tests/String_ut.cpp
Tests/Unicode_ut.cpp)

add_library(test_utility OBJECT ${TEST_UTILITY_SOURCES})
target_link_libraries(test_utility PUBLIC testing_unit utility)
Expand Down
116 changes: 116 additions & 0 deletions src/Utility/Tests/Unicode_ut.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
#include "Testing/Unit/UnitTest.h"

#include <cstdio>
#include <filesystem>
#include <fstream>

#include "Utility/Streams/FileOutputStream.h"
#include "Utility/Unicode.h"

// File name prefix shared by the tests below, encoded as UTF-8: code points U+0444 U+0430 U+0439 U+043B.
static const char8_t *u8prefix = u8"\u0444\u0430\u0439\u043B"; // "File" in Russian.
// The same four code points as a UTF-16 string; used on Windows to build the wide-character path to the same file.
static const char16_t *u16prefix = u"\u0444\u0430\u0439\u043B"; // "File" in Russian, again.

// Checks that a UTF-8 encoded `char *` path passed to fopen() creates a file with the expected Unicode name.
// On Windows the file is additionally re-opened through the wide-character API using the UTF-16 form of the name.
UNIT_TEST(Unicode, fopen) {
    const char *data = "data";
    const size_t dataSize = 4;

    useUtf8Locale();

    std::u8string u8path = std::u8string(u8prefix) + u8"_fopen";
    std::string path = reinterpret_cast<const char *>(u8path.c_str());

    FILE *f1 = fopen(path.c_str(), "w");
    EXPECT_NE(f1, nullptr);
    if (f1 == nullptr)
        return; // No file was created, and handing a null stream to fwrite/fclose is undefined behavior.

    size_t written = fwrite(data, dataSize, 1, f1);
    EXPECT_EQ(written, 1);

    int status = fclose(f1);
    EXPECT_EQ(status, 0);

#ifdef _WINDOWS
    // wchar_t must be a UTF-16 code unit for the reinterpret_cast below to be meaningful.
    static_assert(sizeof(wchar_t) == sizeof(char16_t));
    std::u16string u16path = std::u16string(u16prefix) + u"_fopen";
    std::wstring wpath = reinterpret_cast<const wchar_t *>(u16path.c_str());

    // _wfopen takes both the path and the mode as wide-character strings.
    FILE *f2 = _wfopen(wpath.c_str(), L"r");
    EXPECT_NE(f2, nullptr);

    // Skip the read-back if the wide open failed, but still fall through to the cleanup below.
    if (f2 != nullptr) {
        char buffer[10] = {};
        size_t read = fread(buffer, dataSize, 1, f2);
        EXPECT_EQ(read, 1);
        EXPECT_EQ(std::string_view(buffer), std::string_view(data));

        status = fclose(f2);
        EXPECT_EQ(status, 0);
    }
#endif

    // Note: using UTF-8 api directly here.
    EXPECT_TRUE(std::filesystem::exists(u8path));
    EXPECT_TRUE(std::filesystem::remove(u8path));
    EXPECT_FALSE(std::filesystem::exists(u8path));
}

// Creates a file with a non-ASCII name, then checks that std::filesystem::exists/remove accept the same name
// passed as a plain std::string holding UTF-8 bytes.
UNIT_TEST(Unicode, filesystem_exists_remove) {
    useUtf8Locale();

    const std::u8string utf8Name = std::u8string(u8prefix) + u8"_exists";
    // Same bytes, just viewed as char instead of char8_t.
    const char *rawBytes = reinterpret_cast<const char *>(utf8Name.c_str());
    const std::string path(rawBytes);

    FileOutputStream stream(path);
    stream.write("something");
    stream.close();

    // Note: using char * api here, expecting it to be handled as UTF-8.
    const bool existedAfterWrite = std::filesystem::exists(path);
    EXPECT_TRUE(existedAfterWrite);

    const bool removed = std::filesystem::remove(path);
    EXPECT_TRUE(removed);

    const bool existsAfterRemove = std::filesystem::exists(path);
    EXPECT_FALSE(existsAfterRemove);
}

// Creates a file with a non-ASCII name and checks that std::filesystem::rename works when both the old and the
// new name are passed as plain std::string values holding UTF-8 bytes.
UNIT_TEST(Unicode, filesystem_rename) {
    useUtf8Locale();

    const std::u8string utf8Name = std::u8string(u8prefix) + u8"_rename";
    // Same bytes, just viewed as char instead of char8_t.
    const char *rawBytes = reinterpret_cast<const char *>(utf8Name.c_str());
    const std::string source(rawBytes);
    const std::string target = source + "2";

    FileOutputStream stream(source);
    stream.write("something_else");
    stream.close();

    // Note: using char * api here, expecting it to be handled as UTF-8.
    const bool sourceExistsBefore = std::filesystem::exists(source);
    EXPECT_TRUE(sourceExistsBefore);

    std::filesystem::rename(source, target);

    const bool sourceExistsAfter = std::filesystem::exists(source);
    EXPECT_FALSE(sourceExistsAfter);
    const bool targetExistsAfter = std::filesystem::exists(target);
    EXPECT_TRUE(targetExistsAfter);

    // Clean up the renamed file and make sure it is really gone.
    const bool removed = std::filesystem::remove(target);
    EXPECT_TRUE(removed);
    const bool targetExistsAtEnd = std::filesystem::exists(target);
    EXPECT_FALSE(targetExistsAtEnd);
}

// Checks that std::ofstream / std::ifstream accept a std::string path holding UTF-8 bytes: the data written through
// one stream must be readable through the other, and the resulting file must be visible under its UTF-8 name.
UNIT_TEST(Unicode, fstreams) {
    useUtf8Locale();

    std::u8string u8path = std::u8string(u8prefix) + u8"_fstreams";
    std::string path = reinterpret_cast<const char *>(u8path.c_str());

    const char *data = "data";
    const size_t dataSize = 4;

    std::ofstream f1;
    f1.open(path);
    // Report an open failure here instead of as a confusing buffer mismatch further down.
    EXPECT_TRUE(f1.is_open());
    f1.write(data, dataSize);
    f1.close();
    // close() flushes; failbit/badbit set by open, write or close would surface here.
    EXPECT_FALSE(f1.fail());

    std::ifstream f2;
    f2.open(path);
    EXPECT_TRUE(f2.is_open());
    char buffer[10] = {};
    f2.read(buffer, dataSize);
    // read() sets failbit if fewer than dataSize characters could be extracted.
    EXPECT_FALSE(f2.fail());
    f2.close();

    EXPECT_EQ(std::string_view(buffer), std::string_view(data));

    // Note: using UTF-8 api directly here.
    EXPECT_TRUE(std::filesystem::exists(u8path));
    EXPECT_TRUE(std::filesystem::remove(u8path));
    EXPECT_FALSE(std::filesystem::exists(u8path));
}
13 changes: 13 additions & 0 deletions src/Utility/Unicode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "Unicode.h"

#ifdef _WINDOWS
# include <clocale>
# include <cassert>
#endif

// Switches the C runtime locale to UTF-8 on Windows; compiles to an empty function everywhere else.
// A failure to switch is caught by an assertion, so it is only detected in builds where asserts are enabled.
void useUtf8Locale() {
#ifdef _WINDOWS
    // <clocale> only guarantees the std:: qualified name, so use it explicitly.
    // [[maybe_unused]]: with NDEBUG the assert below expands to nothing and the variable would otherwise be unused.
    [[maybe_unused]] const char *localeString = std::setlocale(LC_ALL, ".UTF8");
    assert(localeString != nullptr);
#endif
}
11 changes: 11 additions & 0 deletions src/Utility/Unicode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once

/**
* Switches to UTF-8 locale on Windows, does nothing on POSIX.
*
* On Windows this calls `setlocale(LC_ALL, ".UTF8")`, i.e. it changes the process-wide C runtime locale. The intent
* is that `char *` / `std::string` paths passed to `fopen`, `std::filesystem` and `std::fstream` are then handled as
* UTF-8. Success is only checked with an `assert`, so a failure goes unnoticed in builds that disable assertions.
*
* Note that for this to work on older Windows versions, CRT should be statically linked. This is how OE releases
* are built right now.
*
* @see https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/setlocale-wsetlocale?view=msvc-170#utf-8-support
*/
void useUtf8Locale();

0 comments on commit 239513b

Please sign in to comment.