-
Notifications
You must be signed in to change notification settings - Fork 1
/
utf8_unicode.cpp
53 lines (44 loc) · 1.18 KB
/
utf8_unicode.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <string>
#include <vector>
#include "utf8_unicode.hpp"
using std::string;
using std::vector;
// https://github.com/Meteorix/pylcs
// Splits a UTF-8 encoded byte string into one std::string per character.
// A new piece starts at every byte that is not a continuation byte
// (bit pattern 10xxxxxx); the first byte always starts a piece.
// The input is not validated, so malformed sequences are simply cut at
// the same byte boundaries. An empty input yields an empty vector.
vector<string> utf8_split(const string &str){
    vector<string> pieces;
    const int total = str.length();
    int start = 0;
    // `next` is the index of the byte following the piece being built.
    for (int next = 1; next <= total; ++next){
        const bool at_end = (next >= total);
        if (!at_end && ((str[next] & 0xc0) == 0x80)){
            // Continuation byte: still inside the current character.
            continue;
        }
        pieces.push_back(str.substr(start, next - start));
        start = next;
    }
    return pieces;
}
// L. Schiffmann, July 2021
// Returns the length of a UTF-8 string in characters rather than in bytes.
// Counts the characters of a UTF-8 encoded string without materialising
// them. The result equals utf8_split(str).size(): the first byte always
// starts a character, and every later byte that is not a continuation
// byte (bit pattern 10xxxxxx) starts another one.
// Returns 0 for an empty string. The input is not validated.
int utf8_length(const string &str){
    if (str.empty()){
        return 0;
    }
    int count = 1;  // the first byte always opens a character
    for (string::size_type pos = 1; pos < str.size(); ++pos){
        if ((str[pos] & 0xc0) != 0x80){
            ++count;
        }
    }
    return count;
}
// L. Schiffmann, July 2021
// Concatenates all elements of v, in order, into a single string.
// Returns an empty string when v is empty.
string vect2str(const vector<string> &v) {
    // Compute the final size first so the result is allocated once.
    string::size_type total = 0;
    for (const auto &piece : v){
        total += piece.size();
    }
    string joined;
    joined.reserve(total);
    // Iterate by const reference to avoid copying each element.
    for (const auto &piece : v){
        joined += piece;
    }
    return joined;
}
// L. Schiffmann, July 2021
// https://www.techiedelight.com/get-slice-sub-vector-from-vector-cpp/
// Returns a copy of the elements v[m] .. v[n], both ends inclusive.
//   m  index of the first element to copy; values below 0 are treated as 0.
//   n  index of the last element to copy; values past the end are treated
//      as the last valid index.
// An empty vector is returned when the clamped range contains no elements
// (for example when v is empty or m > n). Clamping keeps the iterator
// arithmetic inside [cbegin, cend], which would otherwise be undefined
// behaviour for out-of-range indices.
vector<string> slice(vector<string> const &v, int m, int n)
{
    const int size = static_cast<int>(v.size());
    if (m < 0){
        m = 0;
    }
    if (n >= size){
        n = size - 1;
    }
    if (m > n){
        return {};
    }
    return vector<string>(v.cbegin() + m, v.cbegin() + n + 1);
}