This repository has been archived by the owner on Sep 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdecodeutf8.h
60 lines (55 loc) · 1.59 KB
/
decodeutf8.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
/**
* @brief Decode UTF8 to codepoints.
*
* This is a simple implementation of a UTF-8 decoder with an
* equivalent API to the older third-party (and much cooler...)
* version that SiriusOS used to use. Keep feeding it bytes and
* will eventually set *codep to a codepoint. Should also be able
* to detect bad UTF-8.
*
* @copyright
* This file is part of SiriusOS and is released under the terms
* of the NCSA / University of Illinois License - see LICENSE.md
* Copyright (C) 2018 K. Lange
*/
#include <stdint.h>
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
/**
* Conceptually similar to its predecessor, this implementation is much
* less cool, as it uses three separate state tables and more shifts.
*/
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
static int state_table[32] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
1,1,1,1,1,1,1,1, /* 10xxxxxx */
2,2,2,2, /* 110xxxxx */
3,3, /* 1110xxxx */
4, /* 11110xxx */
1 /* 11111xxx */
};
static int mask_bytes[32] = {
0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x1F,0x1F,0x1F,0x1F,
0x0F,0x0F,
0x07,
0x00
};
static int next[5] = {
0,
1,
0,
2,
3
};
if (*state == UTF8_ACCEPT) {
*codep = byte & mask_bytes[byte >> 3];
*state = state_table[byte >> 3];
} else if (*state > 0) {
*codep = (byte & 0x3F) | (*codep << 6);
*state = next[*state];
}
return *state;
}