-
Notifications
You must be signed in to change notification settings - Fork 18
/
unicodestring2utf8.c
54 lines (46 loc) · 1.3 KB
/
unicodestring2utf8.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# include <stdio.h>
# include <ctype.h>
# include <string.h>
# include <stdlib.h>
# include "utf8.h"
/*
 * Convert one hexadecimal digit character to its numeric value (0-15).
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F', i.e. every character for which
 * isxdigit() is true in the "C" locale. Callers are expected to validate
 * the character first; any non-hex character yields 0.
 */
uint32_t hex2int(char c)
{
    if (c >= '0' && c <= '9') {
        return (uint32_t)(c - '0');
    }
    if (c >= 'a' && c <= 'f') {
        return (uint32_t)(c - 'a') + 10;
    }
    /* Uppercase digits pass isxdigit() too, so they must be decoded here. */
    if (c >= 'A' && c <= 'F') {
        return (uint32_t)(c - 'A') + 10;
    }
    return 0;
}
/*
 * Expand "\uXXXX" escapes in a string.
 *
 * Scans `input` for sequences made of a backslash, the letter 'u' and four
 * hexadecimal digits. Each such sequence is turned into a code point and
 * handed to u8encode(); whatever u8encode() writes replaces the six escape
 * bytes in the result. Every other byte is copied through unchanged.
 *
 * Returns a newly malloc'd, NUL-terminated string that the caller must
 * free(), or NULL if the allocation fails.
 */
char *parse(const char *input)
{
    size_t input_size = strlen(input);
    /*
     * The output is sized to the input: plain bytes are copied 1:1 and an
     * escape consumes 6 input bytes.
     * NOTE(review): this assumes u8encode() writes no more than those 6
     * bytes per call (the `4` below is presumably its output limit) --
     * confirm against utf8.h.
     */
    char *output = malloc(input_size + 1);
    size_t inpos = 0;
    size_t outpos = 0;

    if (output == NULL) {
        return NULL;
    }

    while (input[inpos] != '\0') {
        const char *p = input + inpos;

        /*
         * <ctype.h> functions require a value representable as unsigned
         * char (or EOF); a plain char may be negative for bytes >= 0x80,
         * so cast before calling isxdigit().
         */
        if (input_size - inpos >= 6 && p[0] == '\\' && p[1] == 'u' &&
            isxdigit((unsigned char)p[2]) && isxdigit((unsigned char)p[3]) &&
            isxdigit((unsigned char)p[4]) && isxdigit((unsigned char)p[5])) {
            /* Assemble the 16-bit value from its four hex nibbles. */
            ucs4_t uc = (hex2int(p[2]) << 12) + (hex2int(p[3]) << 8) +
                        (hex2int(p[4]) << 4) + hex2int(p[5]);
            /* u8encode() is given a zero-terminated one-element array. */
            ucs4_t us[2];
            us[0] = uc;
            us[1] = 0;
            /* The return value is used as the number of bytes to advance. */
            outpos += u8encode(us, output + outpos, 4, NULL);
            inpos += 6;
        } else {
            output[outpos++] = input[inpos++];
        }
    }
    output[outpos] = '\0';
    return output;
}
/*
 * Filter: read stdin line by line, pass each line through parse() and
 * write the result to stdout.
 *
 * Returns EXIT_SUCCESS when input is exhausted, or EXIT_FAILURE if parse()
 * yields NULL for a line.
 */
int main(void)
{
    char *line = NULL;      /* buffer owned by getline(), released below */
    size_t buf_size = 0;
    int status = EXIT_SUCCESS;

    while (getline(&line, &buf_size, stdin) != -1) {
        char *output = parse(line);
        /* Never hand a NULL pointer to the output routine. */
        if (output == NULL) {
            status = EXIT_FAILURE;
            break;
        }
        /* getline() keeps the trailing newline, so none is added here. */
        fputs(output, stdout);
        free(output);
    }
    /* getline() allocates/reallocates `line`; the caller must free it. */
    free(line);
    return status;
}