-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutf8.js
72 lines (60 loc) · 1.84 KB
/
utf8.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
/* jshint esversion: 8 */
/* jshint node: true */
"use strict";
/**
 * @function encode_utf8
 * @description Encodes a JavaScript (UTF-16) string into UTF-8 bytes.
 * Taken from [fast-text-encoding]{@link https://github.com/samthor/fast-text-encoding}
 *
 * A valid surrogate pair is combined into one code point and written as a
 * 4-byte sequence. Unpaired surrogates (high or low) are dropped, because they
 * have no well-formed UTF-8 representation.
 * @param {string} string - Text to encode.
 * @returns {Uint8Array} A freshly allocated array holding exactly the encoded bytes.
 */
function encode_utf8(string) {
  const len = string.length;
  let pos = 0; // input position, in UTF-16 code units
  let at = 0; // output position, in bytes
  // Initial capacity guess: ~1.5 bytes per code unit, at least 32 bytes.
  let tlen = Math.max(32, len + (len >>> 1) + 7);
  let target = new Uint8Array((tlen >>> 3) << 3); // round down to a multiple of 8

  while (pos < len) {
    let value = string.charCodeAt(pos++);

    if (value >= 0xd800 && value <= 0xdfff) {
      // Only a high surrogate immediately followed by a low one is valid.
      if (value <= 0xdbff && pos < len) {
        const extra = string.charCodeAt(pos);
        if ((extra & 0xfc00) === 0xdc00) {
          ++pos;
          value = ((value & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
        }
      }
      // Still in the surrogate range => it was unpaired (high or low).
      if (value >= 0xd800 && value <= 0xdfff) {
        continue; // drop lone surrogate
      }
    }

    // Expand the buffer if we couldn't write 4 bytes (the longest sequence).
    if (at > target.length - 4) {
      tlen += 8; // minimum extra
      tlen *= 1.0 + (pos / len) * 2; // scale by how far through the input we are
      tlen = (tlen >>> 3) << 3; // back to an integer multiple of 8
      // A typed array cannot be resized in place (and slice() never grows it),
      // so allocate a larger one and copy the bytes written so far.
      const grown = new Uint8Array(tlen);
      grown.set(target);
      target = grown;
    }

    if ((value & 0xffffff80) === 0) {
      // 1-byte (ASCII)
      target[at++] = value;
      continue;
    }

    // Leading byte(s); the final continuation byte is shared below.
    if ((value & 0xfffff800) === 0) {
      // 2-byte
      target[at++] = ((value >>> 6) & 0x1f) | 0xc0;
    } else if ((value & 0xffff0000) === 0) {
      // 3-byte
      target[at++] = ((value >>> 12) & 0x0f) | 0xe0;
      target[at++] = ((value >>> 6) & 0x3f) | 0x80;
    } else {
      // 4-byte: value is at most 0x10FFFF after surrogate pairing
      target[at++] = ((value >>> 18) & 0x07) | 0xf0;
      target[at++] = ((value >>> 12) & 0x3f) | 0x80;
      target[at++] = ((value >>> 6) & 0x3f) | 0x80;
    }
    target[at++] = (value & 0x3f) | 0x80;
  }

  // Trim the unused tail so the caller gets exactly the encoded bytes.
  return target.slice(0, at);
}
// CommonJS export: exposes encode_utf8 as this module's only named member.
module.exports = { encode_utf8 };