This repository has been archived by the owner on Jun 1, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
stringreader.gr
144 lines (124 loc) · 3.86 KB
/
stringreader.gr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import WasmI32 from "runtime/unsafe/wasmi32"
import { coerceNumberToWasmI32 } from "runtime/numbers"
import { tagSimpleNumber } from "runtime/dataStructures"
import Memory from "runtime/unsafe/memory"
// A cursor over a string that tracks the current read position both in
// characters and in bytes.
export record StringReader {
// The string being read.
string: String,
// Number of code points read so far; incremented by one on each
// successful read.
mut charPosition: Number,
// Byte offset, relative to the start of the string's data, of the next
// byte to read.
mut bytePosition: Number,
}
// Creates a StringReader over `string` with both the character position
// and the byte position set to zero.
export let make = (string: String) => {
  { string: string, bytePosition: 0, charPosition: 0 }: StringReader
}
// Thrown by getCodePoint when the bytes being decoded do not form a valid
// UTF-8 sequence.
exception MalformedUnicode
// getCodePoint function copied from stdlib
//
// Decodes the single UTF-8 encoded code point that starts at address `ptr`
// and returns its value as a WasmI32.
// Throws MalformedUnicode when the lead byte is outside the accepted ranges
// or when a following byte falls outside the currently allowed boundaries.
// NOTE(review): no length is passed in, so the reads below are not
// bounds-checked; presumably callers guarantee that a complete sequence is
// present at `ptr` — confirm.
@disableGC
let getCodePoint = (ptr: WasmI32) => {
// Algorithm from https://encoding.spec.whatwg.org/#utf-8-decoder
// Inside this function the operators below act on raw WasmI32 values
// (the comparisons are unsigned).
let (+) = WasmI32.add
let (==) = WasmI32.eq
let (>=) = WasmI32.geU
let (<=) = WasmI32.leU
let (<<) = WasmI32.shl
let (&) = WasmI32.and
let (|) = WasmI32.or
// Bits of the code point accumulated so far.
let mut codePoint = 0n
// Continuation bytes consumed so far / expected in total.
let mut bytesSeen = 0n
let mut bytesNeeded = 0n
// Inclusive range accepted for the next continuation byte. Some lead bytes
// narrow it for the byte that immediately follows them.
let mut lowerBoundary = 0x80n
let mut upperBoundary = 0xBFn
// Offset from `ptr` of the next byte to read.
let mut offset = 0n
let mut result = 0n
while (true) {
let byte = WasmI32.load8U(ptr + offset, 0n)
offset += 1n
if (bytesNeeded == 0n) {
// Lead byte: it determines how many continuation bytes follow.
if (byte >= 0x00n && byte <= 0x7Fn) {
// Single-byte (ASCII) code point.
result = byte
break
} else if (byte >= 0xC2n && byte <= 0xDFn) {
// Two-byte sequence: the low 5 bits of the lead byte are payload.
bytesNeeded = 1n
codePoint = byte & 0x1Fn
} else if (byte >= 0xE0n && byte <= 0xEFn) {
// Three-byte sequence: the low 4 bits of the lead byte are payload.
// After 0xE0 a second byte below 0xA0 would be an overlong encoding;
// after 0xED a second byte above 0x9F would encode a surrogate.
if (byte == 0xE0n) lowerBoundary = 0xA0n
if (byte == 0xEDn) upperBoundary = 0x9Fn
bytesNeeded = 2n
codePoint = byte & 0xFn
} else if (byte >= 0xF0n && byte <= 0xF4n) {
// Four-byte sequence: the low 3 bits of the lead byte are payload.
// After 0xF0 a second byte below 0x90 would be an overlong encoding;
// after 0xF4 a second byte above 0x8F would exceed 0x10FFFF.
if (byte == 0xF0n) lowerBoundary = 0x90n
if (byte == 0xF4n) upperBoundary = 0x8Fn
bytesNeeded = 3n
codePoint = byte & 0x7n
} else {
// 0x80-0xC1 and 0xF5-0xFF are never accepted as lead bytes.
throw MalformedUnicode
}
// Move on to read the first continuation byte.
continue
}
// Continuation byte: it must lie within the current boundaries.
if (!(lowerBoundary <= byte && byte <= upperBoundary)) {
throw MalformedUnicode
}
// Narrowed boundaries apply to one byte only; restore the defaults.
lowerBoundary = 0x80n
upperBoundary = 0xBFn
// Append the 6 payload bits of this continuation byte.
codePoint = codePoint << 6n | byte & 0x3Fn
bytesSeen += 1n
if (bytesSeen == bytesNeeded) {
result = codePoint
break
}
}
result: WasmI32
}
// Reads the code point at the reader's current byte position and advances
// the reader past it (bytePosition by the encoded length, charPosition by
// one).
//
// Returns the code point as a Number, or -1 when the byte position is at or
// past the end of the string; in that case the reader is left unchanged.
// `getCodePoint` throws MalformedUnicode if the bytes at the current
// position are not valid UTF-8.
@disableGC
export let rec readCodePointFast = (reader: StringReader) => {
  // Only the operators actually used below are rebound to their raw
  // WasmI32 versions.
  let (&) = WasmI32.and
  let (<) = WasmI32.ltU
  let (==) = WasmI32.eq

  let strPtr = WasmI32.fromGrain(reader.string)
  // The byte length is loaded from offset 4 of the string value; the
  // string's bytes are addressed starting at offset 8.
  let byteSize = WasmI32.load(strPtr, 4n)
  let bytePosition = coerceNumberToWasmI32(reader.bytePosition)
  let charPosition = coerceNumberToWasmI32(reader.charPosition)
  let ptr = WasmI32.add(WasmI32.add(strPtr, 8n), bytePosition)

  let result = if (bytePosition < byteSize) {
    // The lead byte alone determines how many bytes the code point occupies.
    let byte = WasmI32.load8U(ptr, 0n)
    let codePointByteCount = if ((byte & 0x80n) == 0x00n) {
      1n
    } else if ((byte & 0xF0n) == 0xF0n) {
      4n
    } else if ((byte & 0xE0n) == 0xE0n) {
      3n
    } else {
      2n
    }
    // Although a code point may take up to 4 bytes, it never needs 32 bits:
    // the highest allowed code point is 0x10FFFF, which fits in 21 bits.
    // Grain's "simple" numbers hold up to 31 bits, so the result can be
    // returned as a simple number without a heap allocation. `getCodePoint`
    // throws MalformedUnicode for sequences that would exceed this limit.
    let codePoint = getCodePoint(ptr)
    reader.bytePosition = tagSimpleNumber(
      WasmI32.add(bytePosition, codePointByteCount)
    )
    reader.charPosition = tagSimpleNumber(WasmI32.add(charPosition, 1n))
    tagSimpleNumber(codePoint)
  } else {
    // End of input.
    -1
  }

  // This function runs with @disableGC, so the references to the argument
  // and to the function itself are released by hand before returning.
  Memory.decRef(WasmI32.fromGrain(reader))
  Memory.decRef(WasmI32.fromGrain(readCodePointFast))
  result
}
// Reads the next code point from `reader`, advancing it.
// Returns Some(codePoint), or None once readCodePointFast reports a
// negative value (its end-of-input marker).
export let readCodePoint = (reader: StringReader) => {
  let next = readCodePointFast(reader)
  if (next < 0) {
    None
  } else {
    Some(next)
  }
}