Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(stdlib): Add Bytes.setChar and Bytes.getChar #2215

Merged
merged 9 commits into from
Jan 2, 2025
19 changes: 19 additions & 0 deletions compiler/test/stdlib/bytes.test.gr
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,25 @@ assert Bytes.length(Bytes.empty) == 0
let bytes = Bytes.make(64)
assert Bytes.length(bytes) == 64

// Bytes.getChar
// Indices are byte offsets, not character positions. In UTF-8 'a' and 'b'
// take 1 byte each, '©' takes 2, '✨' takes 3 and '🍞' takes 4, so the
// characters start at bytes 0, 1, 2, 4 and 7 respectively.
let bytes = Bytes.fromString("ab©✨🍞")
assert Bytes.getChar(0, bytes) == 'a'
assert Bytes.getChar(1, bytes) == 'b'
assert Bytes.getChar(2, bytes) == '©'
assert Bytes.getChar(4, bytes) == '✨'
assert Bytes.getChar(7, bytes) == '🍞'

// Bytes.setChar
// Each write is read back from the same byte index. The writes do not
// overlap: 'a' occupies byte 0, '©' bytes 1-2, '✨' bytes 3-5 and '🍞'
// bytes 7-10, all inside the 16-byte buffer.
let bytes = Bytes.make(16)
Bytes.setChar(0, 'a', bytes)
assert Bytes.getChar(0, bytes) == 'a'
Bytes.setChar(1, '©', bytes)
assert Bytes.getChar(1, bytes) == '©'
Bytes.setChar(3, '✨', bytes)
assert Bytes.getChar(3, bytes) == '✨'
Bytes.setChar(7, '🍞', bytes)
assert Bytes.getChar(7, bytes) == '🍞'

// Bytes.setInt8, Bytes.setUint8, Bytes.getInt8, Bytes.getUint8
let bytes = Bytes.make(1)
Bytes.setInt8(0, 0xffs, bytes)
Expand Down
50 changes: 8 additions & 42 deletions stdlib/buffer.gr
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ from "runtime/unsafe/wasmi32" include WasmI32
from "runtime/unsafe/conv" include Conv
from "runtime/exception" include Exception
from "runtime/dataStructures" include DataStructures
use DataStructures.{ untagChar }
use DataStructures.{ untagChar, tagSimpleNumber }
from "int32" include Int32
from "bytes" include Bytes
from "string" include String
from "char" include Char
from "runtime/numbers" include Numbers
use Numbers.{ coerceNumberToWasmI32 }
from "runtime/utf8" include Utf8
use Utf8.{ usvEncodeLength }

abstract record Buffer {
mut len: Number,
Expand Down Expand Up @@ -389,48 +391,12 @@ provide let addString = (string, buffer) => {
*/
@unsafe
provide let addChar = (char, buffer) => {
use WasmI32.{ (-), (*), (&), (|), (>>>), ltU as (<), gtU as (>), leU as (<=) }
let usv = untagChar(char)

let bytelen = if (usv < 0x80n) {
autogrow(1, buffer)
use WasmI32.{ (+) }
let off = coerceNumberToWasmI32(buffer.len)
let dst = WasmI32.fromGrain(buffer.data) + _VALUE_OFFSET
WasmI32.store8(dst, usv, off)
1
} else {
let mut count = 0n
let mut bytelen = 0
let mut offset = 0n
if (usv <= 0x07FFn) {
count = 1n
bytelen = 2
offset = 0xC0n
} else if (usv <= 0xFFFFn) {
count = 2n
bytelen = 3
offset = 0xE0n
} else {
count = 3n
bytelen = 4
offset = 0xF0n
}
use WasmI32.{ (+) }
autogrow(bytelen, buffer)
let off = coerceNumberToWasmI32(buffer.len)
let dst = WasmI32.fromGrain(buffer.data) + _VALUE_OFFSET
WasmI32.store8(dst, (usv >>> (6n * count)) + offset, off)
let mut n = 0n
while (count > 0n) {
n += 1n
let temp = usv >>> (6n * (count - 1n))
WasmI32.store8(dst + n, 0x80n | temp & 0x3Fn, off)
count -= 1n
}
bytelen
}
buffer.len += bytelen
let byteCount = tagSimpleNumber(usvEncodeLength(usv))
autogrow(byteCount, buffer)
let index = buffer.len
buffer.len += byteCount
Bytes.setChar(index, char, buffer.data)
}

/**
Expand Down
76 changes: 75 additions & 1 deletion stdlib/bytes.gr
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@ from "runtime/unsafe/wasmf64" include WasmF64
from "runtime/unsafe/conv" include Conv
from "runtime/dataStructures" include DataStructures
use DataStructures.{
tagChar,
tagInt8,
tagUint8,
tagInt16,
tagUint16,
untagChar,
untagInt8,
untagUint8,
untagInt16,
Expand All @@ -33,6 +35,14 @@ from "runtime/exception" include Exception
from "int32" include Int32
from "runtime/numbers" include Numbers
use Numbers.{ coerceNumberToWasmI32 }
from "runtime/utf8" include Utf8
use Utf8.{
utf8ByteCount,
getCodePoint,
usvEncodeLength,
writeUtf8CodePoint,
exception MalformedUnicode,
}

@unsafe
let _SIZE_OFFSET = 4n
Expand Down Expand Up @@ -396,6 +406,70 @@ provide let clear = (bytes: Bytes) => {
ignore(bytes)
}

/**
 * Gets the UTF-8 encoded character at the given byte index.
 *
 * @param index: The byte index to access
 * @param bytes: The byte sequence to access
 * @returns The character that starts at the given index
 *
 * @throws IndexOutOfBounds: When `index` is negative
 * @throws IndexOutOfBounds: When `index + 1` is greater than the bytes size
 * @throws MalformedUnicode: When the requested character is not a valid UTF-8 sequence
 *
 * @example
 * let bytes = Bytes.fromString("Hello")
 * assert Bytes.getChar(0, bytes) == 'H'
 *
 * @since v0.7.0
 */
@unsafe
provide let getChar = (index: Number, bytes: Bytes) => {
  use WasmI32.{ (+), (>) }
  let ptr = WasmI32.fromGrain(bytes)
  let size = getSize(ptr)
  let offset = coerceNumberToWasmI32(index)
  // The first byte of the character must lie inside the buffer before it is read.
  checkIndexIsInBounds(offset, 1n, size)
  let byte = WasmI32.load8U(ptr + offset, _VALUE_OFFSET)
  // The byte count is derived from the first byte; a sequence that would run
  // past the end of the buffer is rejected rather than decoded.
  let charSize = utf8ByteCount(byte)
  if (offset + charSize > size) {
    throw MalformedUnicode
  }
  let codePoint = getCodePoint(ptr + offset + _VALUE_OFFSET)
  ignore(bytes)
  tagChar(codePoint)
}

/**
* UTF-8 encodes a character starting at the given byte index.
*
* The character occupies between 1 and 4 consecutive bytes beginning at
* `index`; the whole span must fit inside `bytes`.
*
* @param index: The byte index to update
* @param value: The value to set
* @param bytes: The byte sequence to mutate
*
* @throws IndexOutOfBounds: When `index` is negative
* @throws IndexOutOfBounds: When `index + charSize` is greater than the bytes size, `charSize` is the number of bytes in the character ranging from 1 to 4
*
* @example
* let bytes = Bytes.make(1)
* Bytes.setChar(0, 'a', bytes)
* assert Bytes.getChar(0, bytes) == 'a'
*
* @since v0.7.0
*/
@unsafe
provide let setChar = (index: Number, value: Char, bytes: Bytes) => {
use WasmI32.{ (+) }
let ptr = WasmI32.fromGrain(bytes)
let size = getSize(ptr)
let offset = coerceNumberToWasmI32(index)
let usv = untagChar(value)
// Number of bytes the UTF-8 encoding of this character needs (1 to 4).
let charSize = usvEncodeLength(usv)
// Check the full encoded span before writing, so the write below cannot
// extend past the end of the byte sequence.
checkIndexIsInBounds(offset, charSize, size)
writeUtf8CodePoint(ptr + offset + _VALUE_OFFSET, usv)
// NOTE(review): presumably keeps `bytes` referenced until the raw write above
// has completed — confirm against the runtime's memory-management rules.
ignore(bytes)
}

/**
* Gets a signed 8-bit integer starting at the given byte index.
*
Expand Down Expand Up @@ -452,8 +526,8 @@ provide let setInt8 = (index: Number, value: Int8, bytes: Bytes) => {
let offset = coerceNumberToWasmI32(index)
checkIndexIsInBounds(offset, _INT8_BYTE_SIZE, size)
let v = untagInt8(value)
ignore(bytes)
WasmI32.store8(ptr + offset, v, _VALUE_OFFSET)
ignore(bytes)
}

/**
Expand Down
79 changes: 79 additions & 0 deletions stdlib/bytes.md
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,85 @@ Bytes.clear(bytes)
assert bytes == b"\x00\x00\x00\x00\x00"
```

### Bytes.**getChar**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
getChar : (index: Number, bytes: Bytes) => Char
```

Gets the UTF-8 encoded character at the given byte index.

Parameters:

|param|type|description|
|-----|----|-----------|
|`index`|`Number`|The byte index to access|
|`bytes`|`Bytes`|The byte sequence to access|

Returns:

|type|description|
|----|-----------|
|`Char`|The character that starts at the given index|

Throws:

`IndexOutOfBounds`

* When `index` is negative
* When `index + 1` is greater than the bytes size

`MalformedUnicode`

* When the requested character is not a valid UTF-8 sequence

Examples:

```grain
let bytes = Bytes.fromString("Hello")
assert Bytes.getChar(0, bytes) == 'H'
```

### Bytes.**setChar**

<details disabled>
<summary tabindex="-1">Added in <code>next</code></summary>
No other changes yet.
</details>

```grain
setChar : (index: Number, value: Char, bytes: Bytes) => Void
```

UTF-8 encodes a character starting at the given byte index.

Parameters:

|param|type|description|
|-----|----|-----------|
|`index`|`Number`|The byte index to update|
|`value`|`Char`|The value to set|
|`bytes`|`Bytes`|The byte sequence to mutate|

Throws:

`IndexOutOfBounds`

* When `index` is negative
* When `index + charSize` is greater than the bytes size, `charSize` is the number of bytes in the character ranging from 1 to 4

Examples:

```grain
let bytes = Bytes.make(1)
Bytes.setChar(0, 'a', bytes)
assert Bytes.getChar(0, bytes) == 'a'
```

### Bytes.**getInt8**

<details>
Expand Down
54 changes: 7 additions & 47 deletions stdlib/char.gr
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ module Char
from "runtime/unsafe/wasmi32" include WasmI32
from "runtime/dataStructures" include DataStructures
use DataStructures.{ tagSimpleNumber, tagChar, untagChar, allocateString }

exception MalformedUtf8
from "runtime/utf8" include Utf8
use Utf8.{ usvEncodeLength, writeUtf8CodePoint }

/**
* The minimum valid Unicode scalar value.
Expand Down Expand Up @@ -164,52 +164,12 @@ provide let pred = char => {
*/
@unsafe
provide let toString = (char: Char) => {
use WasmI32.{
(+),
(-),
(*),
(&),
(|),
(>>>),
ltU as (<),
gtU as (>),
leU as (<=),
}

use WasmI32.{ (+) }
let usv = untagChar(char)

let result = if (usv < 0x80n) {
let string = allocateString(1n)
WasmI32.store8(string, usv, 8n)
WasmI32.toGrain(string): String
} else {
let mut count = 0n
let mut offset = 0n
if (usv <= 0x07FFn) {
count = 1n
offset = 0xC0n
} else if (usv <= 0xFFFFn) {
count = 2n
offset = 0xE0n
} else {
count = 3n
offset = 0xF0n
}
let string = allocateString(count + 1n)
WasmI32.store8(string, (usv >>> (6n * count)) + offset, 8n)

let mut n = 0n
while (count > 0n) {
n += 1n
let temp = usv >>> (6n * (count - 1n))
WasmI32.store8(string + n, 0x80n | temp & 0x3Fn, 8n)
count -= 1n
}

WasmI32.toGrain(string): String
}

result
let byteCount = usvEncodeLength(usv)
let string = allocateString(byteCount)
writeUtf8CodePoint(string + 8n, usv)
WasmI32.toGrain(string): String
}

/**
Expand Down
Loading
Loading