From 4e8a97c949c139049fcfd176017854a7117e2a9d Mon Sep 17 00:00:00 2001
From: Joseph Burton <burtonjae@hotmail.co.uk>
Date: Fri, 6 Oct 2023 02:49:39 +0100
Subject: [PATCH] Implement JavaString (#540)

# Objective

- Solve the problem that NBT cannot be loaded when strings contain
invalid UTF-16, such as in ban books.

# Solution

- This is the first part of the solution to this problem, a new string
implementation which is tolerant of invalid UTF-16. See the added readme
for details.
- This allows for round-trip, useful manipulation of strings which may
not be fully valid.
- This solution is widely applicable outside of Valence when you have to
deal with arbitrary Java strings, such as when manipulating class files.
---
 .github/workflows/ci.yml              |   19 +
 Cargo.toml                            |    1 +
 assets/depgraph.svg                   |  366 ++--
 crates/java_string/Cargo.toml         |   16 +
 crates/java_string/README.md          |   17 +
 crates/java_string/src/cesu8.rs       |  279 +++
 crates/java_string/src/char.rs        | 1012 +++++++++++
 crates/java_string/src/error.rs       |  126 ++
 crates/java_string/src/iter.rs        |  977 +++++++++++
 crates/java_string/src/lib.rs         |   27 +
 crates/java_string/src/owned.rs       | 1401 ++++++++++++++++
 crates/java_string/src/pattern.rs     |  402 +++++
 crates/java_string/src/serde.rs       |  263 +++
 crates/java_string/src/slice.rs       | 2239 +++++++++++++++++++++++++
 crates/java_string/src/validations.rs |  369 ++++
 typos.toml                            |    2 +-
 16 files changed, 7335 insertions(+), 181 deletions(-)
 create mode 100644 crates/java_string/Cargo.toml
 create mode 100644 crates/java_string/README.md
 create mode 100644 crates/java_string/src/cesu8.rs
 create mode 100644 crates/java_string/src/char.rs
 create mode 100644 crates/java_string/src/error.rs
 create mode 100644 crates/java_string/src/iter.rs
 create mode 100644 crates/java_string/src/lib.rs
 create mode 100644 crates/java_string/src/owned.rs
 create mode 100644 crates/java_string/src/pattern.rs
 create mode 100644 crates/java_string/src/serde.rs
 create mode 100644 crates/java_string/src/slice.rs
 create mode 100644 crates/java_string/src/validations.rs
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 78e12f7f5..ea46962fd 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -117,6 +117,25 @@ jobs:
       - name: Run valence_nbt tests without preserve_order feature
         run: cargo test -p valence_nbt --all-targets
 
+  valence-miri:
+    name: Miri Tests
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v3
+
+      - name: Setup Rust toolchain and cache
+        uses: actions-rust-lang/setup-rust-toolchain@v1.5.0
+        with:
+          toolchain: "nightly"
+          components: "miri"
+
+      - name: Run tests
+        run: cargo miri test --workspace --all-features --lib --bins --tests
+
+      - name: Run doctests
+        run: cargo miri test --workspace --all-features --doc
+
   extractor-build:
     name: Build Extractor
     runs-on: ubuntu-latest
diff --git a/Cargo.toml b/Cargo.toml
index cccade60f..f221a2dd4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -135,6 +135,7 @@ hmac = "0.12.1"
 image = "0.24.6"
 indexmap = "2.0.0"
 itertools = "0.11.0"
+java_string = { path = "crates/java_string", version = "0.1.0" }
 lru = "0.11.0"
 noise = "0.8.2"
 num = "0.4.0"
diff --git a/assets/depgraph.svg b/assets/depgraph.svg
index b9523f69e..8bb16f718 100644
--- a/assets/depgraph.svg
+++ b/assets/depgraph.svg
@@ -12,368 +12,374 @@
 <!-- 0 -->
 <g id="node1" class="node">
 <title>0</title>
-<polygon fill="none" stroke="black" points="173,-468 0,-468 0,-432 173,-432 173,-468"/>
-<text text-anchor="middle" x="86.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_advancement</text>
+<polygon fill="none" stroke="black" points="573,-612 478,-612 478,-576 573,-576 573,-612"/>
+<text text-anchor="middle" x="525.5" y="-590.3" font-family="Times,serif" font-size="14.00">java_string</text>
 </g>
 <!-- 1 -->
 <g id="node2" class="node">
 <title>1</title>
+<polygon fill="none" stroke="black" points="173,-468 0,-468 0,-432 173,-432 173,-468"/>
+<text text-anchor="middle" x="86.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_advancement</text>
+</g>
+<!-- 2 -->
+<g id="node3" class="node">
+<title>2</title>
 <polygon fill="none" stroke="black" points="781,-396 658,-396 658,-360 781,-360 781,-396"/>
 <text text-anchor="middle" x="719.5" y="-374.3" font-family="Times,serif" font-size="14.00">valence_server</text>
 </g>
-<!-- 0&#45;&gt;1 -->
+<!-- 1&#45;&gt;2 -->
 <g id="edge1" class="edge">
-<title>0&#45;&gt;1</title>
+<title>1&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M173.06,-433.36C176.24,-432.88 179.4,-432.43 182.5,-432 348.43,-409.15 544.53,-392.37 647.71,-384.32"/>
 <polygon fill="black" stroke="black" points="648.01,-387.81 657.71,-383.55 647.47,-380.83 648.01,-387.81"/>
 </g>
-<!-- 2 -->
-<g id="node3" class="node">
-<title>2</title>
+<!-- 3 -->
+<g id="node4" class="node">
+<title>3</title>
 <polygon fill="none" stroke="black" points="707,-324 588,-324 588,-288 707,-288 707,-324"/>
 <text text-anchor="middle" x="647.5" y="-302.3" font-family="Times,serif" font-size="14.00">valence_entity</text>
 </g>
-<!-- 1&#45;&gt;2 -->
+<!-- 2&#45;&gt;3 -->
 <g id="edge2" class="edge">
-<title>1&#45;&gt;2</title>
+<title>2&#45;&gt;3</title>
 <path fill="none" stroke="black" d="M701.7,-359.7C692.9,-351.14 682.12,-340.66 672.5,-331.3"/>
 <polygon fill="black" stroke="black" points="674.7,-328.57 665.09,-324.1 669.82,-333.58 674.7,-328.57"/>
 </g>
-<!-- 11 -->
-<g id="node4" class="node">
-<title>11</title>
+<!-- 12 -->
+<g id="node5" class="node">
+<title>12</title>
 <polygon fill="none" stroke="black" points="859.5,-324 725.5,-324 725.5,-288 859.5,-288 859.5,-324"/>
 <text text-anchor="middle" x="792.5" y="-302.3" font-family="Times,serif" font-size="14.00">valence_registry</text>
 </g>
-<!-- 1&#45;&gt;11 -->
+<!-- 2&#45;&gt;12 -->
 <g id="edge3" class="edge">
-<title>1&#45;&gt;11</title>
+<title>2&#45;&gt;12</title>
 <path fill="none" stroke="black" d="M737.54,-359.7C746.56,-351.05 757.62,-340.45 767.44,-331.03"/>
 <polygon fill="black" stroke="black" points="769.87,-333.55 774.66,-324.1 765.02,-328.5 769.87,-333.55"/>
 </g>
-<!-- 10 -->
-<g id="node5" class="node">
-<title>10</title>
-<polygon fill="none" stroke="black" points="815.5,-252 625.5,-252 625.5,-216 815.5,-216 815.5,-252"/>
-<text text-anchor="middle" x="720.5" y="-230.3" font-family="Times,serif" font-size="14.00">valence_server_common</text>
+<!-- 11 -->
+<g id="node6" class="node">
+<title>11</title>
+<polygon fill="none" stroke="black" points="814.5,-252 624.5,-252 624.5,-216 814.5,-216 814.5,-252"/>
+<text text-anchor="middle" x="719.5" y="-230.3" font-family="Times,serif" font-size="14.00">valence_server_common</text>
 </g>
-<!-- 2&#45;&gt;10 -->
+<!-- 3&#45;&gt;11 -->
 <g id="edge4" class="edge">
-<title>2&#45;&gt;10</title>
-<path fill="none" stroke="black" d="M665.54,-287.7C674.56,-279.05 685.62,-268.45 695.44,-259.03"/>
-<polygon fill="black" stroke="black" points="697.87,-261.55 702.66,-252.1 693.02,-256.5 697.87,-261.55"/>
+<title>3&#45;&gt;11</title>
+<path fill="none" stroke="black" d="M665.3,-287.7C674.1,-279.14 684.88,-268.66 694.5,-259.3"/>
+<polygon fill="black" stroke="black" points="697.18,-261.58 701.91,-252.1 692.3,-256.57 697.18,-261.58"/>
 </g>
-<!-- 11&#45;&gt;10 -->
+<!-- 12&#45;&gt;11 -->
 <g id="edge12" class="edge">
-<title>11&#45;&gt;10</title>
-<path fill="none" stroke="black" d="M774.7,-287.7C765.9,-279.14 755.12,-268.66 745.5,-259.3"/>
-<polygon fill="black" stroke="black" points="747.7,-256.57 738.09,-252.1 742.82,-261.58 747.7,-256.57"/>
+<title>12&#45;&gt;11</title>
+<path fill="none" stroke="black" d="M774.46,-287.7C765.44,-279.05 754.38,-268.45 744.56,-259.03"/>
+<polygon fill="black" stroke="black" points="746.98,-256.5 737.34,-252.1 742.13,-261.55 746.98,-256.5"/>
 </g>
-<!-- 6 -->
-<g id="node9" class="node">
-<title>6</title>
-<polygon fill="none" stroke="black" points="637.5,-180 501.5,-180 501.5,-144 637.5,-144 637.5,-180"/>
-<text text-anchor="middle" x="569.5" y="-158.3" font-family="Times,serif" font-size="14.00">valence_protocol</text>
+<!-- 7 -->
+<g id="node10" class="node">
+<title>7</title>
+<polygon fill="none" stroke="black" points="636.5,-180 500.5,-180 500.5,-144 636.5,-144 636.5,-180"/>
+<text text-anchor="middle" x="568.5" y="-158.3" font-family="Times,serif" font-size="14.00">valence_protocol</text>
 </g>
-<!-- 10&#45;&gt;6 -->
+<!-- 11&#45;&gt;7 -->
 <g id="edge11" class="edge">
-<title>10&#45;&gt;6</title>
-<path fill="none" stroke="black" d="M683.56,-215.88C663.01,-206.35 637.25,-194.41 615.41,-184.28"/>
-<polygon fill="black" stroke="black" points="616.86,-181.1 606.32,-180.07 613.92,-187.45 616.86,-181.1"/>
-</g>
-<!-- 3 -->
-<g id="node6" class="node">
-<title>3</title>
-<polygon fill="none" stroke="black" points="497.5,-36 383.5,-36 383.5,0 497.5,0 497.5,-36"/>
-<text text-anchor="middle" x="440.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_math</text>
+<title>11&#45;&gt;7</title>
+<path fill="none" stroke="black" d="M682.56,-215.88C662.01,-206.35 636.25,-194.41 614.41,-184.28"/>
+<polygon fill="black" stroke="black" points="615.86,-181.1 605.32,-180.07 612.92,-187.45 615.86,-181.1"/>
 </g>
 <!-- 4 -->
 <g id="node7" class="node">
 <title>4</title>
-<polygon fill="none" stroke="black" points="749,-36 648,-36 648,0 749,0 749,-36"/>
-<text text-anchor="middle" x="698.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_nbt</text>
+<polygon fill="none" stroke="black" points="496.5,-36 382.5,-36 382.5,0 496.5,0 496.5,-36"/>
+<text text-anchor="middle" x="439.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_math</text>
 </g>
 <!-- 5 -->
 <g id="node8" class="node">
 <title>5</title>
-<polygon fill="none" stroke="black" points="629.5,-36 515.5,-36 515.5,0 629.5,0 629.5,-36"/>
-<text text-anchor="middle" x="572.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_ident</text>
+<polygon fill="none" stroke="black" points="748,-36 647,-36 647,0 748,0 748,-36"/>
+<text text-anchor="middle" x="697.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_nbt</text>
 </g>
-<!-- 7 -->
-<g id="node10" class="node">
-<title>7</title>
-<polygon fill="none" stroke="black" points="571.5,-108 421.5,-108 421.5,-72 571.5,-72 571.5,-108"/>
-<text text-anchor="middle" x="496.5" y="-86.3" font-family="Times,serif" font-size="14.00">valence_generated</text>
+<!-- 6 -->
+<g id="node9" class="node">
+<title>6</title>
+<polygon fill="none" stroke="black" points="628.5,-36 514.5,-36 514.5,0 628.5,0 628.5,-36"/>
+<text text-anchor="middle" x="571.5" y="-14.3" font-family="Times,serif" font-size="14.00">valence_ident</text>
 </g>
-<!-- 6&#45;&gt;7 -->
+<!-- 8 -->
+<g id="node11" class="node">
+<title>8</title>
+<polygon fill="none" stroke="black" points="570.5,-108 420.5,-108 420.5,-72 570.5,-72 570.5,-108"/>
+<text text-anchor="middle" x="495.5" y="-86.3" font-family="Times,serif" font-size="14.00">valence_generated</text>
+</g>
+<!-- 7&#45;&gt;8 -->
 <g id="edge5" class="edge">
-<title>6&#45;&gt;7</title>
-<path fill="none" stroke="black" d="M551.46,-143.7C542.44,-135.05 531.38,-124.45 521.56,-115.03"/>
-<polygon fill="black" stroke="black" points="523.98,-112.5 514.34,-108.1 519.13,-117.55 523.98,-112.5"/>
+<title>7&#45;&gt;8</title>
+<path fill="none" stroke="black" d="M550.46,-143.7C541.44,-135.05 530.38,-124.45 520.56,-115.03"/>
+<polygon fill="black" stroke="black" points="522.98,-112.5 513.34,-108.1 518.13,-117.55 522.98,-112.5"/>
 </g>
-<!-- 9 -->
-<g id="node11" class="node">
-<title>9</title>
-<polygon fill="none" stroke="black" points="695.5,-108 589.5,-108 589.5,-72 695.5,-72 695.5,-108"/>
-<text text-anchor="middle" x="642.5" y="-86.3" font-family="Times,serif" font-size="14.00">valence_text</text>
+<!-- 10 -->
+<g id="node12" class="node">
+<title>10</title>
+<polygon fill="none" stroke="black" points="694.5,-108 588.5,-108 588.5,-72 694.5,-72 694.5,-108"/>
+<text text-anchor="middle" x="641.5" y="-86.3" font-family="Times,serif" font-size="14.00">valence_text</text>
 </g>
-<!-- 6&#45;&gt;9 -->
+<!-- 7&#45;&gt;10 -->
 <g id="edge6" class="edge">
-<title>6&#45;&gt;9</title>
-<path fill="none" stroke="black" d="M587.54,-143.7C596.56,-135.05 607.62,-124.45 617.44,-115.03"/>
-<polygon fill="black" stroke="black" points="619.87,-117.55 624.66,-108.1 615.02,-112.5 619.87,-117.55"/>
+<title>7&#45;&gt;10</title>
+<path fill="none" stroke="black" d="M586.54,-143.7C595.56,-135.05 606.62,-124.45 616.44,-115.03"/>
+<polygon fill="black" stroke="black" points="618.87,-117.55 623.66,-108.1 614.02,-112.5 618.87,-117.55"/>
 </g>
-<!-- 7&#45;&gt;3 -->
+<!-- 8&#45;&gt;4 -->
 <g id="edge7" class="edge">
-<title>7&#45;&gt;3</title>
-<path fill="none" stroke="black" d="M482.66,-71.7C476.01,-63.39 467.92,-53.28 460.61,-44.14"/>
-<polygon fill="black" stroke="black" points="463.16,-41.73 454.18,-36.1 457.7,-46.1 463.16,-41.73"/>
+<title>8&#45;&gt;4</title>
+<path fill="none" stroke="black" d="M481.66,-71.7C475.01,-63.39 466.92,-53.28 459.61,-44.14"/>
+<polygon fill="black" stroke="black" points="462.16,-41.73 453.18,-36.1 456.7,-46.1 462.16,-41.73"/>
 </g>
-<!-- 7&#45;&gt;5 -->
+<!-- 8&#45;&gt;6 -->
 <g id="edge8" class="edge">
-<title>7&#45;&gt;5</title>
-<path fill="none" stroke="black" d="M515.29,-71.7C524.67,-63.05 536.18,-52.45 546.41,-43.03"/>
-<polygon fill="black" stroke="black" points="548.95,-45.45 553.93,-36.1 544.2,-40.3 548.95,-45.45"/>
+<title>8&#45;&gt;6</title>
+<path fill="none" stroke="black" d="M514.29,-71.7C523.67,-63.05 535.18,-52.45 545.41,-43.03"/>
+<polygon fill="black" stroke="black" points="547.95,-45.45 552.93,-36.1 543.2,-40.3 547.95,-45.45"/>
 </g>
-<!-- 9&#45;&gt;4 -->
+<!-- 10&#45;&gt;5 -->
 <g id="edge9" class="edge">
-<title>9&#45;&gt;4</title>
-<path fill="none" stroke="black" d="M656.34,-71.7C662.99,-63.39 671.08,-53.28 678.39,-44.14"/>
-<polygon fill="black" stroke="black" points="681.3,-46.1 684.82,-36.1 675.84,-41.73 681.3,-46.1"/>
+<title>10&#45;&gt;5</title>
+<path fill="none" stroke="black" d="M655.34,-71.7C661.99,-63.39 670.08,-53.28 677.39,-44.14"/>
+<polygon fill="black" stroke="black" points="680.3,-46.1 683.82,-36.1 674.84,-41.73 680.3,-46.1"/>
 </g>
-<!-- 9&#45;&gt;5 -->
+<!-- 10&#45;&gt;6 -->
 <g id="edge10" class="edge">
-<title>9&#45;&gt;5</title>
-<path fill="none" stroke="black" d="M625.2,-71.7C616.64,-63.14 606.16,-52.66 596.8,-43.3"/>
-<polygon fill="black" stroke="black" points="599.15,-40.7 589.6,-36.1 594.2,-45.65 599.15,-40.7"/>
+<title>10&#45;&gt;6</title>
+<path fill="none" stroke="black" d="M624.2,-71.7C615.64,-63.14 605.16,-52.66 595.8,-43.3"/>
+<polygon fill="black" stroke="black" points="598.15,-40.7 588.6,-36.1 593.2,-45.65 598.15,-40.7"/>
 </g>
-<!-- 8 -->
-<g id="node12" class="node">
-<title>8</title>
+<!-- 9 -->
+<g id="node13" class="node">
+<title>9</title>
 <polygon fill="none" stroke="black" points="1004.5,-612 852.5,-612 852.5,-576 1004.5,-576 1004.5,-612"/>
 <text text-anchor="middle" x="928.5" y="-590.3" font-family="Times,serif" font-size="14.00">valence_build_utils</text>
 </g>
-<!-- 12 -->
-<g id="node13" class="node">
-<title>12</title>
+<!-- 13 -->
+<g id="node14" class="node">
+<title>13</title>
 <polygon fill="none" stroke="black" points="303.5,-468 191.5,-468 191.5,-432 303.5,-432 303.5,-468"/>
 <text text-anchor="middle" x="247.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_anvil</text>
 </g>
-<!-- 12&#45;&gt;1 -->
+<!-- 13&#45;&gt;2 -->
 <g id="edge13" class="edge">
-<title>12&#45;&gt;1</title>
+<title>13&#45;&gt;2</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M303.77,-433.82C306.71,-433.17 309.64,-432.56 312.5,-432 428.87,-409.3 565.89,-393.72 647.57,-385.59"/>
 <polygon fill="black" stroke="black" points="648.13,-389.05 657.74,-384.59 647.45,-382.09 648.13,-389.05"/>
 </g>
-<!-- 13 -->
-<g id="node14" class="node">
-<title>13</title>
+<!-- 14 -->
+<g id="node15" class="node">
+<title>14</title>
 <polygon fill="none" stroke="black" points="461.5,-468 321.5,-468 321.5,-432 461.5,-432 461.5,-468"/>
 <text text-anchor="middle" x="391.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_boss_bar</text>
 </g>
-<!-- 13&#45;&gt;1 -->
+<!-- 14&#45;&gt;2 -->
 <g id="edge14" class="edge">
-<title>13&#45;&gt;1</title>
+<title>14&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M461.71,-433.88C464.68,-433.25 467.62,-432.62 470.5,-432 530.24,-419.21 598.06,-404.78 647.82,-394.21"/>
 <polygon fill="black" stroke="black" points="648.78,-397.59 657.84,-392.08 647.33,-390.74 648.78,-397.59"/>
 </g>
-<!-- 14 -->
-<g id="node15" class="node">
-<title>14</title>
+<!-- 15 -->
+<g id="node16" class="node">
+<title>15</title>
 <polygon fill="none" stroke="black" points="625.5,-468 479.5,-468 479.5,-432 625.5,-432 625.5,-468"/>
 <text text-anchor="middle" x="552.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_inventory</text>
 </g>
-<!-- 14&#45;&gt;1 -->
+<!-- 15&#45;&gt;2 -->
 <g id="edge15" class="edge">
-<title>14&#45;&gt;1</title>
+<title>15&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M593.35,-431.88C616.28,-422.26 645.08,-410.19 669.37,-400.01"/>
 <polygon fill="black" stroke="black" points="670.91,-403.16 678.78,-396.07 668.2,-396.71 670.91,-403.16"/>
 </g>
-<!-- 15 -->
-<g id="node16" class="node">
-<title>15</title>
-<polygon fill="none" stroke="black" points="1457.5,-396 1349.5,-396 1349.5,-360 1457.5,-360 1457.5,-396"/>
-<text text-anchor="middle" x="1403.5" y="-374.3" font-family="Times,serif" font-size="14.00">valence_lang</text>
-</g>
 <!-- 16 -->
 <g id="node17" class="node">
 <title>16</title>
+<polygon fill="none" stroke="black" points="1457.5,-396 1349.5,-396 1349.5,-360 1457.5,-360 1457.5,-396"/>
+<text text-anchor="middle" x="1403.5" y="-374.3" font-family="Times,serif" font-size="14.00">valence_lang</text>
+</g>
+<!-- 17 -->
+<g id="node18" class="node">
+<title>17</title>
 <polygon fill="none" stroke="black" points="1471.5,-468 1335.5,-468 1335.5,-432 1471.5,-432 1471.5,-468"/>
 <text text-anchor="middle" x="1403.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_network</text>
 </g>
-<!-- 16&#45;&gt;1 -->
+<!-- 17&#45;&gt;2 -->
 <g id="edge16" class="edge">
-<title>16&#45;&gt;1</title>
+<title>17&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M1335.15,-433.43C1332.24,-432.91 1329.34,-432.44 1326.5,-432 1134.42,-402.54 905.44,-388.07 791.43,-382.24"/>
 <polygon fill="black" stroke="black" points="791.59,-378.75 781.42,-381.74 791.23,-385.74 791.59,-378.75"/>
 </g>
-<!-- 16&#45;&gt;15 -->
+<!-- 17&#45;&gt;16 -->
 <g id="edge17" class="edge">
-<title>16&#45;&gt;15</title>
+<title>17&#45;&gt;16</title>
 <path fill="none" stroke="black" d="M1403.5,-431.7C1403.5,-423.98 1403.5,-414.71 1403.5,-406.11"/>
 <polygon fill="black" stroke="black" points="1407,-406.1 1403.5,-396.1 1400,-406.1 1407,-406.1"/>
 </g>
-<!-- 17 -->
-<g id="node18" class="node">
-<title>17</title>
+<!-- 18 -->
+<g id="node19" class="node">
+<title>18</title>
 <polygon fill="none" stroke="black" points="795.5,-468 643.5,-468 643.5,-432 795.5,-432 795.5,-468"/>
 <text text-anchor="middle" x="719.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_player_list</text>
 </g>
-<!-- 17&#45;&gt;1 -->
+<!-- 18&#45;&gt;2 -->
 <g id="edge18" class="edge">
-<title>17&#45;&gt;1</title>
+<title>18&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M719.5,-431.7C719.5,-423.98 719.5,-414.71 719.5,-406.11"/>
 <polygon fill="black" stroke="black" points="723,-406.1 719.5,-396.1 716,-406.1 723,-406.1"/>
 </g>
-<!-- 18 -->
-<g id="node19" class="node">
-<title>18</title>
+<!-- 19 -->
+<g id="node20" class="node">
+<title>19</title>
 <polygon fill="none" stroke="black" points="971,-468 814,-468 814,-432 971,-432 971,-468"/>
 <text text-anchor="middle" x="892.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_scoreboard</text>
 </g>
-<!-- 18&#45;&gt;1 -->
+<!-- 19&#45;&gt;2 -->
 <g id="edge19" class="edge">
-<title>18&#45;&gt;1</title>
+<title>19&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M850.18,-431.88C826.32,-422.22 796.33,-410.09 771.1,-399.88"/>
 <polygon fill="black" stroke="black" points="772.27,-396.57 761.68,-396.07 769.64,-403.06 772.27,-396.57"/>
 </g>
-<!-- 19 -->
-<g id="node20" class="node">
-<title>19</title>
-<polygon fill="none" stroke="black" points="1148,-612 1023,-612 1023,-576 1148,-576 1148,-612"/>
-<text text-anchor="middle" x="1085.5" y="-590.3" font-family="Times,serif" font-size="14.00">valence_spatial</text>
-</g>
 <!-- 20 -->
 <g id="node21" class="node">
 <title>20</title>
+<polygon fill="none" stroke="black" points="1148,-612 1023,-612 1023,-576 1148,-576 1148,-612"/>
+<text text-anchor="middle" x="1085.5" y="-590.3" font-family="Times,serif" font-size="14.00">valence_spatial</text>
+</g>
+<!-- 21 -->
+<g id="node22" class="node">
+<title>21</title>
 <polygon fill="none" stroke="black" points="1125.5,-468 989.5,-468 989.5,-432 1125.5,-432 1125.5,-468"/>
 <text text-anchor="middle" x="1057.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_weather</text>
 </g>
-<!-- 20&#45;&gt;1 -->
+<!-- 21&#45;&gt;2 -->
 <g id="edge20" class="edge">
-<title>20&#45;&gt;1</title>
+<title>21&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M989.49,-433.92C986.45,-433.27 983.45,-432.62 980.5,-432 916.55,-418.45 843.62,-403.7 791.14,-393.21"/>
 <polygon fill="black" stroke="black" points="791.75,-389.76 781.26,-391.23 790.38,-396.63 791.75,-389.76"/>
 </g>
-<!-- 21 -->
-<g id="node22" class="node">
-<title>21</title>
+<!-- 22 -->
+<g id="node23" class="node">
+<title>22</title>
 <polygon fill="none" stroke="black" points="1317,-468 1144,-468 1144,-432 1317,-432 1317,-468"/>
 <text text-anchor="middle" x="1230.5" y="-446.3" font-family="Times,serif" font-size="14.00">valence_world_border</text>
 </g>
-<!-- 21&#45;&gt;1 -->
+<!-- 22&#45;&gt;2 -->
 <g id="edge21" class="edge">
-<title>21&#45;&gt;1</title>
+<title>22&#45;&gt;2</title>
 <path fill="none" stroke="black" d="M1143.92,-433.47C1140.75,-432.96 1137.6,-432.47 1134.5,-432 1014.39,-413.71 874.06,-396.69 791.21,-387.09"/>
 <polygon fill="black" stroke="black" points="791.49,-383.6 781.15,-385.93 790.69,-390.56 791.49,-383.6"/>
 </g>
-<!-- 22 -->
-<g id="node23" class="node">
-<title>22</title>
-<polygon fill="none" stroke="black" points="718,-612 591,-612 591,-576 718,-576 718,-612"/>
-<text text-anchor="middle" x="654.5" y="-590.3" font-family="Times,serif" font-size="14.00">dump_schedule</text>
-</g>
 <!-- 23 -->
 <g id="node24" class="node">
 <title>23</title>
+<polygon fill="none" stroke="black" points="718,-612 591,-612 591,-576 718,-576 718,-612"/>
+<text text-anchor="middle" x="654.5" y="-590.3" font-family="Times,serif" font-size="14.00">dump_schedule</text>
+</g>
+<!-- 24 -->
+<g id="node25" class="node">
+<title>24</title>
 <polygon fill="none" stroke="black" points="755,-540 684,-540 684,-504 755,-504 755,-540"/>
 <text text-anchor="middle" x="719.5" y="-518.3" font-family="Times,serif" font-size="14.00">valence</text>
 </g>
-<!-- 22&#45;&gt;23 -->
+<!-- 23&#45;&gt;24 -->
 <g id="edge22" class="edge">
-<title>22&#45;&gt;23</title>
+<title>23&#45;&gt;24</title>
 <path fill="none" stroke="black" d="M670.57,-575.7C678.44,-567.22 688.06,-556.86 696.67,-547.58"/>
 <polygon fill="black" stroke="black" points="699.38,-549.81 703.62,-540.1 694.25,-545.05 699.38,-549.81"/>
 </g>
-<!-- 23&#45;&gt;0 -->
+<!-- 24&#45;&gt;1 -->
 <g id="edge23" class="edge">
-<title>23&#45;&gt;0</title>
+<title>24&#45;&gt;1</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M683.89,-518.44C599.09,-512.13 378.38,-494.52 183.21,-468.03"/>
 <polygon fill="black" stroke="black" points="183.44,-464.53 173.06,-466.64 182.49,-471.47 183.44,-464.53"/>
 </g>
-<!-- 23&#45;&gt;12 -->
+<!-- 24&#45;&gt;13 -->
 <g id="edge24" class="edge">
-<title>23&#45;&gt;12</title>
+<title>24&#45;&gt;13</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M683.87,-517.89C614.46,-511.5 456.18,-495.32 313.98,-468.16"/>
 <polygon fill="black" stroke="black" points="314.25,-464.64 303.77,-466.18 312.92,-471.52 314.25,-464.64"/>
 </g>
-<!-- 23&#45;&gt;13 -->
+<!-- 24&#45;&gt;14 -->
 <g id="edge25" class="edge">
-<title>23&#45;&gt;13</title>
+<title>24&#45;&gt;14</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M683.62,-513.39C637.12,-503.52 553.23,-485.7 471.64,-468.24"/>
 <polygon fill="black" stroke="black" points="472.22,-464.79 461.71,-466.12 470.76,-471.63 472.22,-464.79"/>
 </g>
-<!-- 23&#45;&gt;14 -->
+<!-- 24&#45;&gt;15 -->
 <g id="edge26" class="edge">
-<title>23&#45;&gt;14</title>
+<title>24&#45;&gt;15</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M683.67,-505.98C660.03,-496.07 628.72,-482.95 602.61,-472.01"/>
 <polygon fill="black" stroke="black" points="603.83,-468.72 593.26,-468.08 601.13,-475.18 603.83,-468.72"/>
 </g>
-<!-- 23&#45;&gt;16 -->
+<!-- 24&#45;&gt;17 -->
 <g id="edge27" class="edge">
-<title>23&#45;&gt;16</title>
+<title>24&#45;&gt;17</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M755.27,-519.51C847.89,-515.29 1103.22,-501.46 1324.95,-468.12"/>
 <polygon fill="black" stroke="black" points="1325.79,-471.54 1335.15,-466.57 1324.74,-464.62 1325.79,-471.54"/>
 </g>
-<!-- 23&#45;&gt;17 -->
+<!-- 24&#45;&gt;18 -->
 <g id="edge28" class="edge">
-<title>23&#45;&gt;17</title>
+<title>24&#45;&gt;18</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M719.5,-503.7C719.5,-495.98 719.5,-486.71 719.5,-478.11"/>
 <polygon fill="black" stroke="black" points="723,-478.1 719.5,-468.1 716,-478.1 723,-478.1"/>
 </g>
-<!-- 23&#45;&gt;18 -->
+<!-- 24&#45;&gt;19 -->
 <g id="edge29" class="edge">
-<title>23&#45;&gt;18</title>
+<title>24&#45;&gt;19</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M755.35,-506.5C780.05,-496.5 813.25,-483.06 840.81,-471.92"/>
 <polygon fill="black" stroke="black" points="842.34,-475.07 850.3,-468.08 839.71,-468.58 842.34,-475.07"/>
 </g>
-<!-- 23&#45;&gt;20 -->
+<!-- 24&#45;&gt;21 -->
 <g id="edge30" class="edge">
-<title>23&#45;&gt;20</title>
+<title>24&#45;&gt;21</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M755,-513.99C803.39,-504.39 893.02,-486.46 979.51,-468.19"/>
 <polygon fill="black" stroke="black" points="980.43,-471.57 989.49,-466.08 978.99,-464.73 980.43,-471.57"/>
 </g>
-<!-- 23&#45;&gt;21 -->
+<!-- 24&#45;&gt;22 -->
 <g id="edge31" class="edge">
-<title>23&#45;&gt;21</title>
+<title>24&#45;&gt;22</title>
 <path fill="none" stroke="black" stroke-dasharray="1,5" d="M755.11,-517.04C825.37,-509.1 987,-490.2 1133.83,-468.06"/>
 <polygon fill="black" stroke="black" points="1134.56,-471.49 1143.92,-466.53 1133.51,-464.57 1134.56,-471.49"/>
 </g>
-<!-- 24 -->
-<g id="node25" class="node">
-<title>24</title>
-<polygon fill="none" stroke="black" points="514,-252 377,-252 377,-216 514,-216 514,-252"/>
-<text text-anchor="middle" x="445.5" y="-230.3" font-family="Times,serif" font-size="14.00">packet_inspector</text>
-</g>
-<!-- 24&#45;&gt;6 -->
-<g id="edge32" class="edge">
-<title>24&#45;&gt;6</title>
-<path fill="none" stroke="black" d="M475.83,-215.88C492.2,-206.64 512.58,-195.13 530.15,-185.21"/>
-<polygon fill="black" stroke="black" points="532.06,-188.15 539.05,-180.19 528.62,-182.06 532.06,-188.15"/>
-</g>
 <!-- 25 -->
 <g id="node26" class="node">
 <title>25</title>
+<polygon fill="none" stroke="black" points="513,-252 376,-252 376,-216 513,-216 513,-252"/>
+<text text-anchor="middle" x="444.5" y="-230.3" font-family="Times,serif" font-size="14.00">packet_inspector</text>
+</g>
+<!-- 25&#45;&gt;7 -->
+<g id="edge32" class="edge">
+<title>25&#45;&gt;7</title>
+<path fill="none" stroke="black" d="M474.83,-215.88C491.2,-206.64 511.58,-195.13 529.15,-185.21"/>
+<polygon fill="black" stroke="black" points="531.06,-188.15 538.05,-180.19 527.62,-182.06 531.06,-188.15"/>
+</g>
+<!-- 26 -->
+<g id="node27" class="node">
+<title>26</title>
 <polygon fill="none" stroke="black" points="834.5,-612 736.5,-612 736.5,-576 834.5,-576 834.5,-612"/>
 <text text-anchor="middle" x="785.5" y="-590.3" font-family="Times,serif" font-size="14.00">playground</text>
 </g>
-<!-- 25&#45;&gt;23 -->
+<!-- 26&#45;&gt;24 -->
 <g id="edge33" class="edge">
-<title>25&#45;&gt;23</title>
+<title>26&#45;&gt;24</title>
 <path fill="none" stroke="black" d="M769.19,-575.7C761.19,-567.22 751.43,-556.86 742.68,-547.58"/>
 <polygon fill="black" stroke="black" points="745.03,-544.98 735.63,-540.1 739.94,-549.78 745.03,-544.98"/>
 </g>
-<!-- 26 -->
-<g id="node27" class="node">
-<title>26</title>
-<polygon fill="none" stroke="black" points="607,-252 532,-252 532,-216 607,-216 607,-252"/>
-<text text-anchor="middle" x="569.5" y="-230.3" font-family="Times,serif" font-size="14.00">stresser</text>
+<!-- 27 -->
+<g id="node28" class="node">
+<title>27</title>
+<polygon fill="none" stroke="black" points="606,-252 531,-252 531,-216 606,-216 606,-252"/>
+<text text-anchor="middle" x="568.5" y="-230.3" font-family="Times,serif" font-size="14.00">stresser</text>
 </g>
-<!-- 26&#45;&gt;6 -->
+<!-- 27&#45;&gt;7 -->
 <g id="edge34" class="edge">
-<title>26&#45;&gt;6</title>
-<path fill="none" stroke="black" d="M569.5,-215.7C569.5,-207.98 569.5,-198.71 569.5,-190.11"/>
-<polygon fill="black" stroke="black" points="573,-190.1 569.5,-180.1 566,-190.1 573,-190.1"/>
+<title>27&#45;&gt;7</title>
+<path fill="none" stroke="black" d="M568.5,-215.7C568.5,-207.98 568.5,-198.71 568.5,-190.11"/>
+<polygon fill="black" stroke="black" points="572,-190.1 568.5,-180.1 565,-190.1 572,-190.1"/>
 </g>
 </g>
 </svg>
diff --git a/crates/java_string/Cargo.toml b/crates/java_string/Cargo.toml
new file mode 100644
index 000000000..414e22a7d
--- /dev/null
+++ b/crates/java_string/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "java_string"
+description = "An implementation of Java strings, tolerant of invalid UTF-16 encoding"
+readme = "README.md"
+version = "0.1.0"
+keywords = ["java", "string", "utf16"]
+edition.workspace = true
+repository.workspace = true
+documentation.workspace = true
+license.workspace = true
+
+[features]
+serde = ["dep:serde"]
+
+[dependencies]
+serde = { workspace = true, optional = true }
diff --git a/crates/java_string/README.md b/crates/java_string/README.md
new file mode 100644
index 000000000..7135d6454
--- /dev/null
+++ b/crates/java_string/README.md
@@ -0,0 +1,17 @@
+# java_string
+
+An implementation of Java strings, tolerant of invalid UTF-16 encoding.
+This allows for round-trip serialization of all Java strings, including those which contain invalid UTF-16, while still
+being able to perform useful operations on those strings. 
+
+These Java strings use the UTF-8 encoding, with the modification that surrogate code points (code points between U+D800 
+and U+DFFF inclusive) are allowed. This allows for zero-cost conversion from Rust strings to Java strings. This modified
+encoding is known as "semi-UTF-8" throughout the codebase. Similarly, this crate introduces a `JavaCodePoint` type which
+is analogous to `char`, except that surrogate code points are allowed.
+
+This crate is mostly undocumented, because most methods are entirely analogous to those of the same name in Rust's
+strings. Please refer to the `std` documentation.
+
+# Features
+
+- `serde`: adds support for [`serde`](https://docs.rs/serde/latest/serde/).
\ No newline at end of file
diff --git a/crates/java_string/src/cesu8.rs b/crates/java_string/src/cesu8.rs
new file mode 100644
index 000000000..eb94ee6c1
--- /dev/null
+++ b/crates/java_string/src/cesu8.rs
@@ -0,0 +1,279 @@
+use std::borrow::Cow;
+
+use crate::validations::{utf8_char_width, CONT_MASK, TAG_CONT};
+use crate::{JavaStr, JavaString, Utf8Error};
+
+impl JavaStr {
+    /// Converts from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format to a `Cow<JavaStr>`.
+    ///
+    /// ```
+    /// # use std::borrow::Cow;
+    /// # use java_string::{JavaCodePoint, JavaStr, JavaString};
+    ///
+    /// let result = JavaStr::from_modified_utf8("Hello World!".as_bytes()).unwrap();
+    /// assert!(matches!(result, Cow::Borrowed(_)));
+    /// assert_eq!(JavaStr::from_str("Hello World!"), result);
+    ///
+    /// let result = JavaStr::from_modified_utf8(&[
+    ///     0x61, 0x62, 0x63, 0xc0, 0x80, 0xe2, 0x84, 0x9d, 0xed, 0xa0, 0xbd, 0xed, 0xb2, 0xa3, 0xed,
+    ///     0xa0, 0x80,
+    /// ])
+    /// .unwrap();
+    /// assert!(matches!(result, Cow::Owned(_)));
+    /// let mut expected = JavaString::from("abc\0ℝ💣");
+    /// expected.push_java(JavaCodePoint::from_u32(0xd800).unwrap());
+    /// assert_eq!(expected, result);
+    ///
+    /// let result = JavaStr::from_modified_utf8(&[0xed]);
+    /// assert!(result.is_err());
+    /// ```
+    #[inline]
+    pub fn from_modified_utf8(bytes: &[u8]) -> Result<Cow<JavaStr>, Utf8Error> {
+        match JavaStr::from_full_utf8(bytes) {
+            Ok(str) => Ok(Cow::Borrowed(str)),
+            Err(_) => JavaString::from_modified_utf8_internal(bytes).map(Cow::Owned),
+        }
+    }
+
+    /// Converts to Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format.
+    ///
+    /// ```
+    /// # use std::borrow::Cow;
+    /// # use java_string::{JavaCodePoint, JavaStr, JavaString};
+    ///
+    /// let result = JavaStr::from_str("Hello World!").to_modified_utf8();
+    /// assert!(matches!(result, Cow::Borrowed(_)));
+    /// assert_eq!(result, &b"Hello World!"[..]);
+    ///
+    /// let mut str = JavaString::from("abc\0ℝ💣");
+    /// str.push_java(JavaCodePoint::from_u32(0xd800).unwrap());
+    /// let result = str.to_modified_utf8();
+    /// let expected = [
+    ///     0x61, 0x62, 0x63, 0xc0, 0x80, 0xe2, 0x84, 0x9d, 0xed, 0xa0, 0xbd, 0xed, 0xb2, 0xa3, 0xed,
+    ///     0xa0, 0x80,
+    /// ];
+    /// assert!(matches!(result, Cow::Owned(_)));
+    /// assert_eq!(result, &expected[..]);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn to_modified_utf8(&self) -> Cow<[u8]> {
+        if is_valid_cesu8(self) {
+            Cow::Borrowed(self.as_bytes())
+        } else {
+            Cow::Owned(self.to_modified_utf8_internal())
+        }
+    }
+
+    /// Slow path of `to_modified_utf8`: re-encodes NUL and 4-byte chars.
+    #[inline]
+    fn to_modified_utf8_internal(&self) -> Vec<u8> {
+        let bytes = self.as_bytes();
+        // Output is at least as long as the input; reserve 25% headroom.
+        let mut encoded = Vec::with_capacity(bytes.len() + (bytes.len() >> 2));
+        let mut i = 0;
+        while i < bytes.len() {
+            let b = bytes[i];
+            if b == 0 {
+                encoded.extend([0xc0, 0x80]);
+                i += 1;
+            } else if b < 128 {
+                // Pass ASCII through quickly.
+                encoded.push(b);
+                i += 1;
+            } else {
+                // Figure out how many bytes we need for this character.
+                let w = utf8_char_width(b);
+                let char_bytes = unsafe {
+                    // SAFETY: valid semi UTF-8, so w bytes remain from i
+                    bytes.get_unchecked(i..i + w)
+                };
+                if w != 4 {
+                    // Pass through short UTF-8 sequences unmodified.
+                    encoded.extend(char_bytes.iter().copied())
+                } else {
+                    // Encode 4-byte sequences as 6 bytes
+                    let s = unsafe {
+                        // SAFETY: input is valid semi UTF-8
+                        JavaStr::from_semi_utf8_unchecked(char_bytes)
+                    };
+                    let c = unsafe {
+                        // SAFETY: s contains a single char of width 4
+                        s.chars().next().unwrap_unchecked().as_u32() - 0x10000
+                    };
+                    encoded.extend(enc_surrogate(((c >> 10) as u16) | 0xd800));
+                    encoded.extend(enc_surrogate(((c & 0x3ff) as u16) | 0xdc00));
+                }
+                i += w;
+            }
+        }
+        encoded
+    }
+}
+
+impl JavaString {
+    /// Converts from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format to a `JavaString`.
+    ///
+    /// See [JavaStr::from_modified_utf8].
+    #[inline]
+    pub fn from_modified_utf8(bytes: Vec<u8>) -> Result<JavaString, Utf8Error> {
+        match JavaString::from_full_utf8(bytes) {
+            Ok(str) => Ok(str),
+            Err(err) => JavaString::from_modified_utf8_internal(&err.bytes),
+        }
+    }
+
+    fn from_modified_utf8_internal(slice: &[u8]) -> Result<JavaString, Utf8Error> {
+        let mut offset = 0;
+        let mut decoded = Vec::with_capacity(slice.len() + 1);
+
+        while let Some(&first) = slice.get(offset) {
+            let old_offset = offset;
+            offset += 1;
+
+            macro_rules! err {
+                ($error_len:expr) => {
+                    return Err(Utf8Error {
+                        valid_up_to: old_offset,
+                        error_len: $error_len,
+                    })
+                };
+            }
+
+            macro_rules! next {
+                () => {{
+                    if let Some(&b) = slice.get(offset) {
+                        offset += 1;
+                        b
+                    } else {
+                        err!(None)
+                    }
+                }};
+            }
+
+            macro_rules! next_cont {
+                ($error_len:expr) => {{
+                    let byte = next!();
+                    if (byte) & !CONT_MASK == TAG_CONT {
+                        byte
+                    } else {
+                        err!($error_len)
+                    }
+                }};
+            }
+
+            if first == 0 {
+                // modified UTF-8 should never contain \0 directly.
+                err!(Some(1));
+            } else if first < 128 {
+                // Pass ASCII through directly.
+                decoded.push(first);
+            } else if first == 0xc0 {
+                // modified UTF-8 encoding of null character
+                match next!() {
+                    0x80 => decoded.push(0),
+                    _ => err!(Some(1)),
+                }
+            } else {
+                match utf8_char_width(first) {
+                    // Two-byte sequences can be used directly.
+                    2 => {
+                        let second = next_cont!(Some(1));
+                        decoded.extend([first, second]);
+                    }
+                    3 => {
+                        let second = next_cont!(Some(1));
+                        let third = next_cont!(Some(2));
+                        match (first, second) {
+                            // These are valid UTF-8, so pass them through.
+                            (0xe0, 0xa0..=0xbf)
+                            | (0xe1..=0xec, 0x80..=0xbf)
+                            | (0xed, 0x80..=0x9f)
+                            | (0xee..=0xef, 0x80..=0xbf)
+                            // Second half of a surrogate pair without a preceding first half, also pass this through.
+                            | (0xed, 0xb0..=0xbf)
+                            => decoded.extend([first, second, third]),
+                            // First half of a surrogate pair
+                            (0xed, 0xa0..=0xaf) => {
+                                // Peek ahead and try to pair the first half of surrogate pair with
+                                // second.
+                                match &slice[offset..] {
+                                    [0xed, fifth @ 0xb0..=0xbf, sixth, ..]
+                                    if *sixth & !CONT_MASK == TAG_CONT =>
+                                        {
+                                            let s = dec_surrogates(second, third, *fifth, *sixth);
+                                            decoded.extend(s);
+                                            offset += 3;
+                                        }
+                                    _ => {
+                                        // No second half, append the first half directly.
+                                        decoded.extend([first, second, third]);
+                                    }
+                                }
+                            }
+                            _ => err!(Some(1)),
+                        }
+                    }
+                    _ => err!(Some(1)), // not a lead byte, or width 4 (disallowed)
+                }
+            }
+        }
+
+        unsafe {
+            // SAFETY: we built a semi UTF-8 encoded string
+            Ok(JavaString::from_semi_utf8_unchecked(decoded))
+        }
+    }
+
+    /// Converts to Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8) format.
+    ///
+    /// See [JavaStr::to_modified_utf8].
+    #[inline]
+    #[must_use]
+    pub fn into_modified_utf8(self) -> Vec<u8> {
+        if is_valid_cesu8(&self) {
+            self.into_bytes()
+        } else {
+            self.to_modified_utf8_internal()
+        }
+    }
+}
+
+#[inline]
+fn dec_surrogate(second: u8, third: u8) -> u32 {
+    0xd000 | ((second & CONT_MASK) as u32) << 6 | (third & CONT_MASK) as u32
+}
+
+#[inline]
+fn dec_surrogates(second: u8, third: u8, fifth: u8, sixth: u8) -> [u8; 4] {
+    // Convert to a 32-bit code point.
+    let s1 = dec_surrogate(second, third);
+    let s2 = dec_surrogate(fifth, sixth);
+    let c = 0x10000 + (((s1 - 0xd800) << 10) | (s2 - 0xdc00));
+    assert!((0x010000..=0x10ffff).contains(&c));
+
+    // Convert to UTF-8.
+    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    [
+        0b1111_0000u8 | ((c & 0b1_1100_0000_0000_0000_0000) >> 18) as u8,
+        TAG_CONT | ((c & 0b0_0011_1111_0000_0000_0000) >> 12) as u8,
+        TAG_CONT | ((c & 0b0_0000_0000_1111_1100_0000) >> 6) as u8,
+        TAG_CONT | (c & 0b0_0000_0000_0000_0011_1111) as u8,
+    ]
+}
+
+#[inline]
+fn is_valid_cesu8(text: &JavaStr) -> bool {
+    // Re-encoding is needed iff there is a NUL byte or a 4-byte lead byte.
+    !text.bytes().any(|b| b == 0 || ((b & !CONT_MASK) != TAG_CONT && utf8_char_width(b) > 3))
+}
+
+#[inline]
+fn enc_surrogate(surrogate: u16) -> [u8; 3] {
+    // 1110xxxx 10xxxxxx 10xxxxxx
+    [
+        0b11100000 | ((surrogate & 0b11110000_00000000) >> 12) as u8,
+        TAG_CONT | ((surrogate & 0b00001111_11000000) >> 6) as u8,
+        TAG_CONT | (surrogate & 0b00000000_00111111) as u8,
+    ]
+}
diff --git a/crates/java_string/src/char.rs b/crates/java_string/src/char.rs
new file mode 100644
index 000000000..5bc26a4f7
--- /dev/null
+++ b/crates/java_string/src/char.rs
@@ -0,0 +1,1012 @@
+use std::char::ParseCharError;
+use std::cmp::Ordering;
+use std::fmt;
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::hash::{Hash, Hasher};
+use std::iter::{once, FusedIterator, Once};
+use std::ops::Range;
+use std::str::FromStr;
+
+use crate::validations::{TAG_CONT, TAG_FOUR_B, TAG_THREE_B, TAG_TWO_B};
+
+// JavaCodePoint is guaranteed to have the same repr as a u32, with valid values
+// of between 0 and 0x10FFFF, the same as a unicode code point. Surrogate code
+// points are valid values of this type.
+#[derive(Copy, Clone, PartialEq, Eq)]
+#[repr(C)]
+pub struct JavaCodePoint {
+    #[cfg(target_endian = "little")]
+    lower: u16,
+    upper: SeventeenValues,
+    #[cfg(target_endian = "big")]
+    lower: u16,
+}
+
+#[repr(u16)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+#[allow(unused)]
+enum SeventeenValues {
+    V0,
+    V1,
+    V2,
+    V3,
+    V4,
+    V5,
+    V6,
+    V7,
+    V8,
+    V9,
+    V10,
+    V11,
+    V12,
+    V13,
+    V14,
+    V15,
+    V16,
+}
+
+impl JavaCodePoint {
+    pub const MAX: JavaCodePoint = JavaCodePoint::from_char(char::MAX);
+    pub const REPLACEMENT_CHARACTER: JavaCodePoint =
+        JavaCodePoint::from_char(char::REPLACEMENT_CHARACTER);
+
+    /// See [char::from_u32]
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// let c = JavaCodePoint::from_u32(0x2764);
+    /// assert_eq!(Some(JavaCodePoint::from_char('❤')), c);
+    ///
+    /// assert_eq!(None, JavaCodePoint::from_u32(0x110000));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn from_u32(i: u32) -> Option<JavaCodePoint> {
+        if i <= 0x10ffff {
+            unsafe { Some(Self::from_u32_unchecked(i)) }
+        } else {
+            None
+        }
+    }
+
+    /// # Safety
+    /// The argument must be within the valid Unicode code point range of 0 to
+    /// 0x10FFFF inclusive. Surrogate code points are allowed.
+    #[inline]
+    #[must_use]
+    pub const unsafe fn from_u32_unchecked(i: u32) -> JavaCodePoint {
+        // SAFETY: the caller checks that the argument can be represented by this type
+        std::mem::transmute(i)
+    }
+
+    /// Converts a `char` to a code point.
+    #[inline]
+    #[must_use]
+    pub const fn from_char(char: char) -> JavaCodePoint {
+        unsafe {
+            // SAFETY: all chars are valid code points
+            JavaCodePoint::from_u32_unchecked(char as u32)
+        }
+    }
+
+    /// Converts this code point to a `u32`.
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(65, JavaCodePoint::from_char('A').as_u32());
+    /// assert_eq!(0xd800, JavaCodePoint::from_u32(0xd800).unwrap().as_u32());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn as_u32(self) -> u32 {
+        unsafe {
+            // SAFETY: JavaCodePoint has the same repr as a u32
+            let result = std::mem::transmute(self);
+
+            if result > 0x10ffff {
+                // SAFETY: JavaCodePoint can never have a value > 0x10FFFF.
+                // This statement may allow the optimizer to remove branches in the calling code
+                // associated with out of bounds chars.
+                std::hint::unreachable_unchecked();
+            }
+
+            result
+        }
+    }
+
+    /// Converts this code point to a `char`.
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(Some('a'), JavaCodePoint::from_char('a').as_char());
+    /// assert_eq!(None, JavaCodePoint::from_u32(0xd800).unwrap().as_char());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn as_char(self) -> Option<char> {
+        char::from_u32(self.as_u32())
+    }
+
+    /// # Safety
+    /// The caller must ensure that this code point is not a surrogate code
+    /// point.
+    #[inline]
+    #[must_use]
+    pub unsafe fn as_char_unchecked(self) -> char {
+        char::from_u32_unchecked(self.as_u32())
+    }
+
+    /// See [char::encode_utf16]
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(
+    ///     2,
+    ///     JavaCodePoint::from_char('𝕊')
+    ///         .encode_utf16(&mut [0; 2])
+    ///         .len()
+    /// );
+    /// assert_eq!(
+    ///     1,
+    ///     JavaCodePoint::from_u32(0xd800)
+    ///         .unwrap()
+    ///         .encode_utf16(&mut [0; 2])
+    ///         .len()
+    /// );
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaCodePoint;
+    /// // Should panic
+    /// JavaCodePoint::from_char('𝕊').encode_utf16(&mut [0; 1]);
+    /// ```
+    #[inline]
+    pub fn encode_utf16(self, dst: &mut [u16]) -> &mut [u16] {
+        if let Some(char) = self.as_char() {
+            char.encode_utf16(dst)
+        } else {
+            dst[0] = self.as_u32() as u16;
+            &mut dst[..1]
+        }
+    }
+
+    /// Encodes this `JavaCodePoint` into semi UTF-8, that is, UTF-8 with
+    /// surrogate code points. See also [char::encode_utf8].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(
+    ///     2,
+    ///     JavaCodePoint::from_char('ß')
+    ///         .encode_semi_utf8(&mut [0; 4])
+    ///         .len()
+    /// );
+    /// assert_eq!(
+    ///     3,
+    ///     JavaCodePoint::from_u32(0xd800)
+    ///         .unwrap()
+    ///         .encode_semi_utf8(&mut [0; 4])
+    ///         .len()
+    /// );
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaCodePoint;
+    /// // Should panic
+    /// JavaCodePoint::from_char('ß').encode_semi_utf8(&mut [0; 1]);
+    /// ```
+    #[inline]
+    pub fn encode_semi_utf8(self, dst: &mut [u8]) -> &mut [u8] {
+        let len = self.len_utf8();
+        let code = self.as_u32();
+        match (len, &mut dst[..]) {
+            (1, [a, ..]) => {
+                *a = code as u8;
+            }
+            (2, [a, b, ..]) => {
+                *a = (code >> 6 & 0x1f) as u8 | TAG_TWO_B;
+                *b = (code & 0x3f) as u8 | TAG_CONT;
+            }
+            (3, [a, b, c, ..]) => {
+                *a = (code >> 12 & 0x0f) as u8 | TAG_THREE_B;
+                *b = (code >> 6 & 0x3f) as u8 | TAG_CONT;
+                *c = (code & 0x3f) as u8 | TAG_CONT;
+            }
+            (4, [a, b, c, d, ..]) => {
+                *a = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
+                *b = (code >> 12 & 0x3f) as u8 | TAG_CONT;
+                *c = (code >> 6 & 0x3f) as u8 | TAG_CONT;
+                *d = (code & 0x3f) as u8 | TAG_CONT;
+            }
+            _ => panic!(
+                "encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
+                len,
+                code,
+                dst.len()
+            ),
+        }
+        &mut dst[..len]
+    }
+
+    /// See [char::eq_ignore_ascii_case].
+    #[inline]
+    pub fn eq_ignore_ascii_case(&self, other: &JavaCodePoint) -> bool {
+        match (self.as_char(), other.as_char()) {
+            (Some(char1), Some(char2)) => char1.eq_ignore_ascii_case(&char2),
+            (None, None) => self == other,
+            _ => false,
+        }
+    }
+
+    /// See [char::escape_debug].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(
+    ///     "a",
+    ///     JavaCodePoint::from_char('a').escape_debug().to_string()
+    /// );
+    /// assert_eq!(
+    ///     "\\n",
+    ///     JavaCodePoint::from_char('\n').escape_debug().to_string()
+    /// );
+    /// assert_eq!(
+    ///     "\\u{d800}",
+    ///     JavaCodePoint::from_u32(0xd800)
+    ///         .unwrap()
+    ///         .escape_debug()
+    ///         .to_string()
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn escape_debug(self) -> CharEscapeIter {
+        self.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
+    }
+
+    #[inline]
+    #[must_use]
+    pub(crate) fn escape_debug_ext(self, args: EscapeDebugExtArgs) -> CharEscapeIter {
+        const NULL: u32 = '\0' as u32;
+        const TAB: u32 = '\t' as u32;
+        const CARRIAGE_RETURN: u32 = '\r' as u32;
+        const LINE_FEED: u32 = '\n' as u32;
+        const SINGLE_QUOTE: u32 = '\'' as u32;
+        const DOUBLE_QUOTE: u32 = '"' as u32;
+        const BACKSLASH: u32 = '\\' as u32;
+
+        unsafe {
+            // SAFETY: all characters specified are in ascii range
+            match self.as_u32() {
+                NULL => CharEscapeIter::new([b'\\', b'0']),
+                TAB => CharEscapeIter::new([b'\\', b't']),
+                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
+                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
+                SINGLE_QUOTE if args.escape_single_quote => CharEscapeIter::new([b'\\', b'\'']),
+                DOUBLE_QUOTE if args.escape_double_quote => CharEscapeIter::new([b'\\', b'"']),
+                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
+                _ if self.is_printable() => {
+                    // SAFETY: surrogate code points are not printable
+                    CharEscapeIter::printable(self.as_char_unchecked())
+                }
+                _ => self.escape_unicode(),
+            }
+        }
+    }
+
+    #[inline]
+    fn is_printable(self) -> bool {
+        let Some(char) = self.as_char() else {
+            return false;
+        };
+        if matches!(char, '\\' | '\'' | '"') {
+            return true;
+        }
+        char.escape_debug().next() != Some('\\')
+    }
+
+    /// See [char::escape_default].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(
+    ///     "a",
+    ///     JavaCodePoint::from_char('a').escape_default().to_string()
+    /// );
+    /// assert_eq!(
+    ///     "\\n",
+    ///     JavaCodePoint::from_char('\n').escape_default().to_string()
+    /// );
+    /// assert_eq!(
+    ///     "\\u{d800}",
+    ///     JavaCodePoint::from_u32(0xd800)
+    ///         .unwrap()
+    ///         .escape_default()
+    ///         .to_string()
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn escape_default(self) -> CharEscapeIter {
+        const TAB: u32 = '\t' as u32;
+        const CARRIAGE_RETURN: u32 = '\r' as u32;
+        const LINE_FEED: u32 = '\n' as u32;
+        const SINGLE_QUOTE: u32 = '\'' as u32;
+        const DOUBLE_QUOTE: u32 = '"' as u32;
+        const BACKSLASH: u32 = '\\' as u32;
+
+        unsafe {
+            // SAFETY: all characters specified are in ascii range
+            match self.as_u32() {
+                TAB => CharEscapeIter::new([b'\\', b't']),
+                CARRIAGE_RETURN => CharEscapeIter::new([b'\\', b'r']),
+                LINE_FEED => CharEscapeIter::new([b'\\', b'n']),
+                SINGLE_QUOTE => CharEscapeIter::new([b'\\', b'\'']),
+                DOUBLE_QUOTE => CharEscapeIter::new([b'\\', b'"']),
+                BACKSLASH => CharEscapeIter::new([b'\\', b'\\']),
+                0x20..=0x7e => CharEscapeIter::new([self.as_u32() as u8]),
+                _ => self.escape_unicode(),
+            }
+        }
+    }
+
+    /// See [char::escape_unicode].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    /// assert_eq!(
+    ///     "\\u{2764}",
+    ///     JavaCodePoint::from_char('❤').escape_unicode().to_string()
+    /// );
+    /// assert_eq!(
+    ///     "\\u{d800}",
+    ///     JavaCodePoint::from_u32(0xd800)
+    ///         .unwrap()
+    ///         .escape_unicode()
+    ///         .to_string()
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn escape_unicode(self) -> CharEscapeIter {
+        let x = self.as_u32();
+
+        let mut arr = [0; 10];
+        arr[0] = b'\\';
+        arr[1] = b'u';
+        arr[2] = b'{';
+
+        let number_len = if x == 0 {
+            1
+        } else {
+            ((x.ilog2() >> 2) + 1) as usize
+        };
+        arr[3 + number_len] = b'}';
+        for hexit in 0..number_len {
+            arr[2 + number_len - hexit] = b"0123456789abcdef"[((x >> (hexit << 2)) & 15) as usize];
+        }
+
+        CharEscapeIter {
+            inner: EscapeIterInner::Escaped(EscapeIterEscaped {
+                bytes: arr,
+                range: 0..number_len + 4,
+            }),
+        }
+    }
+
+    /// See [char::is_alphabetic].
+    #[inline]
+    #[must_use]
+    pub fn is_alphabetic(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_alphabetic())
+    }
+
+    /// See [char::is_alphanumeric].
+    #[inline]
+    #[must_use]
+    pub fn is_alphanumeric(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_alphanumeric())
+    }
+
+    /// See [char::is_ascii].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii(self) -> bool {
+        self.as_u32() <= 0x7f
+    }
+
+    /// See [char::is_ascii_alphabetic].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_alphabetic(self) -> bool {
+        self.is_ascii_lowercase() || self.is_ascii_uppercase()
+    }
+
+    /// See [char::is_ascii_alphanumeric].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_alphanumeric(self) -> bool {
+        self.is_ascii_alphabetic() || self.is_ascii_digit()
+    }
+
+    /// See [char::is_ascii_control].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_control(self) -> bool {
+        matches!(self.as_u32(), 0..=0x1f | 0x7f)
+    }
+
+    /// See [char::is_ascii_digit].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_digit(self) -> bool {
+        const ZERO: u32 = '0' as u32;
+        const NINE: u32 = '9' as u32;
+        matches!(self.as_u32(), ZERO..=NINE)
+    }
+
+    /// See [char::is_ascii_graphic].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_graphic(self) -> bool {
+        matches!(self.as_u32(), 0x21..=0x7e)
+    }
+
+    /// See [char::is_ascii_hexdigit].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_hexdigit(self) -> bool {
+        const LOWER_A: u32 = 'a' as u32;
+        const LOWER_F: u32 = 'f' as u32;
+        const UPPER_A: u32 = 'A' as u32;
+        const UPPER_F: u32 = 'F' as u32;
+        self.is_ascii_digit() || matches!(self.as_u32(), (LOWER_A..=LOWER_F) | (UPPER_A..=UPPER_F))
+    }
+
+    /// See [char::is_ascii_lowercase].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_lowercase(self) -> bool {
+        const A: u32 = 'a' as u32;
+        const Z: u32 = 'z' as u32;
+        matches!(self.as_u32(), A..=Z)
+    }
+
+    /// See [char::is_ascii_octdigit].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_octdigit(self) -> bool {
+        const ZERO: u32 = '0' as u32;
+        const SEVEN: u32 = '7' as u32;
+        matches!(self.as_u32(), ZERO..=SEVEN)
+    }
+
+    /// See [char::is_ascii_punctuation].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_punctuation(self) -> bool {
+        matches!(
+            self.as_u32(),
+            (0x21..=0x2f) | (0x3a..=0x40) | (0x5b..=0x60) | (0x7b..=0x7e)
+        )
+    }
+
+    /// See [char::is_ascii_uppercase].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_uppercase(self) -> bool {
+        const A: u32 = 'A' as u32;
+        const Z: u32 = 'Z' as u32;
+        matches!(self.as_u32(), A..=Z)
+    }
+
+    /// See [char::is_ascii_whitespace].
+    #[inline]
+    #[must_use]
+    pub const fn is_ascii_whitespace(self) -> bool {
+        const SPACE: u32 = ' ' as u32;
+        const HORIZONTAL_TAB: u32 = '\t' as u32;
+        const LINE_FEED: u32 = '\n' as u32;
+        const FORM_FEED: u32 = 0xc;
+        const CARRIAGE_RETURN: u32 = '\r' as u32;
+        matches!(
+            self.as_u32(),
+            SPACE | HORIZONTAL_TAB | LINE_FEED | FORM_FEED | CARRIAGE_RETURN
+        )
+    }
+
+    /// See [char::is_control].
+    #[inline]
+    #[must_use]
+    pub fn is_control(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_control())
+    }
+
+    /// See [char::is_digit].
+    #[inline]
+    #[must_use]
+    pub fn is_digit(self, radix: u32) -> bool {
+        self.to_digit(radix).is_some()
+    }
+
+    /// See [char::is_lowercase].
+    #[inline]
+    #[must_use]
+    pub fn is_lowercase(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_lowercase())
+    }
+
+    /// See [char::is_numeric].
+    #[inline]
+    #[must_use]
+    pub fn is_numeric(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_numeric())
+    }
+
+    /// See [char::is_uppercase].
+    #[inline]
+    #[must_use]
+    pub fn is_uppercase(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_uppercase())
+    }
+
+    /// See [char::is_whitespace].
+    #[inline]
+    #[must_use]
+    pub fn is_whitespace(self) -> bool {
+        self.as_char().is_some_and(|char| char.is_whitespace())
+    }
+
+    /// See [char::len_utf16]. Surrogate code points return 1.
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let n = JavaCodePoint::from_char('ß').len_utf16();
+    /// assert_eq!(n, 1);
+    ///
+    /// let len = JavaCodePoint::from_char('💣').len_utf16();
+    /// assert_eq!(len, 2);
+    ///
+    /// assert_eq!(1, JavaCodePoint::from_u32(0xd800).unwrap().len_utf16());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn len_utf16(self) -> usize {
+        if let Some(char) = self.as_char() {
+            char.len_utf16()
+        } else {
+            1 // a lone surrogate is encoded as a single UTF-16 code unit
+        }
+    }
+
+    /// See [char::len_utf8]. Surrogate code points return 3.
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let len = JavaCodePoint::from_char('A').len_utf8();
+    /// assert_eq!(len, 1);
+    ///
+    /// let len = JavaCodePoint::from_char('ß').len_utf8();
+    /// assert_eq!(len, 2);
+    ///
+    /// let len = JavaCodePoint::from_char('ℝ').len_utf8();
+    /// assert_eq!(len, 3);
+    ///
+    /// let len = JavaCodePoint::from_char('💣').len_utf8();
+    /// assert_eq!(len, 4);
+    ///
+    /// let len = JavaCodePoint::from_u32(0xd800).unwrap().len_utf8();
+    /// assert_eq!(len, 3);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn len_utf8(self) -> usize {
+        if let Some(char) = self.as_char() {
+            char.len_utf8()
+        } else {
+            3 // surrogate code points always take 3 bytes in semi UTF-8
+        }
+    }
+
+    /// See [char::make_ascii_lowercase].
+    #[inline]
+    pub fn make_ascii_lowercase(&mut self) {
+        *self = self.to_ascii_lowercase();
+    }
+
+    /// See [char::make_ascii_uppercase].
+    #[inline]
+    pub fn make_ascii_uppercase(&mut self) {
+        *self = self.to_ascii_uppercase();
+    }
+
+    /// See [char::to_ascii_lowercase].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let ascii = JavaCodePoint::from_char('A');
+    /// let non_ascii = JavaCodePoint::from_char('❤');
+    ///
+    /// assert_eq!('a', ascii.to_ascii_lowercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_lowercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn to_ascii_lowercase(self) -> JavaCodePoint {
+        if self.is_ascii_uppercase() {
+            unsafe {
+                // SAFETY: all lowercase chars are valid chars
+                Self::from_u32_unchecked(self.as_u32() + 32)
+            }
+        } else {
+            self
+        }
+    }
+
+    /// See [char::to_ascii_uppercase].
+    ///
+    /// ```
+    /// # use java_string::JavaCodePoint;
+    ///
+    /// let ascii = JavaCodePoint::from_char('a');
+    /// let non_ascii = JavaCodePoint::from_char('❤');
+    ///
+    /// assert_eq!('A', ascii.to_ascii_uppercase());
+    /// assert_eq!('❤', non_ascii.to_ascii_uppercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn to_ascii_uppercase(self) -> JavaCodePoint {
+        if self.is_ascii_lowercase() {
+            unsafe {
+                // SAFETY: all uppercase chars are valid chars
+                Self::from_u32_unchecked(self.as_u32() - 32)
+            }
+        } else {
+            self
+        }
+    }
+
+    /// See [char::to_digit].
+    #[inline]
+    #[must_use]
+    pub const fn to_digit(self, radix: u32) -> Option<u32> {
+        if let Some(char) = self.as_char() {
+            char.to_digit(radix)
+        } else {
+            None
+        }
+    }
+
+    /// See [char::to_lowercase].
+    #[inline]
+    #[must_use]
+    pub fn to_lowercase(self) -> ToLowercase {
+        match self.as_char() {
+            Some(char) => ToLowercase::char(char.to_lowercase()),
+            None => ToLowercase::invalid(self),
+        }
+    }
+
+    /// Uppercase mapping via [char::to_uppercase]; non-`char` values yield themselves.
+    #[inline]
+    #[must_use]
+    pub fn to_uppercase(self) -> ToUppercase {
+        self.as_char().map_or_else(
+            || ToUppercase::invalid(self),
+            |scalar| ToUppercase::char(scalar.to_uppercase()),
+        )
+    }
+}
+
+impl Debug for JavaCodePoint {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // Rendered like a char literal, so only `'` needs escaping, not `"`.
+        let args = EscapeDebugExtArgs {
+            escape_single_quote: true,
+            escape_double_quote: false,
+        };
+        f.write_char('\'')?;
+        self.escape_debug_ext(args).try_for_each(|c| f.write_char(c))?;
+        f.write_char('\'')
+    }
+}
+
+impl Default for JavaCodePoint {
+    #[inline]
+    fn default() -> Self {
+        JavaCodePoint::from_char('\0')
+    }
+}
+
+impl Display for JavaCodePoint {
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.as_char().unwrap_or(char::REPLACEMENT_CHARACTER), f)
+    }
+}
+
+impl From<JavaCodePoint> for u32 {
+    #[inline]
+    fn from(value: JavaCodePoint) -> Self {
+        value.as_u32()
+    }
+}
+
+impl From<u8> for JavaCodePoint {
+    #[inline]
+    fn from(value: u8) -> Self {
+        JavaCodePoint::from_char(char::from(value))
+    }
+}
+
+impl FromStr for JavaCodePoint {
+    type Err = ParseCharError;
+
+    #[inline]
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        s.parse::<char>().map(Self::from_char) // parsed exactly like a `char`
+    }
+}
+
+impl Hash for JavaCodePoint {
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        self.as_u32().hash(state)
+    }
+}
+
+impl Ord for JavaCodePoint {
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.as_u32().cmp(&other.as_u32())
+    }
+}
+
+impl PartialOrd for JavaCodePoint {
+    #[inline]
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other)) // canonical form: always agrees with `Ord::cmp`
+    }
+}
+
+impl PartialOrd<char> for JavaCodePoint {
+    #[inline]
+    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
+        Some(self.cmp(&JavaCodePoint::from_char(*other)))
+    }
+}
+
+impl PartialOrd<JavaCodePoint> for char {
+    #[inline]
+    fn partial_cmp(&self, other: &JavaCodePoint) -> Option<Ordering> {
+        Some(JavaCodePoint::from_char(*self).cmp(other))
+    }
+}
+
+impl PartialEq<char> for JavaCodePoint {
+    #[inline]
+    fn eq(&self, other: &char) -> bool {
+        *self == JavaCodePoint::from_char(*other)
+    }
+}
+
+impl PartialEq<JavaCodePoint> for char {
+    #[inline]
+    fn eq(&self, other: &JavaCodePoint) -> bool {
+        JavaCodePoint::from_char(*self) == *other
+    }
+}
+
+pub(crate) struct EscapeDebugExtArgs {
+    pub(crate) escape_single_quote: bool,
+    pub(crate) escape_double_quote: bool,
+}
+
+impl EscapeDebugExtArgs {
+    pub(crate) const ESCAPE_ALL: Self = Self {
+        escape_single_quote: true,
+        escape_double_quote: true,
+    };
+}
+
+#[derive(Clone, Debug)]
+pub struct CharEscapeIter {
+    inner: EscapeIterInner,
+}
+
+#[derive(Clone, Debug)]
+enum EscapeIterInner {
+    Printable(Once<char>),
+    Escaped(EscapeIterEscaped),
+}
+
+impl Display for EscapeIterInner {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            EscapeIterInner::Printable(char) => char.clone().try_for_each(|ch| f.write_char(ch)),
+            EscapeIterInner::Escaped(escaped) => Display::fmt(escaped, f),
+        }
+    }
+}
+
+impl CharEscapeIter {
+    #[inline]
+    fn printable(char: char) -> Self {
+        let inner = EscapeIterInner::Printable(once(char));
+        Self { inner }
+    }
+
+    /// # Safety
+    /// Every byte of `bytes` must be ASCII.
+    #[inline]
+    unsafe fn new<const N: usize>(bytes: [u8; N]) -> Self {
+        assert!(N <= 10, "Too many bytes in escape iter");
+        // Left-align the sequence in the fixed buffer; the tail stays zeroed.
+        let mut padded = [0_u8; 10];
+        padded[..N].copy_from_slice(&bytes);
+        let escaped = EscapeIterEscaped {
+            bytes: padded,
+            range: 0..N,
+        };
+        let inner = EscapeIterInner::Escaped(escaped);
+        Self { inner }
+    }
+}
+
+impl Iterator for CharEscapeIter {
+    type Item = char;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        match &mut self.inner {
+            EscapeIterInner::Printable(printable) => printable.next(),
+            EscapeIterInner::Escaped(escaped) => escaped.next(),
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        match &self.inner {
+            EscapeIterInner::Printable(printable) => printable.size_hint(),
+            EscapeIterInner::Escaped(escaped) => escaped.size_hint(),
+        }
+    }
+}
+
+impl ExactSizeIterator for CharEscapeIter {
+    #[inline]
+    fn len(&self) -> usize {
+        match &self.inner {
+            EscapeIterInner::Printable(printable) => printable.len(),
+            EscapeIterInner::Escaped(escaped) => escaped.len(),
+        }
+    }
+}
+
+impl FusedIterator for CharEscapeIter {}
+
+impl Display for CharEscapeIter {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.inner, f)
+    }
+}
+
+#[derive(Clone, Debug)]
+struct EscapeIterEscaped {
+    // SAFETY: all values must be in the ASCII range
+    bytes: [u8; 10],
+    // SAFETY: range must not be out of bounds for length 10
+    range: Range<usize>,
+}
+
+impl Iterator for EscapeIterEscaped {
+    type Item = char;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let index = self.range.next()?;
+        // SAFETY: the range is never out of bounds for length 10
+        let byte = unsafe { *self.bytes.get_unchecked(index) };
+        Some(char::from(byte))
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.range.size_hint()
+    }
+
+    #[inline]
+    fn count(self) -> usize {
+        self.range.len()
+    }
+}
+
+impl ExactSizeIterator for EscapeIterEscaped {
+    #[inline]
+    fn len(&self) -> usize {
+        self.range.len()
+    }
+}
+
+impl FusedIterator for EscapeIterEscaped {}
+
+impl Display for EscapeIterEscaped {
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // SAFETY: range is in bounds for length 10
+        let pending = unsafe { self.bytes.get_unchecked(self.range.clone()) };
+        // SAFETY: all bytes are in ASCII range, hence valid UTF-8
+        let text = unsafe { std::str::from_utf8_unchecked(pending) };
+        f.write_str(text)
+    }
+}
+
+pub type ToLowercase = CharIterDelegate<std::char::ToLowercase>;
+pub type ToUppercase = CharIterDelegate<std::char::ToUppercase>;
+
+#[derive(Debug, Clone)]
+pub struct CharIterDelegate<I>(CharIterDelegateInner<I>);
+
+impl<I> CharIterDelegate<I> {
+    #[inline]
+    fn char(iter: I) -> CharIterDelegate<I> {
+        CharIterDelegate(CharIterDelegateInner::Char(iter))
+    }
+
+    #[inline]
+    fn invalid(code_point: JavaCodePoint) -> CharIterDelegate<I> {
+        CharIterDelegate(CharIterDelegateInner::Invalid(Some(code_point).into_iter()))
+    }
+}
+
+#[derive(Debug, Clone)]
+enum CharIterDelegateInner<I> {
+    Char(I),
+    Invalid(std::option::IntoIter<JavaCodePoint>),
+}
+
+impl<I> Iterator for CharIterDelegate<I>
+where
+    I: Iterator<Item = char>,
+{
+    type Item = JavaCodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // Chars from the wrapped iterator are converted; an invalid code point is yielded as-is.
+        match &mut self.0 {
+            CharIterDelegateInner::Char(chars) => chars.next().map(JavaCodePoint::from_char),
+            CharIterDelegateInner::Invalid(single) => single.next(),
+        }
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        // Forward the hint of whichever iterator is active.
+        match &self.0 {
+            CharIterDelegateInner::Char(chars) => chars.size_hint(),
+            CharIterDelegateInner::Invalid(single) => single.size_hint(),
+        }
+    }
+}
+
+impl<I> DoubleEndedIterator for CharIterDelegate<I>
+where
+    I: Iterator<Item = char> + DoubleEndedIterator,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        // Mirror of `next`: take from the back of whichever iterator is active.
+        // `option::IntoIter` holds at most one item, so either end yields the same one.
+        match &mut self.0 {
+            CharIterDelegateInner::Char(chars) => chars.next_back().map(JavaCodePoint::from_char),
+            CharIterDelegateInner::Invalid(single) => single.next_back(),
+        }
+    }
+}
+
+impl<I> ExactSizeIterator for CharIterDelegate<I> where I: Iterator<Item = char> + ExactSizeIterator {}
+
+impl<I> FusedIterator for CharIterDelegate<I> where I: Iterator<Item = char> + FusedIterator {}
diff --git a/crates/java_string/src/error.rs b/crates/java_string/src/error.rs
new file mode 100644
index 000000000..09742d014
--- /dev/null
+++ b/crates/java_string/src/error.rs
@@ -0,0 +1,126 @@
+use std::error::Error;
+use std::fmt;
+use std::fmt::{Display, Formatter};
+
+#[derive(Copy, Eq, PartialEq, Clone, Debug)]
+pub struct Utf8Error {
+    pub(crate) valid_up_to: usize,
+    pub(crate) error_len: Option<u8>,
+}
+
+impl Utf8Error {
+    #[must_use]
+    #[inline]
+    pub const fn valid_up_to(&self) -> usize {
+        self.valid_up_to
+    }
+
+    #[must_use]
+    #[inline]
+    pub const fn error_len(&self) -> Option<usize> {
+        // Hand-written `Option::map` (it is not const): widens the stored `u8` to `usize`.
+        match self.error_len {
+            Some(len) => Some(len as usize),
+            None => None,
+        }
+    }
+
+    #[must_use]
+    #[inline]
+    pub(crate) const fn from_std(value: std::str::Utf8Error) -> Self {
+        Self {
+            valid_up_to: value.valid_up_to(),
+            // Hand-written `Option::map` (not const); assumes std's length fits in `u8` — confirm.
+            error_len: match value.error_len() {
+                Some(error_len) => Some(error_len as u8),
+                None => None,
+            },
+        }
+    }
+}
+
+impl Display for Utf8Error {
+    /// Produces the same messages as `std::str::Utf8Error`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // `valid_up_to` appears in both messages, so read it once.
+        let index = self.valid_up_to;
+        match self.error_len {
+            // An invalid sequence whose length is known.
+            Some(bad_len) => write!(
+                f,
+                "invalid utf-8 sequence of {} bytes from index {}",
+                bad_len, index
+            ),
+            // No length recorded: the message reports a truncated sequence.
+            None => write!(f, "incomplete utf-8 byte sequence from index {}", index),
+        }
+    }
+}
+
+impl From<std::str::Utf8Error> for Utf8Error {
+    #[inline]
+    fn from(value: std::str::Utf8Error) -> Self {
+        Self::from_std(value)
+    }
+}
+
+impl Error for Utf8Error {}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct FromUtf8Error {
+    pub(crate) bytes: Vec<u8>,
+    pub(crate) error: Utf8Error,
+}
+
+impl FromUtf8Error {
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.bytes[..]
+    }
+
+    #[must_use]
+    pub fn into_bytes(self) -> Vec<u8> {
+        self.bytes
+    }
+
+    pub fn utf8_error(&self) -> Utf8Error {
+        self.error
+    }
+}
+
+impl Display for FromUtf8Error {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        Display::fmt(&self.error, f)
+    }
+}
+
+impl Error for FromUtf8Error {}
+
+#[derive(Copy, Eq, PartialEq, Clone, Debug)]
+pub enum ParseError<E> {
+    InvalidUtf8(Utf8Error),
+    Err(E),
+}
+
+impl<E> Display for ParseError<E>
+where
+    E: Display,
+{
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match self {
+            ParseError::InvalidUtf8(err) => Display::fmt(err, f),
+            ParseError::Err(err) => Display::fmt(err, f),
+        }
+    }
+}
+
+impl<E> Error for ParseError<E>
+where
+    E: Error + 'static,
+{
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        match self {
+            ParseError::InvalidUtf8(err) => Some(err),
+            ParseError::Err(err) => Some(err),
+        }
+    }
+}
diff --git a/crates/java_string/src/iter.rs b/crates/java_string/src/iter.rs
new file mode 100644
index 000000000..3762f6d72
--- /dev/null
+++ b/crates/java_string/src/iter.rs
@@ -0,0 +1,977 @@
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::iter::{Chain, Copied, Filter, FlatMap, Flatten, FusedIterator, Map};
+use std::{option, slice};
+
+use crate::validations::{next_code_point, next_code_point_reverse};
+use crate::{CharEscapeIter, JavaCodePoint, JavaStr, JavaStrPattern};
+macro_rules! delegate {
+    (Iterator for $ty:ident $(<$($lt:lifetime),+>)? => $item:ty $(, DoubleEnded = $double_ended:ty)?) => {
+        impl$(<$($lt),+>)? Iterator for $ty$(<$($lt),+>)? {
+            type Item = $item;
+
+            #[inline]
+            fn next(&mut self) -> Option<Self::Item> {
+                self.inner.next()
+            }
+
+            #[inline]
+            fn size_hint(&self) -> (usize, Option<usize>) {
+                self.inner.size_hint()
+            }
+
+            #[inline]
+            fn count(self) -> usize {
+                self.inner.count()
+            }
+
+            #[inline]
+            fn last(self) -> Option<Self::Item> {
+                self.inner.last()
+            }
+
+            #[inline]
+            fn nth(&mut self, n: usize) -> Option<Self::Item> {
+                self.inner.nth(n)
+            }
+
+            #[inline]
+            fn all<F>(&mut self, f: F) -> bool
+            where
+                F: FnMut(Self::Item) -> bool,
+            {
+                self.inner.all(f)
+            }
+
+            #[inline]
+            fn any<F>(&mut self, f: F) -> bool
+            where
+                F: FnMut(Self::Item) -> bool,
+            {
+                self.inner.any(f)
+            }
+
+            #[inline]
+            fn find<P>(&mut self, predicate: P) -> Option<Self::Item>
+            where
+                P: FnMut(&Self::Item) -> bool,
+            {
+                self.inner.find(predicate)
+            }
+
+            #[inline]
+            fn position<P>(&mut self, predicate: P) -> Option<usize>
+            where
+                P: FnMut(Self::Item) -> bool,
+            {
+                self.inner.position(predicate)
+            }
+
+            $(
+            #[inline]
+            fn rposition<P>(&mut self, predicate: P) -> Option<usize>
+            where
+                P: FnMut(Self::Item) -> bool,
+            {
+                let _test: $double_ended = ();
+                self.inner.rposition(predicate)
+            }
+            )?
+        }
+    };
+
+    (DoubleEndedIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? DoubleEndedIterator for $ty$(<$($lt),+>)? {
+            #[inline]
+            fn next_back(&mut self) -> Option<Self::Item> {
+                self.inner.next_back()
+            }
+
+            #[inline]
+            fn nth_back(&mut self, n: usize) -> Option<Self::Item> {
+                self.inner.nth_back(n)
+            }
+
+            #[inline]
+            fn rfind<P>(&mut self, predicate: P) -> Option<Self::Item>
+            where
+                P: FnMut(&Self::Item) -> bool,
+            {
+                self.inner.rfind(predicate)
+            }
+        }
+    };
+
+    (ExactSizeIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? ExactSizeIterator for $ty$(<$($lt),+>)? {
+            #[inline]
+            fn len(&self) -> usize {
+                self.inner.len()
+            }
+        }
+    };
+
+    (FusedIterator for $ty:ident $(<$($lt:lifetime),+>)?) => {
+        impl$(<$($lt),+>)? FusedIterator for $ty$(<$($lt),+>)? {}
+    };
+
+    (Iterator, DoubleEndedIterator, ExactSizeIterator, FusedIterator for $ty:ident $(<$($lt:lifetime),+>)? => $item:ty) => {
+        delegate!(Iterator for $ty$(<$($lt),+>)? => $item, DoubleEnded = ());
+        delegate!(DoubleEndedIterator for $ty$(<$($lt),+>)?);
+        delegate!(ExactSizeIterator for $ty$(<$($lt),+>)?);
+        delegate!(FusedIterator for $ty$(<$($lt),+>)?);
+    };
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct Bytes<'a> {
+    pub(crate) inner: Copied<slice::Iter<'a, u8>>,
+}
+delegate!(Iterator, DoubleEndedIterator, ExactSizeIterator, FusedIterator for Bytes<'a> => u8);
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeDebug<'a> {
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Chain<
+        Flatten<option::IntoIter<CharEscapeIter>>,
+        FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+    >,
+}
+delegate!(Iterator for EscapeDebug<'a> => char);
+delegate!(FusedIterator for EscapeDebug<'a>);
+impl<'a> Display for EscapeDebug<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeDefault<'a> {
+    pub(crate) inner: FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+}
+delegate!(Iterator for EscapeDefault<'a> => char);
+delegate!(FusedIterator for EscapeDefault<'a>);
+impl<'a> Display for EscapeDefault<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct EscapeUnicode<'a> {
+    pub(crate) inner: FlatMap<Chars<'a>, CharEscapeIter, fn(JavaCodePoint) -> CharEscapeIter>,
+}
+delegate!(Iterator for EscapeUnicode<'a> => char);
+delegate!(FusedIterator for EscapeUnicode<'a>);
+impl<'a> Display for EscapeUnicode<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        self.clone().try_for_each(|c| f.write_char(c))
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct Lines<'a> {
+    pub(crate) inner: Map<SplitInclusive<'a, char>, fn(&JavaStr) -> &JavaStr>,
+}
+delegate!(Iterator for Lines<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for Lines<'a>);
+delegate!(FusedIterator for Lines<'a>);
+
+#[derive(Clone)]
+#[must_use]
+pub struct Chars<'a> {
+    pub(crate) inner: slice::Iter<'a, u8>,
+}
+
+impl<'a> Iterator for Chars<'a> {
+    type Item = JavaCodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string
+        // and the resulting `ch` is a valid Unicode Scalar Value or surrogate
+        // code point.
+        unsafe { next_code_point(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch)) }
+    }
+
+    // TODO: std has an optimized count impl
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let len = self.inner.len();
+        // `(len + 3)` can't overflow, because we know that the `slice::Iter`
+        // belongs to a slice in memory which has a maximum length of
+        // `isize::MAX` (that's well below `usize::MAX`).
+        ((len + 3) / 4, Some(len))
+    }
+
+    #[inline]
+    fn last(mut self) -> Option<JavaCodePoint> {
+        // No need to go through the entire string.
+        self.next_back()
+    }
+}
+
+impl Debug for Chars<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        // Renders as `Chars([...])`, listing the code points still to be yielded.
+        f.write_str("Chars(")?;
+        f.debug_list().entries(self.clone()).finish()?;
+        f.write_str(")")
+    }
+}
+
+impl<'a> DoubleEndedIterator for Chars<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        // SAFETY: `JavaStr` invariant says `self.inner` is a semi-valid UTF-8 string
+        // and the resulting `ch` is a valid Unicode Scalar Value or surrogate
+        // code point.
+        unsafe {
+            next_code_point_reverse(&mut self.inner).map(|ch| JavaCodePoint::from_u32_unchecked(ch))
+        }
+    }
+}
+
+impl FusedIterator for Chars<'_> {}
+
+impl<'a> Chars<'a> {
+    #[inline]
+    #[must_use]
+    pub fn as_str(&self) -> &'a JavaStr {
+        // SAFETY: `Chars` is only made from a JavaStr, which guarantees the iter is
+        // semi-valid UTF-8.
+        unsafe { JavaStr::from_semi_utf8_unchecked(self.inner.as_slice()) }
+    }
+}
+
+#[derive(Clone, Debug)]
+#[must_use]
+pub struct CharIndices<'a> {
+    pub(crate) front_offset: usize,
+    pub(crate) inner: Chars<'a>,
+}
+
+impl<'a> Iterator for CharIndices<'a> {
+    type Item = (usize, JavaCodePoint);
+
+    /// Yields the next code point together with the byte offset it starts at,
+    /// then advances `front_offset` past it.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, JavaCodePoint)> {
+        let bytes_before = self.inner.inner.len();
+        let ch = self.inner.next()?;
+        let index = self.front_offset;
+        // The shrinkage of the underlying byte iterator is the encoded width of `ch`.
+        self.front_offset += bytes_before - self.inner.inner.len();
+        Some((index, ch))
+    }
+
+    /// Counting does not need the offsets, so defer to the inner `Chars`.
+    #[inline]
+    fn count(self) -> usize {
+        self.inner.count()
+    }
+
+    /// Same bounds as the inner `Chars`, since items map one-to-one.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+
+    /// Takes the last item from the back instead of walking the whole string.
+    #[inline]
+    fn last(mut self) -> Option<(usize, JavaCodePoint)> {
+        self.next_back()
+    }
+}
+
+impl<'a> DoubleEndedIterator for CharIndices<'a> {
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, JavaCodePoint)> {
+        let ch = self.inner.next_back()?;
+        // What is left after popping `ch` lies entirely in front of it.
+        let index = self.front_offset + self.inner.inner.len();
+        Some((index, ch))
+    }
+}
+
+impl FusedIterator for CharIndices<'_> {}
+
+impl<'a> CharIndices<'a> {
+    #[inline]
+    #[must_use]
+    pub fn as_str(&self) -> &'a JavaStr {
+        self.inner.as_str()
+    }
+}
+
+#[must_use]
+#[derive(Debug, Clone)]
+pub struct Matches<'a, P> {
+    pub(crate) str: &'a JavaStr,
+    pub(crate) pat: P,
+}
+
+impl<'a, P> Iterator for Matches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let Some((index, len)) = self.pat.find_in(self.str) else {
+            self.str = Default::default();
+            return None;
+        };
+        // NOTE(review): a zero-length match at index 0 leaves `self.str` unchanged — confirm.
+        // SAFETY: pattern returns valid indices
+        let found = unsafe { self.str.get_unchecked(index..index + len) };
+        self.str = unsafe { self.str.get_unchecked(index + len..) };
+        Some(found)
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for Matches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let Some((index, len)) = self.pat.rfind_in(self.str) else {
+            self.str = Default::default();
+            return None;
+        };
+        // NOTE(review): a zero-length match at the very end leaves `self.str` unchanged — confirm.
+        // SAFETY: pattern returns valid indices
+        let found = unsafe { self.str.get_unchecked(index..index + len) };
+        self.str = unsafe { self.str.get_unchecked(..index) };
+        Some(found)
+    }
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct RMatches<'a, P> {
+    pub(crate) inner: Matches<'a, P>,
+}
+
+impl<'a, P> Iterator for RMatches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = &'a JavaStr;
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RMatches<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+#[must_use]
+#[derive(Clone, Debug)]
+pub struct MatchIndices<'a, P> {
+    pub(crate) str: &'a JavaStr,
+    pub(crate) start: usize,
+    pub(crate) pat: P,
+}
+
+impl<'a, P> Iterator for MatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = (usize, &'a JavaStr);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let Some((index, len)) = self.pat.find_in(self.str) else {
+            // Exhausted: account for the unsearched tail, then empty the haystack.
+            self.start += self.str.len();
+            self.str = Default::default();
+            return None;
+        };
+        let full_index = self.start + index;
+        self.start = full_index + len;
+        // SAFETY: pattern returns valid indices
+        let found = unsafe { self.str.get_unchecked(index..index + len) };
+        self.str = unsafe { self.str.get_unchecked(index + len..) };
+        Some((full_index, found))
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for MatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let Some((index, len)) = self.pat.rfind_in(self.str) else {
+            self.str = Default::default();
+            return None;
+        };
+        // `self.str` only shrinks from the back here, so `self.start` still marks its front.
+        // SAFETY: pattern returns valid indices
+        let found = unsafe { self.str.get_unchecked(index..index + len) };
+        self.str = unsafe { self.str.get_unchecked(..index) };
+        Some((self.start + index, found))
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct RMatchIndices<'a, P> {
+    pub(crate) inner: MatchIndices<'a, P>,
+}
+
+impl<'a, P> Iterator for RMatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    type Item = (usize, &'a JavaStr);
+
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P> DoubleEndedIterator for RMatchIndices<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.inner.next()
+    }
+}
+
+#[derive(Clone, Debug)]
+struct SplitHelper<'a, P> {
+    start: usize,
+    end: usize,
+    // Exclusive upper bound for pattern searches; below `end` only after `next_back_inclusive`.
+    search_end: usize,
+    haystack: &'a JavaStr,
+    pat: P,
+    allow_trailing_empty: bool,
+    finished: bool,
+    had_empty_match: bool,
+}
+
+impl<'a, P> SplitHelper<'a, P>
+where
+    P: JavaStrPattern,
+{
+    #[inline]
+    fn new(haystack: &'a JavaStr, pat: P, allow_trailing_empty: bool) -> Self {
+        Self {
+            start: 0,
+            end: haystack.len(),
+            search_end: haystack.len(),
+            haystack,
+            pat,
+            allow_trailing_empty,
+            finished: false,
+            had_empty_match: false,
+        }
+    }
+
+    #[inline]
+    fn get_end(&mut self) -> Option<&'a JavaStr> {
+        let already_finished = std::mem::replace(&mut self.finished, true);
+        if already_finished || (!self.allow_trailing_empty && self.start == self.end) {
+            return None;
+        }
+        // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+        Some(unsafe { self.haystack.get_unchecked(self.start..self.end) })
+    }
+
+    #[inline]
+    fn next_match(&mut self) -> Option<(usize, usize)> {
+        // Only search the text that neither iteration direction has consumed yet.
+        // SAFETY: `self.start` and `self.search_end` always lie on unicode boundaries.
+        let substr = unsafe { self.haystack.get_unchecked(self.start..self.search_end) };
+
+        let result = if self.had_empty_match {
+            // if we had an empty match before, we are going to find the empty match again.
+            // don't do that, search from the next index along.
+            if substr.is_empty() {
+                None
+            } else {
+                // SAFETY: we can pop the string because we already checked if the string is
+                // empty above
+                let first_char_len = unsafe { substr.chars().next().unwrap_unchecked().len_utf8() };
+                let popped_str = unsafe { substr.get_unchecked(first_char_len..) };
+                self.pat
+                    .find_in(popped_str)
+                    .map(|(index, len)| (index + first_char_len + self.start, len))
+            }
+        } else {
+            self.pat
+                .find_in(substr)
+                .map(|(index, len)| (index + self.start, len))
+        };
+
+        self.had_empty_match = result.is_some_and(|(_, len)| len == 0);
+
+        result
+    }
+
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        match self.next_match() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(self.start..index);
+                self.start = index + len;
+                Some(elt)
+            },
+            None => self.get_end(),
+        }
+    }
+
+    #[inline]
+    fn next_inclusive(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        match self.next_match() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(self.start..index + len);
+                self.start = index + len;
+                Some(elt)
+            },
+            None => self.get_end(),
+        }
+    }
+
+    #[inline]
+    fn next_match_back(&mut self) -> Option<(usize, usize)> {
+        // SAFETY: `self.start` and `self.search_end` always lie on unicode boundaries.
+        let substr = unsafe { self.haystack.get_unchecked(self.start..self.search_end) };
+
+        let found = if self.had_empty_match {
+            // if we had an empty match before, we are going to find the empty match again.
+            // don't do that, search from the next index along.
+            if substr.is_empty() {
+                None
+            } else {
+                // SAFETY: we can pop the string because we already checked if the string is
+                // empty above
+                let last_char_len =
+                    unsafe { substr.chars().next_back().unwrap_unchecked().len_utf8() };
+                let popped_str = unsafe { substr.get_unchecked(..substr.len() - last_char_len) };
+                self.pat.rfind_in(popped_str)
+            }
+        } else {
+            self.pat.rfind_in(substr)
+        };
+        // `substr` starts at `self.start`, so shift the match back into haystack indices.
+        let result = found.map(|(index, len)| (index + self.start, len));
+
+        self.had_empty_match = result.is_some_and(|(_, len)| len == 0);
+
+        result
+    }
+
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        if !self.allow_trailing_empty {
+            self.allow_trailing_empty = true;
+            match self.next_back() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        match self.next_match_back() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(index + len..self.end);
+                self.end = index;
+                self.search_end = index;
+                Some(elt)
+            },
+            None => unsafe {
+                // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+                self.finished = true;
+                Some(self.haystack.get_unchecked(self.start..self.end))
+            },
+        }
+    }
+
+    #[inline]
+    fn next_back_inclusive(&mut self) -> Option<&'a JavaStr> {
+        if self.finished {
+            return None;
+        }
+
+        if !self.allow_trailing_empty {
+            self.allow_trailing_empty = true;
+            match self.next_back_inclusive() {
+                Some(elt) if !elt.is_empty() => return Some(elt),
+                _ => {
+                    if self.finished {
+                        return None;
+                    }
+                }
+            }
+        }
+
+        match self.next_match_back() {
+            Some((index, len)) => unsafe {
+                // SAFETY: pattern guarantees valid indices
+                let elt = self.haystack.get_unchecked(index + len..self.end);
+                self.end = index + len;
+                // Keep the terminator in the next piece, but never match it a second time.
+                self.search_end = index;
+                Some(elt)
+            },
+            None => unsafe {
+                // SAFETY: `self.start` and `self.end` always lie on unicode boundaries.
+                self.finished = true;
+                Some(self.haystack.get_unchecked(self.start..self.end))
+            },
+        }
+    }
+}
+
+/// Double-ended iterator over the pieces of a `JavaStr` separated by matches
+/// of the pattern `P`. See [`std::str::Split`] for the standard-library
+/// analogue.
+///
+/// All iteration is forwarded to the wrapped `SplitHelper`.
+#[derive(Clone, Debug)]
+pub struct Split<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P: JavaStrPattern> Split<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat`.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        // NOTE(review): the `true` flag is presumably `allow_trailing_empty`;
+        // confirm against `SplitHelper::new`.
+        let inner = SplitHelper::new(haystack, pat, true);
+        Self { inner }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for Split<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Front-to-back step: forwards to `SplitHelper::next`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P: JavaStrPattern> DoubleEndedIterator for Split<'a, P> {
+    /// Back-to-front step: forwards to `SplitHelper::next_back`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for Split<'a, P> {}
+
+/// Like `Split`, but iterating in the opposite direction: `next` pulls from
+/// the back of the wrapped `SplitHelper` and `next_back` pulls from the
+/// front. See [`std::str::RSplit`] for the standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct RSplit<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P: JavaStrPattern> RSplit<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat`.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        // NOTE(review): the `true` flag is presumably `allow_trailing_empty`;
+        // confirm against `SplitHelper::new`.
+        let inner = SplitHelper::new(haystack, pat, true);
+        Self { inner }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for RSplit<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Forwards to `SplitHelper::next_back`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P: JavaStrPattern> DoubleEndedIterator for RSplit<'a, P> {
+    /// Forwards to `SplitHelper::next`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for RSplit<'a, P> {}
+
+/// Double-ended iterator over the pieces of a `JavaStr` separated by matches
+/// of the pattern `P`, built with the helper's flag set to `false` (unlike
+/// `Split`, which passes `true`). See [`std::str::SplitTerminator`] for the
+/// standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct SplitTerminator<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P: JavaStrPattern> SplitTerminator<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat`.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        // NOTE(review): the `false` flag is presumably `allow_trailing_empty`;
+        // confirm against `SplitHelper::new`.
+        let inner = SplitHelper::new(haystack, pat, false);
+        Self { inner }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for SplitTerminator<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Front-to-back step: forwards to `SplitHelper::next`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P: JavaStrPattern> DoubleEndedIterator for SplitTerminator<'a, P> {
+    /// Back-to-front step: forwards to `SplitHelper::next_back`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for SplitTerminator<'a, P> {}
+
+/// Like `SplitTerminator`, but iterating in the opposite direction: `next`
+/// pulls from the back of the wrapped `SplitHelper` and `next_back` pulls
+/// from the front. See [`std::str::RSplitTerminator`] for the
+/// standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct RSplitTerminator<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P: JavaStrPattern> RSplitTerminator<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat`.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        // NOTE(review): the `false` flag is presumably `allow_trailing_empty`;
+        // confirm against `SplitHelper::new`.
+        let inner = SplitHelper::new(haystack, pat, false);
+        Self { inner }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for RSplitTerminator<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Forwards to `SplitHelper::next_back`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_back()
+    }
+}
+
+impl<'a, P: JavaStrPattern> DoubleEndedIterator for RSplitTerminator<'a, P> {
+    /// Forwards to `SplitHelper::next`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next()
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for RSplitTerminator<'a, P> {}
+
+/// Double-ended iterator over the pieces of a `JavaStr` split by the pattern
+/// `P`, driven by the helper's `*_inclusive` stepping functions. See
+/// [`std::str::SplitInclusive`] for the standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct SplitInclusive<'a, P> {
+    inner: SplitHelper<'a, P>,
+}
+
+impl<'a, P: JavaStrPattern> SplitInclusive<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat`.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P) -> Self {
+        // NOTE(review): the `false` flag is presumably `allow_trailing_empty`;
+        // confirm against `SplitHelper::new`.
+        let inner = SplitHelper::new(haystack, pat, false);
+        Self { inner }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for SplitInclusive<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Front-to-back step: forwards to `SplitHelper::next_inclusive`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_inclusive()
+    }
+}
+
+impl<'a, P: JavaStrPattern> DoubleEndedIterator for SplitInclusive<'a, P> {
+    /// Back-to-front step: forwards to `SplitHelper::next_back_inclusive`.
+    #[inline]
+    fn next_back(&mut self) -> Option<&'a JavaStr> {
+        self.inner.next_back_inclusive()
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for SplitInclusive<'a, P> {}
+
+/// Forward iterator that yields at most `count` pieces of a `JavaStr` split
+/// by the pattern `P`. See [`std::str::SplitN`] for the standard-library
+/// analogue.
+#[derive(Clone, Debug)]
+pub struct SplitN<'a, P> {
+    inner: SplitHelper<'a, P>,
+    /// Number of items that may still be yielded.
+    count: usize,
+}
+
+impl<'a, P: JavaStrPattern> SplitN<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat` and limited
+    /// to `count` items.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P, count: usize) -> Self {
+        let inner = SplitHelper::new(haystack, pat, true);
+        Self { inner, count }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for SplitN<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Returns `None` once the budget is used up. The last permitted item is
+    /// whatever `SplitHelper::get_end` returns; every earlier item comes from
+    /// `SplitHelper::next`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        if self.count == 0 {
+            return None;
+        }
+        self.count -= 1;
+        if self.count == 0 {
+            self.inner.get_end()
+        } else {
+            self.inner.next()
+        }
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for SplitN<'a, P> {}
+
+/// Iterator that yields at most `count` pieces of a `JavaStr` split by the
+/// pattern `P`, taking pieces from the back of the helper. See
+/// [`std::str::RSplitN`] for the standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct RSplitN<'a, P> {
+    inner: SplitHelper<'a, P>,
+    /// Number of items that may still be yielded.
+    count: usize,
+}
+
+impl<'a, P: JavaStrPattern> RSplitN<'a, P> {
+    /// Creates the iterator over `haystack`, splitting on `pat` and limited
+    /// to `count` items.
+    #[inline]
+    pub(crate) fn new(haystack: &'a JavaStr, pat: P, count: usize) -> Self {
+        let inner = SplitHelper::new(haystack, pat, true);
+        Self { inner, count }
+    }
+}
+
+impl<'a, P: JavaStrPattern> Iterator for RSplitN<'a, P> {
+    type Item = &'a JavaStr;
+
+    /// Returns `None` once the budget is used up. The last permitted item is
+    /// whatever `SplitHelper::get_end` returns; every earlier item comes from
+    /// `SplitHelper::next_back`.
+    #[inline]
+    fn next(&mut self) -> Option<&'a JavaStr> {
+        if self.count == 0 {
+            return None;
+        }
+        self.count -= 1;
+        if self.count == 0 {
+            self.inner.get_end()
+        } else {
+            self.inner.next_back()
+        }
+    }
+}
+
+impl<'a, P: JavaStrPattern> FusedIterator for RSplitN<'a, P> {}
+
+/// Iterator yielding `&JavaStr` pieces, implemented on top of a byte-level
+/// `slice::Split` whose chunks are filtered and then mapped to `JavaStr`.
+///
+/// NOTE(review): judging by the name, the split predicate matches ASCII
+/// whitespace and the filter drops empty chunks; the function pointers are
+/// supplied where this struct is constructed — confirm there. See
+/// [`std::str::SplitAsciiWhitespace`] for the standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct SplitAsciiWhitespace<'a> {
+    // Function-pointer types keep the adapter chain nameable, at the cost of
+    // a long type that clippy would otherwise flag.
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Map<
+        Filter<slice::Split<'a, u8, fn(&u8) -> bool>, fn(&&[u8]) -> bool>,
+        fn(&[u8]) -> &JavaStr,
+    >,
+}
+// The iterator trait impls are generated by the `delegate!` macro
+// (presumably forwarding to `inner`; see the macro definition).
+delegate!(Iterator for SplitAsciiWhitespace<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for SplitAsciiWhitespace<'a>);
+delegate!(FusedIterator for SplitAsciiWhitespace<'a>);
+
+/// Iterator yielding `&JavaStr` pieces, implemented as a `Filter` over a
+/// `Split` whose pattern is a `fn(JavaCodePoint) -> bool`.
+///
+/// NOTE(review): judging by the name, the pattern matches whitespace code
+/// points and the filter drops empty pieces; the function pointers are
+/// supplied where this struct is constructed — confirm there. See
+/// [`std::str::SplitWhitespace`] for the standard-library analogue.
+#[derive(Clone, Debug)]
+pub struct SplitWhitespace<'a> {
+    // Function-pointer types keep the adapter chain nameable, at the cost of
+    // a long type that clippy would otherwise flag.
+    #[allow(clippy::type_complexity)]
+    pub(crate) inner: Filter<Split<'a, fn(JavaCodePoint) -> bool>, fn(&&JavaStr) -> bool>,
+}
+// The iterator trait impls are generated by the `delegate!` macro
+// (presumably forwarding to `inner`; see the macro definition).
+delegate!(Iterator for SplitWhitespace<'a> => &'a JavaStr);
+delegate!(DoubleEndedIterator for SplitWhitespace<'a>);
+delegate!(FusedIterator for SplitWhitespace<'a>);
diff --git a/crates/java_string/src/lib.rs b/crates/java_string/src/lib.rs
new file mode 100644
index 000000000..57f035944
--- /dev/null
+++ b/crates/java_string/src/lib.rs
@@ -0,0 +1,27 @@
+#![doc = include_str!("../README.md")]
+
+mod cesu8;
+mod char;
+mod error;
+mod iter;
+mod owned;
+mod pattern;
+#[cfg(feature = "serde")]
+mod serde;
+mod slice;
+pub(crate) mod validations;
+
+pub use cesu8::*;
+pub use char::*;
+pub use error::*;
+pub use iter::*;
+pub use owned::*;
+pub use pattern::*;
+pub use slice::*;
+
+/// Creates a `JavaString` from format arguments.
+///
+/// The arguments are forwarded unchanged to [`std::format!`], and the
+/// resulting `String` is converted with `JavaString::from`. It accepts
+/// exactly the syntax that `format!` accepts.
+#[macro_export]
+macro_rules! format_java {
+    ($($arg:tt)*) => {
+        // `$crate` and `::std` keep the expansion independent of what the
+        // caller has in scope.
+        $crate::JavaString::from(::std::format!($($arg)*))
+    }
+}
diff --git a/crates/java_string/src/owned.rs b/crates/java_string/src/owned.rs
new file mode 100644
index 000000000..e03f82a7d
--- /dev/null
+++ b/crates/java_string/src/owned.rs
@@ -0,0 +1,1401 @@
+use std::borrow::{Borrow, BorrowMut, Cow};
+use std::collections::{Bound, TryReserveError};
+use std::convert::Infallible;
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::hash::{Hash, Hasher};
+use std::iter::FusedIterator;
+use std::ops::{
+    Add, AddAssign, Deref, DerefMut, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull,
+    RangeInclusive, RangeTo, RangeToInclusive,
+};
+use std::rc::Rc;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::{ptr, slice};
+
+use crate::validations::{
+    run_utf8_full_validation_from_semi, run_utf8_semi_validation, to_range_checked,
+};
+use crate::{Chars, FromUtf8Error, JavaCodePoint, JavaStr, Utf8Error};
+
+/// An owned, growable string stored as a byte vector.
+///
+/// The bytes are kept in "semi-valid UTF-8": UTF-8 in which surrogate code
+/// points may also appear (see `from_semi_utf8`). Equality and ordering are
+/// derived, so they are the equality and ordering of the underlying bytes.
+#[derive(Default, PartialEq, PartialOrd, Eq, Ord)]
+pub struct JavaString {
+    // Invariant relied on by the unsafe code in this file: always semi-valid
+    // UTF-8.
+    vec: Vec<u8>,
+}
+
+impl JavaString {
+    /// Creates an empty `JavaString` backed by `Vec::new()`. See
+    /// [String::new].
+    #[inline]
+    #[must_use]
+    pub const fn new() -> JavaString {
+        Self { vec: Vec::new() }
+    }
+
+    /// Creates an empty `JavaString` backed by
+    /// `Vec::with_capacity(capacity)`. See [String::with_capacity].
+    #[inline]
+    #[must_use]
+    pub fn with_capacity(capacity: usize) -> JavaString {
+        let vec = Vec::with_capacity(capacity);
+        Self { vec }
+    }
+
+    /// Converts `vec` to a `JavaString` if it is fully-valid UTF-8, i.e. UTF-8
+    /// without surrogate code points. See [String::from_utf8].
+    ///
+    /// On failure the input bytes are handed back inside the returned
+    /// `FromUtf8Error`, next to the converted validation error.
+    #[inline]
+    pub fn from_full_utf8(vec: Vec<u8>) -> Result<JavaString, FromUtf8Error> {
+        if let Err(e) = std::str::from_utf8(&vec) {
+            return Err(FromUtf8Error {
+                bytes: vec,
+                error: e.into(),
+            });
+        }
+        Ok(JavaString { vec })
+    }
+
+    /// Converts `vec` to a `JavaString` if it is semi-valid UTF-8, i.e. UTF-8
+    /// with surrogate code points.
+    ///
+    /// On failure the input bytes are handed back inside the returned
+    /// `FromUtf8Error`, next to the validation error.
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaString};
+    ///
+    /// assert_eq!(
+    ///     JavaString::from_semi_utf8(b"Hello World!".to_vec()).unwrap(),
+    ///     "Hello World!"
+    /// );
+    /// assert_eq!(
+    ///     JavaString::from_semi_utf8(vec![0xf0, 0x9f, 0x92, 0x96]).unwrap(),
+    ///     "💖"
+    /// );
+    /// assert_eq!(
+    ///     JavaString::from_semi_utf8(vec![0xed, 0xa0, 0x80]).unwrap(),
+    ///     JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap())
+    /// );
+    /// assert!(JavaString::from_semi_utf8(vec![0xed]).is_err());
+    /// ```
+    pub fn from_semi_utf8(vec: Vec<u8>) -> Result<JavaString, FromUtf8Error> {
+        if let Err(error) = run_utf8_semi_validation(&vec) {
+            return Err(FromUtf8Error { bytes: vec, error });
+        }
+        Ok(JavaString { vec })
+    }
+
+    /// Converts `v` to a `Cow<JavaStr>`, replacing invalid semi-UTF-8 with the
+    /// replacement character �.
+    ///
+    /// Input that validates as a whole is borrowed; otherwise an owned
+    /// string is assembled from the valid stretches, with one replacement
+    /// character inserted per validation error.
+    ///
+    /// ```
+    /// # use std::borrow::Cow;
+    /// # use java_string::{JavaStr, JavaString};
+    ///
+    /// let sparkle_heart = [0xf0, 0x9f, 0x92, 0x96];
+    /// let result = JavaString::from_semi_utf8_lossy(&sparkle_heart);
+    /// assert!(matches!(result, Cow::Borrowed(_)));
+    /// assert_eq!(result, JavaStr::from_str("💖"));
+    ///
+    /// let foobar_with_error = [b'f', b'o', b'o', 0xed, b'b', b'a', b'r'];
+    /// let result = JavaString::from_semi_utf8_lossy(&foobar_with_error);
+    /// assert!(matches!(result, Cow::Owned(_)));
+    /// assert_eq!(result, JavaStr::from_str("foo�bar"));
+    /// ```
+    #[must_use]
+    pub fn from_semi_utf8_lossy(v: &[u8]) -> Cow<'_, JavaStr> {
+        const REPLACEMENT: &str = "\u{FFFD}";
+
+        let mut error = match run_utf8_semi_validation(v) {
+            // SAFETY: validation succeeded
+            Ok(()) => return Cow::Borrowed(unsafe { JavaStr::from_semi_utf8_unchecked(v) }),
+            Err(error) => error,
+        };
+
+        // SAFETY: validation succeeded up to this index
+        let mut result = unsafe {
+            JavaString::from_semi_utf8_unchecked(v.get_unchecked(..error.valid_up_to).to_vec())
+        };
+        // Offset in `v` of the sub-slice that `error` is relative to.
+        let mut index = 0;
+        loop {
+            // Stand in for the bad bytes, then resume right after them. When
+            // the validator reports no error length, skip a single byte.
+            result.push_str(REPLACEMENT);
+            index += error.valid_up_to + error.error_len.unwrap_or(1) as usize;
+
+            let rest = &v[index..];
+            match run_utf8_semi_validation(rest) {
+                Ok(()) => {
+                    // SAFETY: validation succeeded
+                    result.push_java_str(unsafe { JavaStr::from_semi_utf8_unchecked(rest) });
+                    return Cow::Owned(result);
+                }
+                Err(next) => {
+                    // SAFETY: validation succeeded up to this index
+                    result.push_java_str(unsafe {
+                        JavaStr::from_semi_utf8_unchecked(rest.get_unchecked(..next.valid_up_to))
+                    });
+                    error = next;
+                }
+            }
+        }
+    }
+
+    /// Wraps `bytes` in a `JavaString` without validating them.
+    ///
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+    #[inline]
+    #[must_use]
+    pub unsafe fn from_semi_utf8_unchecked(bytes: Vec<u8>) -> JavaString {
+        Self { vec: bytes }
+    }
+
+    /// Consumes the string and returns its byte vector. See
+    /// [String::into_bytes].
+    #[inline]
+    #[must_use]
+    pub fn into_bytes(self) -> Vec<u8> {
+        let Self { vec } = self;
+        vec
+    }
+
+    /// Borrows the contents as a `JavaStr`. See [String::as_str].
+    #[inline]
+    #[must_use]
+    pub fn as_java_str(&self) -> &JavaStr {
+        // SAFETY: this str has semi-valid UTF-8
+        unsafe { JavaStr::from_semi_utf8_unchecked(self.vec.as_slice()) }
+    }
+
+    /// Mutably borrows the contents as a `JavaStr`. See
+    /// [String::as_mut_str].
+    #[inline]
+    #[must_use]
+    pub fn as_mut_java_str(&mut self) -> &mut JavaStr {
+        // SAFETY: this str has semi-valid UTF-8
+        unsafe { JavaStr::from_semi_utf8_unchecked_mut(self.vec.as_mut_slice()) }
+    }
+
+    /// Tries to convert this `JavaString` to a `String`, returning an error if
+    /// it is not fully valid UTF-8, i.e. has no surrogate code points.
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaString};
+    ///
+    /// assert_eq!(
+    ///     JavaString::from("Hello World!").into_string().unwrap(),
+    ///     "Hello World!"
+    /// );
+    /// assert_eq!(
+    ///     JavaString::from("abc\0ℝ💣").into_string().unwrap(),
+    ///     "abc\0ℝ💣"
+    /// );
+    ///
+    /// let string_with_error = JavaString::from("abc")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str();
+    /// assert!(string_with_error.into_string().is_err());
+    /// ```
+    pub fn into_string(self) -> Result<String, Utf8Error> {
+        match run_utf8_full_validation_from_semi(self.as_bytes()) {
+            // SAFETY: validation succeeded
+            Ok(_) => Ok(unsafe { self.into_string_unchecked() }),
+            Err(error) => Err(error),
+        }
+    }
+
+    /// Converts this `JavaString` to a `String` without validating it.
+    ///
+    /// # Safety
+    ///
+    /// This string must be fully valid UTF-8, i.e. have no surrogate code
+    /// points.
+    #[inline]
+    #[must_use]
+    pub unsafe fn into_string_unchecked(self) -> String {
+        let JavaString { vec } = self;
+        // SAFETY: preconditions checked by caller
+        String::from_utf8_unchecked(vec)
+    }
+
+    /// Appends the bytes of `string` to the end of this string. See
+    /// [String::push_str].
+    #[inline]
+    pub fn push_java_str(&mut self, string: &JavaStr) {
+        let bytes = string.as_bytes();
+        self.vec.extend_from_slice(bytes);
+    }
+
+    /// Appends the bytes of `string` to the end of this string. See
+    /// [String::push_str].
+    #[inline]
+    pub fn push_str(&mut self, string: &str) {
+        let bytes = string.as_bytes();
+        self.vec.extend_from_slice(bytes);
+    }
+
+    /// Returns the capacity of the backing byte vector. See
+    /// [String::capacity].
+    #[inline]
+    #[must_use]
+    pub fn capacity(&self) -> usize {
+        self.vec.capacity()
+    }
+
+    /// Forwards to `Vec::reserve` on the backing byte vector. See
+    /// [String::reserve].
+    #[inline]
+    pub fn reserve(&mut self, additional: usize) {
+        self.vec.reserve(additional)
+    }
+
+    /// Forwards to `Vec::reserve_exact` on the backing byte vector. See
+    /// [String::reserve_exact].
+    #[inline]
+    pub fn reserve_exact(&mut self, additional: usize) {
+        self.vec.reserve_exact(additional)
+    }
+
+    /// Forwards to `Vec::try_reserve` on the backing byte vector, returning
+    /// its error instead of aborting on allocation failure. See
+    /// [String::try_reserve].
+    #[inline]
+    pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.vec.try_reserve(additional)
+    }
+
+    /// Forwards to `Vec::try_reserve_exact` on the backing byte vector. See
+    /// [String::try_reserve_exact].
+    #[inline]
+    pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> {
+        self.vec.try_reserve_exact(additional)
+    }
+
+    /// Forwards to `Vec::shrink_to_fit` on the backing byte vector. See
+    /// [String::shrink_to_fit].
+    #[inline]
+    pub fn shrink_to_fit(&mut self) {
+        self.vec.shrink_to_fit()
+    }
+
+    /// Forwards to `Vec::shrink_to` on the backing byte vector. See
+    /// [String::shrink_to].
+    #[inline]
+    pub fn shrink_to(&mut self, min_capacity: usize) {
+        self.vec.shrink_to(min_capacity)
+    }
+
+    /// Appends `ch`, encoded as UTF-8, to the end of this string. See
+    /// [String::push].
+    #[inline]
+    pub fn push(&mut self, ch: char) {
+        if ch.len_utf8() == 1 {
+            // Single-byte encoding: the byte is the code point itself.
+            self.vec.push(ch as u8);
+        } else {
+            let mut buf = [0; 4];
+            self.vec
+                .extend_from_slice(ch.encode_utf8(&mut buf).as_bytes());
+        }
+    }
+
+    /// Appends `ch`, encoded with `encode_semi_utf8`, to the end of this
+    /// string. See [String::push].
+    #[inline]
+    pub fn push_java(&mut self, ch: JavaCodePoint) {
+        if ch.len_utf8() == 1 {
+            // Single-byte encoding: the byte is the code point itself.
+            self.vec.push(ch.as_u32() as u8);
+        } else {
+            let mut buf = [0; 4];
+            self.vec.extend_from_slice(ch.encode_semi_utf8(&mut buf));
+        }
+    }
+
+    /// Borrows the contents as a byte slice. See [String::as_bytes].
+    #[inline]
+    #[must_use]
+    pub fn as_bytes(&self) -> &[u8] {
+        self.vec.as_slice()
+    }
+
+    /// Shortens the string to `new_len` bytes. Does nothing when `new_len`
+    /// is greater than the current length. See [String::truncate].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(new_len)` is false.
+    #[inline]
+    pub fn truncate(&mut self, new_len: usize) {
+        if new_len > self.len() {
+            return;
+        }
+        assert!(self.is_char_boundary(new_len));
+        self.vec.truncate(new_len);
+    }
+
+    /// Removes the last code point and returns it, or returns `None` if the
+    /// string is empty. See [String::pop].
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    ///
+    /// let mut str = JavaString::from("Hello World!");
+    /// assert_eq!(str.pop().unwrap(), '!');
+    /// assert_eq!(str, "Hello World");
+    ///
+    /// let mut str = JavaString::from("東京");
+    /// assert_eq!(str.pop().unwrap(), '京');
+    /// assert_eq!(str, "東");
+    ///
+    /// assert!(JavaString::new().pop().is_none());
+    /// ```
+    #[inline]
+    pub fn pop(&mut self) -> Option<JavaCodePoint> {
+        let last = self.chars().next_back()?;
+        let remaining = self.len() - last.len_utf8();
+        // SAFETY: `remaining` is the current length minus the encoded length
+        // of the final code point, so it only cuts off that code point.
+        unsafe { self.vec.set_len(remaining) };
+        Some(last)
+    }
+
+    /// Removes the code point starting at byte index `idx` and returns it,
+    /// shifting the following bytes down. See [String::remove].
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    ///
+    /// let mut str = JavaString::from("Hello World!");
+    /// assert_eq!(str.remove(5), ' ');
+    /// assert_eq!(str, "HelloWorld!");
+    ///
+    /// let mut str = JavaString::from("Hello 🦀 World!");
+    /// assert_eq!(str.remove(6), '🦀');
+    /// assert_eq!(str, "Hello  World!");
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaString;
+    /// // Should panic
+    /// JavaString::new().remove(0);
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaString;
+    /// // Should panic
+    /// JavaString::from("🦀").remove(1);
+    /// ```
+    #[inline]
+    pub fn remove(&mut self, idx: usize) -> JavaCodePoint {
+        let ch = if let Some(ch) = self[idx..].chars().next() {
+            ch
+        } else {
+            panic!("cannot remove a char from the end of a string")
+        };
+
+        let ch_end = idx + ch.len_utf8();
+        let old_len = self.len();
+        // SAFETY: `idx..ch_end` is the code point just decoded from the
+        // string, so `idx <= ch_end <= old_len`. Both pointers stay inside the
+        // buffer and the new length is no larger than the old one.
+        unsafe {
+            let base = self.vec.as_mut_ptr();
+            // Regions may overlap, hence `copy` rather than
+            // `copy_nonoverlapping`.
+            ptr::copy(base.add(ch_end), base.add(idx), old_len - ch_end);
+            self.vec.set_len(old_len - (ch_end - idx));
+        }
+        ch
+    }
+
+    /// Keeps only the code points for which `f` returns `true`, compacting
+    /// the string in place. See [String::retain].
+    ///
+    /// `f` is called once per code point, in order from the front.
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaString};
+    ///
+    /// let mut str = JavaString::from("Hello 🦀 World!");
+    /// str.retain(|ch| !ch.is_ascii_uppercase());
+    /// assert_eq!(str, "ello 🦀 orld!");
+    /// str.retain(JavaCodePoint::is_ascii);
+    /// assert_eq!(str, "ello  orld!");
+    /// ```
+    #[inline]
+    pub fn retain<F>(&mut self, mut f: F)
+    where
+        F: FnMut(JavaCodePoint) -> bool,
+    {
+        // Fixes up the vector length when dropped, including when `f`
+        // panics part-way through.
+        struct SetLenOnDrop<'a> {
+            s: &'a mut JavaString,
+            // Read position: byte offset of the next code point to examine.
+            idx: usize,
+            // Total encoded length of the code points rejected so far.
+            del_bytes: usize,
+        }
+
+        impl<'a> Drop for SetLenOnDrop<'a> {
+            #[inline]
+            fn drop(&mut self) {
+                let new_len = self.idx - self.del_bytes;
+                debug_assert!(new_len <= self.s.len());
+                unsafe { self.s.vec.set_len(new_len) };
+            }
+        }
+
+        let len = self.len();
+        let mut guard = SetLenOnDrop {
+            s: self,
+            idx: 0,
+            del_bytes: 0,
+        };
+
+        while guard.idx < len {
+            // SAFETY: `guard.idx` is positive-or-zero and less than len so the
+            // `get_unchecked` is in bound. `self` is valid UTF-8 like string
+            // and the returned slice starts at a unicode code point so the
+            // `Chars` always return one character.
+            let ch = unsafe {
+                guard
+                    .s
+                    .get_unchecked(guard.idx..len)
+                    .chars()
+                    .next()
+                    .unwrap_unchecked()
+            };
+            let ch_len = ch.len_utf8();
+
+            if !f(ch) {
+                guard.del_bytes += ch_len;
+            } else if guard.del_bytes > 0 {
+                // SAFETY: `guard.idx` is in bound and `guard.del_bytes` represent the number of
+                // bytes that are erased from the string so the resulting `guard.idx -
+                // guard.del_bytes` always represent a valid unicode code point.
+                //
+                // `guard.del_bytes` >= `ch.len_utf8()`, so taking a slice with `ch.len_utf8()`
+                // len is safe.
+                ch.encode_semi_utf8(unsafe {
+                    slice::from_raw_parts_mut(
+                        guard.s.as_mut_ptr().add(guard.idx - guard.del_bytes),
+                        ch.len_utf8(),
+                    )
+                });
+            }
+
+            // Point idx to the next char
+            guard.idx += ch_len;
+        }
+
+        // Dropping the guard sets the final length.
+        drop(guard);
+    }
+
+    /// Inserts `ch`, encoded as UTF-8, at byte index `idx`. See
+    /// [String::insert].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(idx)` is false.
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    /// let mut s = JavaString::from("foo");
+    /// s.insert(3, 'a');
+    /// s.insert(4, 'r');
+    /// s.insert(3, 'b');
+    /// assert_eq!(s, "foobar");
+    /// ```
+    #[inline]
+    pub fn insert(&mut self, idx: usize, ch: char) {
+        assert!(self.is_char_boundary(idx));
+        let mut buf = [0; 4];
+        let encoded = ch.encode_utf8(&mut buf).as_bytes();
+
+        // SAFETY: `idx` was asserted to be a char boundary of this string,
+        // and `encoded` is the UTF-8 encoding of a `char`.
+        unsafe { self.insert_bytes(idx, encoded) };
+    }
+
+    /// Inserts `ch`, encoded with `encode_semi_utf8`, at byte index `idx`.
+    /// See [String::insert].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(idx)` is false.
+    #[inline]
+    pub fn insert_java(&mut self, idx: usize, ch: JavaCodePoint) {
+        assert!(self.is_char_boundary(idx));
+        let mut buf = [0; 4];
+        let encoded = ch.encode_semi_utf8(&mut buf);
+
+        // SAFETY: `idx` was asserted to be a char boundary of this string,
+        // and `encoded` is the semi-UTF-8 encoding of one code point.
+        unsafe { self.insert_bytes(idx, encoded) };
+    }
+
+    /// Splices `bytes` into the backing vector at byte offset `idx`, moving
+    /// the existing tail up by `bytes.len()`.
+    ///
+    /// # Safety
+    ///
+    /// `idx` must not exceed the current length (the tail length is computed
+    /// as `len - idx`). No validation is done here; the callers in this file
+    /// assert `is_char_boundary(idx)` first and pass bytes taken from a
+    /// `str`, a `JavaStr` or an encoded code point.
+    #[inline]
+    unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) {
+        let len = self.len();
+        let amt = bytes.len();
+        // Make room first so the pointers taken below stay valid.
+        self.vec.reserve(amt);
+
+        unsafe {
+            // Shift the tail `[idx, len)` up by `amt`; source and destination
+            // may overlap, hence `copy`.
+            ptr::copy(
+                self.vec.as_ptr().add(idx),
+                self.vec.as_mut_ptr().add(idx + amt),
+                len - idx,
+            );
+            // Fill the gap with the new bytes.
+            ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt);
+            self.vec.set_len(len + amt);
+        }
+    }
+
+    /// Inserts `string` at byte index `idx`. See [String::insert_str].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(idx)` is false.
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    /// let mut s = JavaString::from("bar");
+    /// s.insert_str(0, "foo");
+    /// assert_eq!(s, "foobar");
+    /// ```
+    #[inline]
+    pub fn insert_str(&mut self, idx: usize, string: &str) {
+        assert!(self.is_char_boundary(idx));
+
+        let bytes = string.as_bytes();
+        // SAFETY: `idx` was asserted to be a char boundary of this string,
+        // and the bytes come from a `str`.
+        unsafe { self.insert_bytes(idx, bytes) };
+    }
+
+    /// Inserts `string` at byte index `idx`. See [String::insert_str].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(idx)` is false.
+    pub fn insert_java_str(&mut self, idx: usize, string: &JavaStr) {
+        assert!(self.is_char_boundary(idx));
+
+        let bytes = string.as_bytes();
+        // SAFETY: `idx` was asserted to be a char boundary of this string,
+        // and the bytes come from a `JavaStr`.
+        unsafe { self.insert_bytes(idx, bytes) };
+    }
+
+    /// Returns a mutable reference to the backing byte vector. See
+    /// [String::as_mut_vec].
+    ///
+    /// # Safety
+    ///
+    /// The returned `Vec` must not have invalid UTF-8 written to it, other
+    /// than encoded surrogate code points, which are permitted. When the
+    /// borrow ends the contents must still be semi-valid UTF-8, because the
+    /// rest of this type relies on that without re-checking.
+    #[inline]
+    pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> {
+        &mut self.vec
+    }
+
+    /// Returns the length of the string in bytes. See [String::len].
+    #[inline]
+    #[must_use]
+    pub fn len(&self) -> usize {
+        let bytes = &self.vec;
+        bytes.len()
+    }
+
+    /// Returns `true` if the string has a length of zero bytes. See
+    /// [String::is_empty].
+    #[inline]
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.vec.is_empty()
+    }
+
+    /// Splits the string at byte index `at`, keeping `[0, at)` in `self` and
+    /// returning the remainder as a new `JavaString`. See
+    /// [String::split_off].
+    ///
+    /// # Panics
+    ///
+    /// Panics if `is_char_boundary(at)` is false.
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    /// let mut hello = JavaString::from("Hello World!");
+    /// let world = hello.split_off(6);
+    /// assert_eq!(hello, "Hello ");
+    /// assert_eq!(world, "World!");
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaString;
+    /// let mut s = JavaString::from("🦀");
+    /// // Should panic
+    /// let _ = s.split_off(1);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn split_off(&mut self, at: usize) -> JavaString {
+        assert!(self.is_char_boundary(at));
+        // `at` is a char boundary, so both halves remain semi-valid UTF-8.
+        let tail = self.vec.split_off(at);
+        JavaString { vec: tail }
+    }
+
+    /// Removes all contents by clearing the backing vector. See
+    /// [String::clear].
+    #[inline]
+    pub fn clear(&mut self) {
+        let bytes = &mut self.vec;
+        bytes.clear();
+    }
+
+    /// Creates a draining iterator over the given byte range. See
+    /// [String::drain].
+    ///
+    /// # Panics
+    ///
+    /// Panics if either end of the resolved range fails
+    /// `is_char_boundary`. Range resolution itself is done by
+    /// `to_range_checked` against `..self.len()`.
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    ///
+    /// let mut s = JavaString::from("α is alpha, β is beta");
+    /// let beta_offset = s.find('β').unwrap_or(s.len());
+    ///
+    /// // Remove the range up until the β from the string
+    /// let t: JavaString = s.drain(..beta_offset).collect();
+    /// assert_eq!(t, "α is alpha, ");
+    /// assert_eq!(s, "β is beta");
+    ///
+    /// // A full range clears the string, like `clear()` does
+    /// s.drain(..);
+    /// assert_eq!(s, "");
+    /// ```
+    #[inline]
+    pub fn drain<R>(&mut self, range: R) -> Drain<'_>
+    where
+        R: RangeBounds<usize>,
+    {
+        // Memory safety: see String::drain
+        let Range { start, end } = to_range_checked(range, ..self.len());
+        assert!(self.is_char_boundary(start));
+        assert!(self.is_char_boundary(end));
+
+        // Take out two simultaneous borrows. The &mut String won't be accessed
+        // until iteration is over, in Drop.
+        let self_ptr = self as *mut _;
+        // SAFETY: `to_range_checked` and `is_char_boundary` do the appropriate bounds
+        // checks.
+        let chars_iter = unsafe { self.get_unchecked(start..end) }.chars();
+
+        // The raw pointer and the `Chars` borrow refer to the same string;
+        // `Drain` carries both along with the byte range.
+        Drain {
+            start,
+            end,
+            iter: chars_iter,
+            string: self_ptr,
+        }
+    }
+
+    /// Replaces the given byte range with `replace_with`; the replacement
+    /// does not need to have the same length as the range. See
+    /// [String::replace_range].
+    ///
+    /// ```
+    /// # use java_string::JavaString;
+    ///
+    /// let mut s = JavaString::from("α is alpha, β is beta");
+    /// let beta_offset = s.find('β').unwrap_or(s.len());
+    ///
+    /// // Replace the range up until the β from the string
+    /// s.replace_range(..beta_offset, "Α is capital alpha; ");
+    /// assert_eq!(s, "Α is capital alpha; β is beta");
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaString;
+    /// let mut s = JavaString::from("α is alpha, β is beta");
+    /// // Should panic
+    /// s.replace_range(..1, "Α is capital alpha; ");
+    /// ```
+    pub fn replace_range<R>(&mut self, range: R, replace_with: &str)
+    where
+        R: RangeBounds<usize>,
+    {
+        let replacement = JavaStr::from_str(replace_with);
+        self.replace_range_java(range, replacement)
+    }
+
+    /// Replaces the given byte range with `replace_with`. See
+    /// [String::replace_range].
+    ///
+    /// # Panics
+    ///
+    /// Panics if a bounded end of the range does not resolve to a char
+    /// boundary.
+    pub fn replace_range_java<R>(&mut self, range: R, replace_with: &JavaStr)
+    where
+        R: RangeBounds<usize>,
+    {
+        // The bounds are read once each and the same values are handed to
+        // `splice`, so the range checked is the range replaced.
+        let lower = range.start_bound();
+        match lower {
+            Bound::Unbounded => {}
+            Bound::Included(&n) => assert!(self.is_char_boundary(n)),
+            Bound::Excluded(&n) => assert!(self.is_char_boundary(n + 1)),
+        }
+        let upper = range.end_bound();
+        match upper {
+            Bound::Unbounded => {}
+            Bound::Included(&n) => assert!(self.is_char_boundary(n + 1)),
+            Bound::Excluded(&n) => assert!(self.is_char_boundary(n)),
+        }
+
+        // Both ends were checked to be char boundaries and the replacement
+        // bytes come from a `JavaStr`, so the contents stay semi-valid UTF-8.
+        self.vec.splice((lower, upper), replace_with.bytes());
+    }
+
+    /// Converts this string into a boxed `JavaStr`. See
+    /// [String::into_boxed_str].
+    #[inline]
+    #[must_use]
+    pub fn into_boxed_str(self) -> Box<JavaStr> {
+        let boxed_bytes = self.vec.into_boxed_slice();
+        // SAFETY: the bytes come from a `JavaString`, so they are semi-valid
+        // UTF-8.
+        unsafe { JavaStr::from_boxed_semi_utf8_unchecked(boxed_bytes) }
+    }
+
+    /// Consumes the string and leaks its buffer, returning a mutable
+    /// `JavaStr` reference to the contents. See [String::leak].
+    #[inline]
+    pub fn leak<'a>(self) -> &'a mut JavaStr {
+        let leaked_bytes = self.vec.leak();
+        // SAFETY: the bytes come from a `JavaString`, so they are semi-valid
+        // UTF-8.
+        unsafe { JavaStr::from_semi_utf8_unchecked_mut(leaked_bytes) }
+    }
+}
+
+/// `java_string + &str`: appends to the left operand's buffer and returns it.
+impl Add<&str> for JavaString {
+    type Output = JavaString;
+
+    #[inline]
+    fn add(self, rhs: &str) -> JavaString {
+        let mut joined = self;
+        joined.push_str(rhs);
+        joined
+    }
+}
+
+/// `java_string + &JavaStr`: appends to the left operand's buffer and
+/// returns it.
+impl Add<&JavaStr> for JavaString {
+    type Output = JavaString;
+
+    #[inline]
+    fn add(self, rhs: &JavaStr) -> JavaString {
+        let mut joined = self;
+        joined.push_java_str(rhs);
+        joined
+    }
+}
+
+/// `java_string += &str`: same as `push_str`.
+impl AddAssign<&str> for JavaString {
+    #[inline]
+    fn add_assign(&mut self, rhs: &str) {
+        JavaString::push_str(self, rhs);
+    }
+}
+
+/// `java_string += &JavaStr`: same as `push_java_str`.
+impl AddAssign<&JavaStr> for JavaString {
+    #[inline]
+    fn add_assign(&mut self, rhs: &JavaStr) {
+        JavaString::push_java_str(self, rhs);
+    }
+}
+
+/// Mutable view of the contents as a `JavaStr`.
+impl AsMut<JavaStr> for JavaString {
+    #[inline]
+    fn as_mut(&mut self) -> &mut JavaStr {
+        self.as_mut_java_str()
+    }
+}
+
+/// View of the contents as raw bytes.
+impl AsRef<[u8]> for JavaString {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        self.as_bytes()
+    }
+}
+
+/// View of the contents as a `JavaStr`.
+impl AsRef<JavaStr> for JavaString {
+    #[inline]
+    fn as_ref(&self) -> &JavaStr {
+        self.as_java_str()
+    }
+}
+
+/// Borrows the contents as a `JavaStr`.
+impl Borrow<JavaStr> for JavaString {
+    #[inline]
+    fn borrow(&self) -> &JavaStr {
+        self.as_java_str()
+    }
+}
+
+/// Mutably borrows the contents as a `JavaStr`.
+impl BorrowMut<JavaStr> for JavaString {
+    #[inline]
+    fn borrow_mut(&mut self) -> &mut JavaStr {
+        self.as_mut_java_str()
+    }
+}
+
+/// Written by hand rather than derived so that `clone_from` can forward to
+/// `Vec::clone_from`.
+impl Clone for JavaString {
+    #[inline]
+    fn clone(&self) -> Self {
+        let vec = self.vec.clone();
+        Self { vec }
+    }
+
+    #[inline]
+    fn clone_from(&mut self, source: &Self) {
+        let src = &source.vec;
+        self.vec.clone_from(src);
+    }
+}
+
+/// Formats exactly like the borrowed `JavaStr` does.
+impl Debug for JavaString {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        Debug::fmt(self.as_java_str(), f)
+    }
+}
+
+/// Gives `JavaString` the `&self` methods of `JavaStr`.
+impl Deref for JavaString {
+    type Target = JavaStr;
+
+    #[inline]
+    fn deref(&self) -> &JavaStr {
+        self.as_java_str()
+    }
+}
+
+/// Gives `JavaString` the `&mut self` methods of `JavaStr`.
+impl DerefMut for JavaString {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut JavaStr {
+        self.as_mut_java_str()
+    }
+}
+
+/// Formats exactly like the borrowed `JavaStr` does.
+impl Display for JavaString {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        Display::fmt(self.as_java_str(), f)
+    }
+}
+
+/// Appends every `char` of the iterator, reserving the iterator's lower
+/// size bound (in bytes) up front.
+impl Extend<char> for JavaString {
+    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
+        let chars = iter.into_iter();
+        // Every code point takes at least one byte, so the lower bound on the
+        // item count is also a lower bound on the bytes needed.
+        let min_items = chars.size_hint().0;
+        self.reserve(min_items);
+        for c in chars {
+            self.push(c);
+        }
+    }
+}
+
+/// Appends every `JavaCodePoint` of the iterator, reserving the iterator's
+/// lower size bound (in bytes) up front.
+impl Extend<JavaCodePoint> for JavaString {
+    fn extend<T: IntoIterator<Item = JavaCodePoint>>(&mut self, iter: T) {
+        let code_points = iter.into_iter();
+        // Every code point takes at least one byte, so the lower bound on the
+        // item count is also a lower bound on the bytes needed.
+        let min_items = code_points.size_hint().0;
+        self.reserve(min_items);
+        for c in code_points {
+            self.push_java(c);
+        }
+    }
+}
+
+impl Extend<String> for JavaString {
+    fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
+        iter.into_iter().for_each(move |s| self.push_str(&s));
+    }
+}
+
+/// Appends each owned `JavaString` in turn.
+impl Extend<JavaString> for JavaString {
+    fn extend<T: IntoIterator<Item = JavaString>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_java_str(&piece);
+        }
+    }
+}
+
+/// Appends borrowed `char`s by copying them and reusing the `Extend<char>`
+/// impl.
+impl<'a> Extend<&'a char> for JavaString {
+    fn extend<T: IntoIterator<Item = &'a char>>(&mut self, iter: T) {
+        let owned = iter.into_iter().copied();
+        self.extend(owned)
+    }
+}
+
+/// Appends borrowed code points by copying them and reusing the
+/// `Extend<JavaCodePoint>` impl.
+impl<'a> Extend<&'a JavaCodePoint> for JavaString {
+    fn extend<T: IntoIterator<Item = &'a JavaCodePoint>>(&mut self, iter: T) {
+        let owned = iter.into_iter().copied();
+        self.extend(owned)
+    }
+}
+
+/// Appends each borrowed `str` in turn.
+impl<'a> Extend<&'a str> for JavaString {
+    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_str(piece);
+        }
+    }
+}
+
+/// Appends each borrowed `JavaStr` in turn.
+impl<'a> Extend<&'a JavaStr> for JavaString {
+    fn extend<T: IntoIterator<Item = &'a JavaStr>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_java_str(piece);
+        }
+    }
+}
+
+/// Appends each boxed `str` in turn.
+impl Extend<Box<str>> for JavaString {
+    fn extend<T: IntoIterator<Item = Box<str>>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_str(&piece);
+        }
+    }
+}
+
+/// Appends each boxed `JavaStr` in turn.
+impl Extend<Box<JavaStr>> for JavaString {
+    fn extend<T: IntoIterator<Item = Box<JavaStr>>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_java_str(&piece);
+        }
+    }
+}
+
+/// Appends each `Cow<str>` in turn, whether borrowed or owned.
+impl<'a> Extend<Cow<'a, str>> for JavaString {
+    fn extend<T: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_str(&piece);
+        }
+    }
+}
+
+/// Appends each `Cow<JavaStr>` in turn, whether borrowed or owned.
+impl<'a> Extend<Cow<'a, JavaStr>> for JavaString {
+    fn extend<T: IntoIterator<Item = Cow<'a, JavaStr>>>(&mut self, iter: T) {
+        for piece in iter {
+            self.push_java_str(&piece);
+        }
+    }
+}
+
+/// Converts a `String` by taking over its bytes without re-validating them.
+impl From<String> for JavaString {
+    #[inline]
+    fn from(value: String) -> Self {
+        let bytes = value.into_bytes();
+        // SAFETY: the bytes come from a `String`, so they are valid UTF-8
+        unsafe { JavaString::from_semi_utf8_unchecked(bytes) }
+    }
+}
+
+/// Converts a borrowed `String` by cloning it and reusing `From<String>`.
+impl From<&String> for JavaString {
+    #[inline]
+    fn from(value: &String) -> Self {
+        let owned: String = value.clone();
+        owned.into()
+    }
+}
+
+impl From<&JavaString> for JavaString { // conversion from a reference is a plain clone
+    #[inline]
+    fn from(value: &JavaString) -> Self {
+        value.clone()
+    }
+}
+
+/// Converts a mutable `str` reference by reborrowing it as shared and reusing
+/// `From<&str>`.
+impl From<&mut str> for JavaString {
+    #[inline]
+    fn from(value: &mut str) -> Self {
+        let shared: &str = value;
+        Self::from(shared)
+    }
+}
+
+/// Converts a `str` slice by copying it into a `String` and reusing
+/// `From<String>`.
+impl From<&str> for JavaString {
+    #[inline]
+    fn from(value: &str) -> Self {
+        let owned: String = value.to_owned();
+        owned.into()
+    }
+}
+
+/// Converts a mutable `JavaStr` reference by reborrowing it as shared and
+/// reusing `From<&JavaStr>`.
+impl From<&mut JavaStr> for JavaString {
+    #[inline]
+    fn from(value: &mut JavaStr) -> Self {
+        let shared: &JavaStr = value;
+        Self::from(shared)
+    }
+}
+
+impl From<&JavaStr> for JavaString { // owned copy of a borrowed `JavaStr`
+    #[inline]
+    fn from(value: &JavaStr) -> Self {
+        value.to_owned()
+    }
+}
+
+/// Converts a boxed `str` by turning it into a `String` and reusing
+/// `From<String>`.
+impl From<Box<str>> for JavaString {
+    #[inline]
+    fn from(value: Box<str>) -> Self {
+        let owned: String = value.into_string();
+        owned.into()
+    }
+}
+
+impl From<Box<JavaStr>> for JavaString { // boxed slice -> owned string
+    #[inline]
+    fn from(value: Box<JavaStr>) -> Self {
+        value.into_string() // forwards to `into_string` on the boxed `JavaStr`
+    }
+}
+
+/// Converts a `Cow<str>` by taking ownership of its contents and reusing
+/// `From<String>`.
+impl<'a> From<Cow<'a, str>> for JavaString {
+    #[inline]
+    fn from(value: Cow<'a, str>) -> Self {
+        let owned: String = value.into_owned();
+        owned.into()
+    }
+}
+
+impl<'a> From<Cow<'a, JavaStr>> for JavaString { // Cow -> owned string
+    #[inline]
+    fn from(value: Cow<'a, JavaStr>) -> Self {
+        value.into_owned() // forwards to `Cow::into_owned`
+    }
+}
+
+/// Builds an `Arc<JavaStr>` from the string's borrowed contents.
+impl From<JavaString> for Arc<JavaStr> {
+    #[inline]
+    fn from(value: JavaString) -> Self {
+        let contents: &JavaStr = value.as_java_str();
+        Arc::from(contents)
+    }
+}
+
+impl<'a> From<JavaString> for Cow<'a, JavaStr> { // wraps the string as the owned variant
+    #[inline]
+    fn from(value: JavaString) -> Self {
+        Cow::Owned(value)
+    }
+}
+
+/// Builds an `Rc<JavaStr>` from the string's borrowed contents.
+impl From<JavaString> for Rc<JavaStr> {
+    #[inline]
+    fn from(value: JavaString) -> Self {
+        let contents: &JavaStr = value.as_java_str();
+        Rc::from(contents)
+    }
+}
+
+impl From<JavaString> for Vec<u8> { // consumes the string and yields its bytes
+    #[inline]
+    fn from(value: JavaString) -> Self {
+        value.into_bytes() // forwards to `into_bytes`
+    }
+}
+
+/// Creates a one-character string by UTF-8 encoding the `char` into a stack
+/// buffer and converting the resulting `str`.
+impl From<char> for JavaString {
+    #[inline]
+    fn from(value: char) -> Self {
+        let mut buf = [0u8; 4];
+        let encoded: &mut str = value.encode_utf8(&mut buf);
+        Self::from(encoded)
+    }
+}
+
+/// Creates a one-code-point string from the semi-UTF-8 encoding of `value`.
+impl From<JavaCodePoint> for JavaString {
+    #[inline]
+    fn from(value: JavaCodePoint) -> Self {
+        let mut buf = [0u8; 4];
+        let bytes = value.encode_semi_utf8(&mut buf).to_vec();
+        // SAFETY: the bytes were just produced by `encode_semi_utf8`, so they
+        // are semi-valid UTF-8
+        unsafe { JavaString::from_semi_utf8_unchecked(bytes) }
+    }
+}
+
+/// Collects `char`s by extending an empty string.
+impl FromIterator<char> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = char>>(iter: T) -> Self {
+        let mut collected = Self::new();
+        collected.extend(iter);
+        collected
+    }
+}
+
+/// Collects borrowed `char`s by extending an empty string.
+impl<'a> FromIterator<&'a char> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = &'a char>>(iter: T) -> Self {
+        let mut collected = Self::new();
+        collected.extend(iter);
+        collected
+    }
+}
+
+/// Collects code points by extending an empty string.
+impl FromIterator<JavaCodePoint> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = JavaCodePoint>>(iter: T) -> Self {
+        let mut collected = Self::new();
+        collected.extend(iter);
+        collected
+    }
+}
+
+/// Collects borrowed code points by extending an empty string.
+impl<'a> FromIterator<&'a JavaCodePoint> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = &'a JavaCodePoint>>(iter: T) -> Self {
+        let mut collected = Self::new();
+        collected.extend(iter);
+        collected
+    }
+}
+
+/// Concatenates `str` slices by extending an empty string.
+impl<'a> FromIterator<&'a str> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
+        let mut joined = Self::new();
+        joined.extend(iter);
+        joined
+    }
+}
+
+/// Concatenates owned `String`s. The first one is converted and used as the
+/// buffer that the remaining ones are appended to.
+impl FromIterator<String> for JavaString {
+    fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self {
+        let mut rest = iter.into_iter();
+
+        if let Some(first) = rest.next() {
+            let mut joined = JavaString::from(first);
+            joined.extend(rest);
+            joined
+        } else {
+            JavaString::new()
+        }
+    }
+}
+
+/// Concatenates owned `JavaString`s. The first one is used as the buffer that
+/// the remaining ones are appended to.
+impl FromIterator<JavaString> for JavaString {
+    fn from_iter<T: IntoIterator<Item = JavaString>>(iter: T) -> Self {
+        let mut rest = iter.into_iter();
+
+        if let Some(mut joined) = rest.next() {
+            joined.extend(rest);
+            joined
+        } else {
+            JavaString::new()
+        }
+    }
+}
+
+/// Concatenates boxed `str`s by extending an empty string.
+impl FromIterator<Box<str>> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = Box<str>>>(iter: T) -> Self {
+        let mut joined = Self::new();
+        joined.extend(iter);
+        joined
+    }
+}
+
+/// Concatenates boxed `JavaStr`s by extending an empty string.
+impl FromIterator<Box<JavaStr>> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = Box<JavaStr>>>(iter: T) -> Self {
+        let mut joined = Self::new();
+        joined.extend(iter);
+        joined
+    }
+}
+
+/// Concatenates `Cow<str>` values by extending an empty string.
+impl<'a> FromIterator<Cow<'a, str>> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = Cow<'a, str>>>(iter: T) -> Self {
+        let mut joined = Self::new();
+        joined.extend(iter);
+        joined
+    }
+}
+
+/// Concatenates `Cow<JavaStr>` values by extending an empty string.
+impl<'a> FromIterator<Cow<'a, JavaStr>> for JavaString {
+    #[inline]
+    fn from_iter<T: IntoIterator<Item = Cow<'a, JavaStr>>>(iter: T) -> Self {
+        let mut joined = Self::new();
+        joined.extend(iter);
+        joined
+    }
+}
+
+/// Parsing from a `str` cannot fail; it is the same as `From<&str>`.
+impl FromStr for JavaString {
+    type Err = Infallible;
+
+    #[inline]
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let parsed = Self::from(s);
+        Ok(parsed)
+    }
+}
+
+/// Hashes the borrowed `JavaStr`, so an owned string and its slice feed the
+/// same data to the hasher.
+impl Hash for JavaString {
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        Hash::hash(self.as_java_str(), state)
+    }
+}
+
+/// Slices the string with `a..b` by indexing the borrowed `JavaStr`.
+impl Index<Range<usize>> for JavaString {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: Range<usize>) -> &Self::Output {
+        &self.as_java_str()[index]
+    }
+}
+
+/// Slices the string with `a..` by indexing the borrowed `JavaStr`.
+impl Index<RangeFrom<usize>> for JavaString {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: RangeFrom<usize>) -> &Self::Output {
+        &self.as_java_str()[index]
+    }
+}
+
+impl Index<RangeFull> for JavaString { // `string[..]` yields the whole string as a `JavaStr`
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, _index: RangeFull) -> &Self::Output {
+        self.as_java_str() // the full range needs no bounds handling
+    }
+}
+
+/// Slices the string with `a..=b` by indexing the borrowed `JavaStr`.
+impl Index<RangeInclusive<usize>> for JavaString {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: RangeInclusive<usize>) -> &Self::Output {
+        &self.as_java_str()[index]
+    }
+}
+
+/// Slices the string with `..b` by indexing the borrowed `JavaStr`.
+impl Index<RangeTo<usize>> for JavaString {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: RangeTo<usize>) -> &Self::Output {
+        &self.as_java_str()[index]
+    }
+}
+
+/// Slices the string with `..=b` by indexing the borrowed `JavaStr`.
+impl Index<RangeToInclusive<usize>> for JavaString {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: RangeToInclusive<usize>) -> &Self::Output {
+        &self.as_java_str()[index]
+    }
+}
+
+/// Mutably slices the string with `a..b` via the mutable `JavaStr` view.
+impl IndexMut<Range<usize>> for JavaString {
+    #[inline]
+    fn index_mut(&mut self, index: Range<usize>) -> &mut Self::Output {
+        &mut self.as_mut_java_str()[index]
+    }
+}
+
+/// Mutably slices the string with `a..` via the mutable `JavaStr` view.
+impl IndexMut<RangeFrom<usize>> for JavaString {
+    #[inline]
+    fn index_mut(&mut self, index: RangeFrom<usize>) -> &mut Self::Output {
+        &mut self.as_mut_java_str()[index]
+    }
+}
+
+impl IndexMut<RangeFull> for JavaString { // `&mut string[..]` yields the whole string mutably
+    #[inline]
+    fn index_mut(&mut self, _index: RangeFull) -> &mut Self::Output {
+        self.as_mut_java_str() // the full range needs no bounds handling
+    }
+}
+
+/// Mutably slices the string with `a..=b` via the mutable `JavaStr` view.
+impl IndexMut<RangeInclusive<usize>> for JavaString {
+    #[inline]
+    fn index_mut(&mut self, index: RangeInclusive<usize>) -> &mut Self::Output {
+        &mut self.as_mut_java_str()[index]
+    }
+}
+
+/// Mutably slices the string with `..b` via the mutable `JavaStr` view.
+impl IndexMut<RangeTo<usize>> for JavaString {
+    #[inline]
+    fn index_mut(&mut self, index: RangeTo<usize>) -> &mut Self::Output {
+        &mut self.as_mut_java_str()[index]
+    }
+}
+
+/// Mutably slices the string with `..=b` via the mutable `JavaStr` view.
+impl IndexMut<RangeToInclusive<usize>> for JavaString {
+    #[inline]
+    fn index_mut(&mut self, index: RangeToInclusive<usize>) -> &mut Self::Output {
+        &mut self.as_mut_java_str()[index]
+    }
+}
+
+/// Compares the string's `JavaStr` contents against a `str`.
+impl PartialEq<str> for JavaString {
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        *self.as_java_str() == other
+    }
+}
+
+/// Compares a `str` against the `JavaStr` contents of a `JavaString`.
+impl PartialEq<JavaString> for str {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        self == *other.as_java_str()
+    }
+}
+
+/// Compares against a `&str` by delegating to the `PartialEq<str>` impl.
+impl<'a> PartialEq<&'a str> for JavaString {
+    #[inline]
+    fn eq(&self, other: &&'a str) -> bool {
+        <Self as PartialEq<str>>::eq(self, *other)
+    }
+}
+
+/// Compares a `&str` against a `JavaString` by delegating to the
+/// `PartialEq<JavaString>` impl for `str`.
+impl<'a> PartialEq<JavaString> for &'a str {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        <str as PartialEq<JavaString>>::eq(*self, other)
+    }
+}
+
+/// Compares the string's `JavaStr` contents against a `String`.
+impl PartialEq<String> for JavaString {
+    #[inline]
+    fn eq(&self, other: &String) -> bool {
+        self.as_java_str() == other
+    }
+}
+
+/// Compares a `String` against the `JavaStr` contents of a `JavaString`.
+impl PartialEq<JavaString> for String {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        self == other.as_java_str()
+    }
+}
+
+/// Compares the string's `JavaStr` contents against another `JavaStr`.
+impl PartialEq<JavaStr> for JavaString {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        *self.as_java_str() == other
+    }
+}
+
+/// Compares against a `&JavaStr` by delegating to the `PartialEq<JavaStr>`
+/// impl.
+impl<'a> PartialEq<&'a JavaStr> for JavaString {
+    #[inline]
+    fn eq(&self, other: &&'a JavaStr) -> bool {
+        <Self as PartialEq<JavaStr>>::eq(self, *other)
+    }
+}
+
+/// Compares the string's `JavaStr` contents against a `Cow<str>`.
+impl<'a> PartialEq<Cow<'a, str>> for JavaString {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, str>) -> bool {
+        self.as_java_str() == other
+    }
+}
+
+/// Compares a `Cow<str>` against the `JavaStr` contents of a `JavaString`.
+impl<'a> PartialEq<JavaString> for Cow<'a, str> {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        self == other.as_java_str()
+    }
+}
+
+/// Compares the string's `JavaStr` contents against a `Cow<JavaStr>`.
+impl<'a> PartialEq<Cow<'a, JavaStr>> for JavaString {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, JavaStr>) -> bool {
+        self.as_java_str() == other
+    }
+}
+
+/// Compares a `Cow<JavaStr>` against the `JavaStr` contents of a
+/// `JavaString`.
+impl<'a> PartialEq<JavaString> for Cow<'a, JavaStr> {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        self == other.as_java_str()
+    }
+}
+
+impl Write for JavaString { // formatted output is appended to the string
+    #[inline]
+    fn write_str(&mut self, s: &str) -> std::fmt::Result {
+        self.push_str(s); // append the text
+        Ok(()) // this method never reports an error
+    }
+
+    #[inline]
+    fn write_char(&mut self, c: char) -> std::fmt::Result {
+        self.push(c); // append the single character
+        Ok(()) // this method never reports an error
+    }
+}
+
+pub struct Drain<'a> { // iterator over code points; removes a byte range from a JavaString on drop
+    string: *mut JavaString, // raw pointer to the string whose bytes are removed on drop
+    start: usize, // start of the byte range removed on drop
+    end: usize, // exclusive end of the byte range removed on drop
+    iter: Chars<'a>, // source of the code points this iterator yields
+}
+
+/// Formats as `Drain(..)` wrapping the not-yet-yielded part of the range.
+impl Debug for Drain<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let remaining = self.as_str();
+        let mut tuple = f.debug_tuple("Drain");
+        tuple.field(&remaining);
+        tuple.finish()
+    }
+}
+
+unsafe impl Sync for Drain<'_> {} // manual impl: the raw `string` pointer blocks the auto trait. NOTE(review): soundness depends on how Drain is constructed — confirm
+unsafe impl Send for Drain<'_> {} // manual impl for the same reason. NOTE(review): same assumption as for Sync
+
+impl Drop for Drain<'_> { // dropping the iterator removes `start..end` from the source string
+    #[inline]
+    fn drop(&mut self) {
+        unsafe {
+            // Use Vec::drain. "Reaffirm" the bounds checks to avoid
+            // panic code being inserted again.
+            let self_vec = (*self.string).as_mut_vec(); // NOTE(review): assumes `string` still points at a live JavaString — confirm at the constructor
+            if self.start <= self.end && self.end <= self_vec.len() {
+                self_vec.drain(self.start..self.end); // the returned Vec drain is dropped at once, which removes the bytes
+            }
+        }
+    }
+}
+
+impl AsRef<JavaStr> for Drain<'_> { // view of the not-yet-yielded part as a `JavaStr`
+    #[inline]
+    fn as_ref(&self) -> &JavaStr {
+        self.as_str() // same value as `Drain::as_str`
+    }
+}
+
+/// Exposes the bytes of the not-yet-yielded part of the drained range.
+impl AsRef<[u8]> for Drain<'_> {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        let remaining: &JavaStr = self.as_str();
+        remaining.as_bytes()
+    }
+}
+
+impl Drain<'_> {
+    #[inline]
+    #[must_use]
+    pub fn as_str(&self) -> &JavaStr { // the part of the range that has not been yielded yet
+        self.iter.as_str() // forwards to `as_str` on the inner `Chars`
+    }
+}
+
+impl Iterator for Drain<'_> { // iteration is forwarded to the inner `Chars`
+    type Item = JavaCodePoint;
+
+    #[inline]
+    fn next(&mut self) -> Option<JavaCodePoint> {
+        self.iter.next()
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.iter.size_hint()
+    }
+
+    #[inline]
+    fn last(mut self) -> Option<JavaCodePoint> {
+        self.next_back() // the last item is the first one taken from the back
+    }
+}
+
+impl DoubleEndedIterator for Drain<'_> { // back iteration is forwarded to the inner `Chars`
+    #[inline]
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.iter.next_back()
+    }
+}
+
+impl FusedIterator for Drain<'_> {} // NOTE(review): relies on the inner `Chars` being fused — confirm
diff --git a/crates/java_string/src/pattern.rs b/crates/java_string/src/pattern.rs
new file mode 100644
index 000000000..06cc78041
--- /dev/null
+++ b/crates/java_string/src/pattern.rs
@@ -0,0 +1,402 @@
+use crate::{JavaCodePoint, JavaStr};
+
+mod private_pattern { // private module holding the `Sealed` supertrait of `JavaStrPattern`
+    use crate::{JavaCodePoint, JavaStr};
+
+    pub trait Sealed {} // `JavaStrPattern` requires this trait, and the module is private
+
+    impl Sealed for char {}
+    impl Sealed for JavaCodePoint {}
+    impl Sealed for &str {}
+    impl Sealed for &JavaStr {}
+    impl<F> Sealed for F where F: FnMut(JavaCodePoint) -> bool {} // predicate closures
+    impl Sealed for &[char] {}
+    impl Sealed for &[JavaCodePoint] {}
+    impl Sealed for &char {}
+    impl Sealed for &JavaCodePoint {}
+    impl Sealed for &&str {}
+    impl Sealed for &&JavaStr {}
+}
+
+/// # Safety
+///
+/// Methods in this trait must only return indexes that are on char boundaries
+pub unsafe trait JavaStrPattern: private_pattern::Sealed {
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize>; // length of a match at the start of `haystack`, if any
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize>; // length of a match at the end of `haystack`, if any
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)>; // (index, length) of the first match, if any
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)>; // (index, length) of the last match, if any
+}
+
+/// Matches one specific `char`. Prefix and suffix checks compare code points;
+/// searches look for the character's UTF-8 encoding in the haystack bytes.
+unsafe impl JavaStrPattern for char {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let first = haystack.chars().next()?;
+        (first == *self).then(|| first.len_utf8())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let last = haystack.chars().next_back()?;
+        (last == *self).then(|| last.len_utf8())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut buf = [0; 4];
+        let needle = self.encode_utf8(&mut buf).as_bytes();
+        let index = find(haystack.as_bytes(), needle)?;
+        Some((index, needle.len()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut buf = [0; 4];
+        let needle = self.encode_utf8(&mut buf).as_bytes();
+        let index = rfind(haystack.as_bytes(), needle)?;
+        Some((index, needle.len()))
+    }
+}
+
+/// Matches one specific code point. Prefix and suffix checks compare code
+/// points; searches look for its semi-UTF-8 encoding in the haystack bytes.
+unsafe impl JavaStrPattern for JavaCodePoint {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let first = haystack.chars().next()?;
+        (first == *self).then(|| first.len_utf8())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let last = haystack.chars().next_back()?;
+        (last == *self).then(|| last.len_utf8())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut buf = [0; 4];
+        let needle = self.encode_semi_utf8(&mut buf);
+        let index = find(haystack.as_bytes(), needle)?;
+        Some((index, needle.len()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut buf = [0; 4];
+        let needle = self.encode_semi_utf8(&mut buf);
+        let index = rfind(haystack.as_bytes(), needle)?;
+        Some((index, needle.len()))
+    }
+}
+
+/// Matches a `str` needle by comparing raw bytes with the haystack.
+unsafe impl JavaStrPattern for &str {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let matches = haystack.as_bytes().starts_with(self.as_bytes());
+        matches.then(|| self.len())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let matches = haystack.as_bytes().ends_with(self.as_bytes());
+        matches.then(|| self.len())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let index = find(haystack.as_bytes(), self.as_bytes())?;
+        Some((index, self.len()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let index = rfind(haystack.as_bytes(), self.as_bytes())?;
+        Some((index, self.len()))
+    }
+}
+
+/// Matches a `JavaStr` needle by comparing raw bytes with the haystack.
+unsafe impl JavaStrPattern for &JavaStr {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let matches = haystack.as_bytes().starts_with(self.as_bytes());
+        matches.then(|| self.len())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let matches = haystack.as_bytes().ends_with(self.as_bytes());
+        matches.then(|| self.len())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let index = find(haystack.as_bytes(), self.as_bytes())?;
+        Some((index, self.len()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let index = rfind(haystack.as_bytes(), self.as_bytes())?;
+        Some((index, self.len()))
+    }
+}
+
+/// Matches any single code point for which the predicate returns `true`.
+unsafe impl<F> JavaStrPattern for F
+where
+    F: FnMut(JavaCodePoint) -> bool,
+{
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let first = haystack.chars().next()?;
+        self(first).then(|| first.len_utf8())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let last = haystack.chars().next_back()?;
+        self(last).then(|| last.len_utf8())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack.char_indices().find(|&(_, ch)| self(ch))?;
+        Some((index, hit.len_utf8()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack.char_indices().rfind(|&(_, ch)| self(ch))?;
+        Some((index, hit.len_utf8()))
+    }
+}
+
+/// Matches any single code point equal to one of the `char`s in the slice.
+unsafe impl JavaStrPattern for &[char] {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let first = haystack.chars().next()?;
+        self.iter().any(|&c| first == c).then(|| first.len_utf8())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let last = haystack.chars().next_back()?;
+        self.iter().any(|&c| last == c).then(|| last.len_utf8())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack
+            .char_indices()
+            .find(|&(_, ch)| self.iter().any(|&c| ch == c))?;
+        Some((index, hit.len_utf8()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack
+            .char_indices()
+            .rfind(|&(_, ch)| self.iter().any(|&c| ch == c))?;
+        Some((index, hit.len_utf8()))
+    }
+}
+
+/// Matches any single code point contained in the slice.
+unsafe impl JavaStrPattern for &[JavaCodePoint] {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let first = haystack.chars().next()?;
+        self.contains(&first).then(|| first.len_utf8())
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let last = haystack.chars().next_back()?;
+        self.contains(&last).then(|| last.len_utf8())
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack
+            .char_indices()
+            .find(|(_, ch)| self.contains(ch))?;
+        Some((index, hit.len_utf8()))
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let (index, hit) = haystack
+            .char_indices()
+            .rfind(|(_, ch)| self.contains(ch))?;
+        Some((index, hit.len_utf8()))
+    }
+}
+
+/// Forwards to the `char` pattern using a copy of the referenced character.
+unsafe impl JavaStrPattern for &char {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: char = **self;
+        <char as JavaStrPattern>::prefix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: char = **self;
+        <char as JavaStrPattern>::suffix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: char = **self;
+        <char as JavaStrPattern>::find_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: char = **self;
+        <char as JavaStrPattern>::rfind_in(&mut needle, haystack)
+    }
+}
+
+/// Forwards to the `JavaCodePoint` pattern using a copy of the referenced
+/// code point.
+unsafe impl JavaStrPattern for &JavaCodePoint {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: JavaCodePoint = **self;
+        <JavaCodePoint as JavaStrPattern>::prefix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: JavaCodePoint = **self;
+        <JavaCodePoint as JavaStrPattern>::suffix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: JavaCodePoint = **self;
+        <JavaCodePoint as JavaStrPattern>::find_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: JavaCodePoint = **self;
+        <JavaCodePoint as JavaStrPattern>::rfind_in(&mut needle, haystack)
+    }
+}
+
+/// Forwards to the `&str` pattern using a copy of the inner reference.
+unsafe impl JavaStrPattern for &&str {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: &str = **self;
+        <&str as JavaStrPattern>::prefix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: &str = **self;
+        <&str as JavaStrPattern>::suffix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: &str = **self;
+        <&str as JavaStrPattern>::find_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: &str = **self;
+        <&str as JavaStrPattern>::rfind_in(&mut needle, haystack)
+    }
+}
+
+/// Forwards to the `&JavaStr` pattern using a copy of the inner reference.
+unsafe impl JavaStrPattern for &&JavaStr {
+    #[inline]
+    fn prefix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: &JavaStr = **self;
+        <&JavaStr as JavaStrPattern>::prefix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn suffix_len_in(&mut self, haystack: &JavaStr) -> Option<usize> {
+        let mut needle: &JavaStr = **self;
+        <&JavaStr as JavaStrPattern>::suffix_len_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn find_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: &JavaStr = **self;
+        <&JavaStr as JavaStrPattern>::find_in(&mut needle, haystack)
+    }
+
+    #[inline]
+    fn rfind_in(&mut self, haystack: &JavaStr) -> Option<(usize, usize)> {
+        let mut needle: &JavaStr = **self;
+        <&JavaStr as JavaStrPattern>::rfind_in(&mut needle, haystack)
+    }
+}
+
+/// Returns the byte index of the first occurrence of `needle` in `haystack`.
+/// An empty needle matches at index 0.
+#[inline]
+fn find(haystack: &[u8], needle: &[u8]) -> Option<usize> {
+    match needle.len() {
+        0 => Some(0),
+        width => haystack
+            .windows(width)
+            .position(|candidate| candidate == needle),
+    }
+}
+
+/// Returns the byte index of the last occurrence of `needle` in `haystack`.
+/// An empty needle matches at `haystack.len()`.
+#[inline]
+fn rfind(haystack: &[u8], needle: &[u8]) -> Option<usize> {
+    match needle.len() {
+        0 => Some(haystack.len()),
+        width => haystack
+            .windows(width)
+            .rposition(|candidate| candidate == needle),
+    }
+}
diff --git a/crates/java_string/src/serde.rs b/crates/java_string/src/serde.rs
new file mode 100644
index 000000000..e1c152d11
--- /dev/null
+++ b/crates/java_string/src/serde.rs
@@ -0,0 +1,263 @@
+use std::fmt::Formatter;
+
+use serde::de::value::SeqAccessDeserializer;
+use serde::de::{Error, SeqAccess, Unexpected, Visitor};
+use serde::ser::SerializeSeq;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+
+use crate::{JavaCodePoint, JavaStr, JavaString};
+
+/// Serializes as a plain string when the contents convert to `str`; otherwise
+/// as a sequence of the code points' `u32` values.
+impl Serialize for JavaString {
+    #[inline]
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Ok(valid) = self.as_str() {
+            return valid.serialize(serializer);
+        }
+
+        // Not representable as `str`: fall back to one element per code point.
+        let mut seq = serializer.serialize_seq(None)?;
+        self.chars()
+            .try_for_each(|ch| seq.serialize_element(&ch.as_u32()))?;
+        seq.end()
+    }
+}
+
+impl<'de> Deserialize<'de> for JavaString { // accepts whatever shapes `JavaStringVisitor` handles
+    #[inline]
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_any(JavaStringVisitor) // the input's own type picks the visitor method
+    }
+}
+
+/// Serde visitor that builds a [`JavaString`] from a string, from semi-valid
+/// UTF-8 bytes, or from a sequence of code points.
+struct JavaStringVisitor;
+
+impl<'de> Visitor<'de> for JavaStringVisitor {
+    type Value = JavaString;
+
+    fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
+        formatter.write_str("a JavaString")
+    }
+
+    /// A borrowed `str` is always acceptable and is copied.
+    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        Ok(JavaString::from(v))
+    }
+
+    /// An owned `String` is always acceptable and is converted directly.
+    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        Ok(JavaString::from(v))
+    }
+
+    /// Borrowed bytes are accepted only if they are semi-valid UTF-8.
+    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        match JavaStr::from_semi_utf8(v) {
+            Ok(str) => Ok(str.to_owned()),
+            Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
+        }
+    }
+
+    /// Owned bytes are accepted only if they are semi-valid UTF-8. On failure
+    /// the rejected bytes are reported in the error.
+    fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        JavaString::from_semi_utf8(v)
+            .map_err(|err| Error::invalid_value(Unexpected::Bytes(&err.into_bytes()), &self))
+    }
+
+    /// Accepts a sequence of code points.
+    ///
+    /// This is the form `Serialize` emits for strings that are not valid
+    /// UTF-8 (one `u32` per code point), so the elements must be read as
+    /// [`JavaCodePoint`]s rather than as raw bytes. Reading them as `u8`
+    /// would reject every surrogate and break the round trip.
+    fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
+    where
+        A: SeqAccess<'de>,
+    {
+        let code_points = Vec::<JavaCodePoint>::deserialize(SeqAccessDeserializer::new(seq))?;
+        Ok(code_points.into_iter().collect())
+    }
+}
+
+/// Serializes as a plain string when the contents convert to `str`; otherwise
+/// as a sequence of the code points' `u32` values.
+impl Serialize for JavaStr {
+    #[inline]
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Ok(valid) = self.as_str() {
+            return valid.serialize(serializer);
+        }
+
+        // Not representable as `str`: fall back to one element per code point.
+        let mut seq = serializer.serialize_seq(None)?;
+        self.chars()
+            .try_for_each(|ch| seq.serialize_element(&ch.as_u32()))?;
+        seq.end()
+    }
+}
+
+impl<'de: 'a, 'a> Deserialize<'de> for &'a JavaStr { // borrows from the input, hence `'de: 'a`
+    #[inline]
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_any(JavaStrVisitor) // the visitor only handles borrowed str/bytes
+    }
+}
+
+/// Serde visitor that borrows a [`JavaStr`] straight from the input, either
+/// from a borrowed string or from borrowed semi-valid UTF-8 bytes.
+struct JavaStrVisitor;
+
+impl<'de> Visitor<'de> for JavaStrVisitor {
+    type Value = &'de JavaStr;
+
+    fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
+        formatter.write_str("a borrowed JavaStr")
+    }
+
+    /// A borrowed `str` is always acceptable.
+    fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        let borrowed = JavaStr::from_str(v);
+        Ok(borrowed)
+    }
+
+    /// Borrowed bytes are accepted only if they are semi-valid UTF-8.
+    fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        match JavaStr::from_semi_utf8(v) {
+            Ok(borrowed) => Ok(borrowed),
+            Err(_) => Err(Error::invalid_value(Unexpected::Bytes(v), &self)),
+        }
+    }
+}
+
+/// Serializes as a `char` when the code point has a `char` form, and as its
+/// `u32` value otherwise.
+impl Serialize for JavaCodePoint {
+    #[inline]
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Some(ch) = self.as_char() {
+            ch.serialize(serializer)
+        } else {
+            self.as_u32().serialize(serializer)
+        }
+    }
+}
+
+impl<'de> Deserialize<'de> for JavaCodePoint { // accepts whatever shapes `JavaCodePointVisitor` handles
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        deserializer.deserialize_any(JavaCodePointVisitor) // the input's own type picks the visitor method
+    }
+}
+
+/// Serde visitor that produces a [`JavaCodePoint`] from an integer, a `char`,
+/// or a string holding exactly one character.
+struct JavaCodePointVisitor;
+
+impl<'de> Visitor<'de> for JavaCodePointVisitor {
+    type Value = JavaCodePoint;
+
+    fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
+        formatter.write_str("a character")
+    }
+
+    #[inline]
+    fn visit_i8<E>(self, v: i8) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        self.visit_i32(i32::from(v))
+    }
+
+    #[inline]
+    fn visit_i16<E>(self, v: i16) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        self.visit_i32(i32::from(v))
+    }
+
+    fn visit_i32<E>(self, v: i32) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        // Negative numbers are rejected before the unsigned check.
+        if v < 0 {
+            return Err(Error::invalid_value(
+                Unexpected::Signed(i64::from(v)),
+                &self,
+            ));
+        }
+        self.visit_u32(v as u32)
+    }
+
+    fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        // Negative numbers are rejected before the unsigned check.
+        if v < 0 {
+            return Err(Error::invalid_value(Unexpected::Signed(v), &self));
+        }
+        self.visit_u64(v as u64)
+    }
+
+    #[inline]
+    fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        self.visit_u32(u32::from(v))
+    }
+
+    #[inline]
+    fn visit_u16<E>(self, v: u16) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        self.visit_u32(u32::from(v))
+    }
+
+    /// All integer paths end here; `JavaCodePoint::from_u32` decides whether
+    /// the value is acceptable.
+    fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        match JavaCodePoint::from_u32(v) {
+            Some(code_point) => Ok(code_point),
+            None => Err(Error::invalid_value(
+                Unexpected::Unsigned(u64::from(v)),
+                &self,
+            )),
+        }
+    }
+
+    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        // Values that do not fit in a `u32` are rejected outright.
+        if v > u64::from(u32::MAX) {
+            return Err(Error::invalid_value(Unexpected::Unsigned(v), &self));
+        }
+        self.visit_u32(v as u32)
+    }
+
+    fn visit_char<E>(self, v: char) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        let code_point = JavaCodePoint::from_char(v);
+        Ok(code_point)
+    }
+
+    /// A string is accepted only when it holds exactly one character.
+    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        let mut chars = v.chars();
+        if let (Some(only), None) = (chars.next(), chars.next()) {
+            Ok(JavaCodePoint::from_char(only))
+        } else {
+            Err(Error::invalid_value(Unexpected::Str(v), &self))
+        }
+    }
+}
diff --git a/crates/java_string/src/slice.rs b/crates/java_string/src/slice.rs
new file mode 100644
index 000000000..104df4228
--- /dev/null
+++ b/crates/java_string/src/slice.rs
@@ -0,0 +1,2239 @@
+use std::borrow::Cow;
+use std::collections::Bound;
+use std::fmt::{Debug, Display, Formatter, Write};
+use std::hash::{Hash, Hasher};
+use std::ops::{
+    Add, AddAssign, Index, IndexMut, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive,
+    RangeTo, RangeToInclusive,
+};
+use std::rc::Rc;
+use std::str::FromStr;
+use std::sync::Arc;
+use std::{ptr, slice};
+
+use crate::char::EscapeDebugExtArgs;
+use crate::validations::{
+    run_utf8_full_validation_from_semi, run_utf8_semi_validation, slice_error_fail,
+    str_end_index_overflow_fail,
+};
+use crate::{
+    Bytes, CharEscapeIter, CharIndices, Chars, EscapeDebug, EscapeDefault, EscapeUnicode,
+    JavaCodePoint, JavaStrPattern, JavaString, Lines, MatchIndices, Matches, ParseError,
+    RMatchIndices, RMatches, RSplit, RSplitN, RSplitTerminator, Split, SplitAsciiWhitespace,
+    SplitInclusive, SplitN, SplitTerminator, SplitWhitespace, Utf8Error,
+};
+
+/// A borrowed string slice whose bytes are "semi-valid" UTF-8, i.e. UTF-8
+/// that may additionally contain encoded surrogate code points.
+///
+/// The type is `#[repr(transparent)]` over `[u8]`; the unchecked
+/// constructors rely on that layout guarantee to reinterpret byte slices.
+#[repr(transparent)]
+#[derive(PartialEq, Eq, PartialOrd, Ord)]
+pub struct JavaStr {
+    // Raw bytes of the string, kept in semi-valid UTF-8.
+    inner: [u8],
+}
+
+impl JavaStr {
+    /// Validates `v` as fully-valid UTF-8 (no surrogate code points) and, on
+    /// success, views it as a `&JavaStr`. See [std::str::from_utf8].
+    #[inline]
+    pub const fn from_full_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> {
+        // `?` and the `Result` combinators are unavailable in a `const fn`,
+        // so the error path is spelled out as an early return.
+        let valid = match std::str::from_utf8(v) {
+            Ok(valid) => valid,
+            Err(std_err) => return Err(Utf8Error::from_std(std_err)),
+        };
+        Ok(JavaStr::from_str(valid))
+    }
+
+    /// Validates `v` as fully-valid UTF-8 (no surrogate code points) and, on
+    /// success, views it as a `&mut JavaStr`. See [std::str::from_utf8_mut].
+    #[inline]
+    pub fn from_full_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> {
+        std::str::from_utf8_mut(v)
+            .map(JavaStr::from_mut_str)
+            .map_err(Utf8Error::from_std)
+    }
+
+    /// Validates `v` as semi-valid UTF-8 (UTF-8 in which surrogate code
+    /// points are permitted) and, on success, views it as a `&JavaStr`.
+    pub fn from_semi_utf8(v: &[u8]) -> Result<&JavaStr, Utf8Error> {
+        run_utf8_semi_validation(v)?;
+        // SAFETY: semi-validation of `v` just succeeded.
+        Ok(unsafe { JavaStr::from_semi_utf8_unchecked(v) })
+    }
+
+    /// Validates `v` as semi-valid UTF-8 (UTF-8 in which surrogate code
+    /// points are permitted) and, on success, views it as a `&mut JavaStr`.
+    pub fn from_semi_utf8_mut(v: &mut [u8]) -> Result<&mut JavaStr, Utf8Error> {
+        run_utf8_semi_validation(v)?;
+        // SAFETY: semi-validation of `v` just succeeded.
+        Ok(unsafe { JavaStr::from_semi_utf8_unchecked_mut(v) })
+    }
+
+    /// Views `v` as a `&JavaStr` without performing any validation.
+    ///
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+    #[inline]
+    #[must_use]
+    pub const unsafe fn from_semi_utf8_unchecked(v: &[u8]) -> &JavaStr {
+        // SAFETY: the caller must guarantee that the bytes `v` are semi-valid
+        // UTF-8, i.e. valid UTF-8 except that encoded surrogate code points
+        // may be present. Also relies on `&JavaStr` and `&[u8]` having the
+        // same layout (`JavaStr` is `#[repr(transparent)]` over `[u8]`).
+        std::mem::transmute(v)
+    }
+
+    /// Views `v` as a `&mut JavaStr` without performing any validation.
+    ///
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+    #[inline]
+    #[must_use]
+    pub unsafe fn from_semi_utf8_unchecked_mut(v: &mut [u8]) -> &mut JavaStr {
+        // SAFETY: the caller guarantees that `v` is semi-valid UTF-8, and
+        // `&mut JavaStr` has the same layout as `&mut [u8]` because `JavaStr`
+        // is `#[repr(transparent)]` over `[u8]`.
+        std::mem::transmute(v)
+    }
+
+    /// Views a `&str` as a `&JavaStr`. This cannot fail because every valid
+    /// `str` is also semi-valid UTF-8.
+    #[inline]
+    #[must_use]
+    pub const fn from_str(str: &str) -> &JavaStr {
+        let bytes = str.as_bytes();
+        // SAFETY: a `str` always holds valid UTF-8, which is a subset of
+        // semi-valid UTF-8.
+        unsafe { JavaStr::from_semi_utf8_unchecked(bytes) }
+    }
+
+    /// Views a `&mut str` as a `&mut JavaStr`.
+    ///
+    /// NOTE(review): writing surrogate code points through the returned
+    /// reference (via the unsafe mutators) would leave the borrowed `str`
+    /// holding invalid UTF-8; callers of those mutators must avoid that.
+    #[inline]
+    #[must_use]
+    pub fn from_mut_str(str: &mut str) -> &mut JavaStr {
+        // SAFETY: a `str` always holds valid UTF-8, which is a subset of
+        // semi-valid UTF-8, and the bytes are handed straight to `JavaStr`
+        // without being modified here.
+        unsafe {
+            let bytes = str.as_bytes_mut();
+            JavaStr::from_semi_utf8_unchecked_mut(bytes)
+        }
+    }
+
+    /// Converts a `Box<str>` into a `Box<JavaStr>` by way of its boxed bytes.
+    #[inline]
+    #[must_use]
+    pub fn from_boxed_str(v: Box<str>) -> Box<JavaStr> {
+        // SAFETY: a `str` always holds valid UTF-8, which is a subset of
+        // semi-valid UTF-8.
+        unsafe { JavaStr::from_boxed_semi_utf8_unchecked(v.into_boxed_bytes()) }
+    }
+
+    /// Converts boxed bytes into a `Box<JavaStr>` without validating them.
+    ///
+    /// # Safety
+    ///
+    /// The parameter must be in semi-valid UTF-8 format, that is, UTF-8 plus
+    /// surrogate code points.
+    #[inline]
+    #[must_use]
+    pub unsafe fn from_boxed_semi_utf8_unchecked(v: Box<[u8]>) -> Box<JavaStr> {
+        // SAFETY: the raw pointer comes straight from `Box::into_raw`, and
+        // `JavaStr` is `#[repr(transparent)]` over `[u8]`, so re-boxing it
+        // under the new type keeps the same allocation and layout. The caller
+        // guarantees the contents are semi-valid UTF-8.
+        unsafe { Box::from_raw(Box::into_raw(v) as *mut JavaStr) }
+    }
+
+    /// See [str::as_bytes].
+    ///
+    /// Returns the underlying bytes of this string. They are semi-valid
+    /// UTF-8 and may therefore contain encoded surrogate code points.
+    #[inline]
+    #[must_use]
+    pub const fn as_bytes(&self) -> &[u8] {
+        &self.inner
+    }
+
+    /// See [str::as_bytes_mut].
+    ///
+    /// # Safety
+    ///
+    /// The returned slice must not have invalid UTF-8 written to it, besides
+    /// encoded surrogate code points (i.e. the contents must remain
+    /// semi-valid UTF-8).
+    #[inline]
+    #[must_use]
+    pub unsafe fn as_bytes_mut(&mut self) -> &mut [u8] {
+        &mut self.inner
+    }
+
+    /// See [str::as_mut_ptr].
+    ///
+    /// Returns a mutable raw pointer to the first byte of the string. Any
+    /// writes through it must keep the contents semi-valid UTF-8.
+    #[inline]
+    #[must_use]
+    pub fn as_mut_ptr(&mut self) -> *mut u8 {
+        self.inner.as_mut_ptr()
+    }
+
+    /// See [str::as_ptr].
+    ///
+    /// Returns a raw pointer to the first byte of the string.
+    #[inline]
+    #[must_use]
+    pub const fn as_ptr(&self) -> *const u8 {
+        self.inner.as_ptr()
+    }
+
+    /// Tries to convert this `&JavaStr` to a `&str`, returning an error if
+    /// it is not fully valid UTF-8, i.e. if it contains surrogate code
+    /// points.
+    pub const fn as_str(&self) -> Result<&str, Utf8Error> {
+        // Manual implementation of Result::map since it's not const
+        match run_utf8_full_validation_from_semi(self.as_bytes()) {
+            Ok(..) => unsafe {
+                // SAFETY: we were already semi-valid, and full validation just succeeded.
+                Ok(self.as_str_unchecked())
+            },
+            Err(err) => Err(err),
+        }
+    }
+
+    /// Views this `&JavaStr` as a `&str` without checking for surrogate code
+    /// points.
+    ///
+    /// # Safety
+    ///
+    /// This string must be fully valid UTF-8, i.e. have no surrogate code
+    /// points.
+    #[inline]
+    #[must_use]
+    pub const unsafe fn as_str_unchecked(&self) -> &str {
+        // SAFETY: the caller guarantees the bytes are fully valid UTF-8.
+        std::str::from_utf8_unchecked(self.as_bytes())
+    }
+
+    /// Converts this `&JavaStr` to a `Cow<str>`, replacing surrogate code
+    /// points with the replacement character �.
+    ///
+    /// ```
+    /// # use std::borrow::Cow;
+    /// # use java_string::{JavaCodePoint, JavaStr, JavaString};
+    /// let s = JavaStr::from_str("Hello 🦀 World!");
+    /// let result = s.as_str_lossy();
+    /// assert!(matches!(result, Cow::Borrowed(_)));
+    /// assert_eq!(result, "Hello 🦀 World!");
+    ///
+    /// let s = JavaString::from("Hello ")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str()
+    ///     + JavaStr::from_str(" World!");
+    /// let result = s.as_str_lossy();
+    /// assert!(matches!(result, Cow::Owned(_)));
+    /// assert_eq!(result, "Hello � World!");
+    /// ```
+    #[must_use]
+    pub fn as_str_lossy(&self) -> Cow<'_, str> {
+        match run_utf8_full_validation_from_semi(self.as_bytes()) {
+            // Fast path: no surrogates present, so the bytes can be borrowed
+            // as a `str` without allocating.
+            Ok(()) => unsafe {
+                // SAFETY: validation succeeded
+                Cow::Borrowed(self.as_str_unchecked())
+            },
+            // Slow path: build an owned copy. NOTE(review): presumably
+            // `transform_invalid_string` applies the first closure to valid
+            // runs and the second to each invalid part, starting from
+            // `error` - confirm against its definition.
+            Err(error) => unsafe {
+                // SAFETY: invalid parts of string are converted to replacement char
+                Cow::Owned(
+                    self.transform_invalid_string(error, str::to_owned, |_| {
+                        JavaStr::from_str("\u{FFFD}")
+                    })
+                    .into_string_unchecked(),
+                )
+            },
+        }
+    }
+
+    /// See [str::bytes].
+    ///
+    /// Returns an iterator over the raw bytes of this string.
+    #[inline]
+    pub fn bytes(&self) -> Bytes<'_> {
+        Bytes {
+            // `copied` makes the iterator yield `u8` by value instead of `&u8`.
+            inner: self.inner.iter().copied(),
+        }
+    }
+
+    /// See [str::char_indices].
+    ///
+    /// Builds the iterator on top of [`JavaStr::chars`], with the front
+    /// offset starting at 0.
+    #[inline]
+    pub fn char_indices(&self) -> CharIndices<'_> {
+        CharIndices {
+            front_offset: 0,
+            inner: self.chars(),
+        }
+    }
+
+    /// See [str::chars].
+    ///
+    /// The returned iterator wraps a byte iterator over this string.
+    /// NOTE(review): decoding of bytes into code points presumably happens
+    /// inside `Chars` - confirm in `iter.rs`.
+    #[inline]
+    pub fn chars(&self) -> Chars<'_> {
+        Chars {
+            inner: self.inner.iter(),
+        }
+    }
+
+    /// See [str::contains].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let bananas = JavaStr::from_str("bananas");
+    ///
+    /// assert!(bananas.contains("nana"));
+    /// assert!(!bananas.contains("apples"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn contains<P>(&self, mut pat: P) -> bool
+    where
+        P: JavaStrPattern,
+    {
+        // Only the existence of a match matters here; its position is discarded.
+        pat.find_in(self).is_some()
+    }
+
+    /// See [str::ends_with].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let bananas = JavaStr::from_str("bananas");
+    ///
+    /// assert!(bananas.ends_with("anas"));
+    /// assert!(!bananas.ends_with("nana"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn ends_with<P>(&self, mut pat: P) -> bool
+    where
+        P: JavaStrPattern,
+    {
+        // The pattern reports a suffix length only when it matches at the end
+        // of the string; the length itself is not needed.
+        pat.suffix_len_in(self).is_some()
+    }
+
+    /// See [str::eq_ignore_ascii_case].
+    ///
+    /// Compares this string against a `&str` byte-wise, ignoring ASCII case.
+    #[inline]
+    #[must_use]
+    pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
+        self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
+    }
+
+    /// See [str::eq_ignore_ascii_case].
+    ///
+    /// Variant of [`JavaStr::eq_ignore_ascii_case`] that takes another
+    /// `&JavaStr`; the comparison is byte-wise and ignores ASCII case.
+    #[inline]
+    #[must_use]
+    pub fn eq_java_ignore_ascii_case(&self, other: &JavaStr) -> bool {
+        self.as_bytes().eq_ignore_ascii_case(other.as_bytes())
+    }
+
+    /// See [str::escape_debug].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_debug().to_string(),
+    ///     "❤\\n!"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_debug(&self) -> EscapeDebug<'_> {
+        // Escaping applied to the first code point only.
+        #[inline]
+        fn escape_first(first: JavaCodePoint) -> CharEscapeIter {
+            first.escape_debug_ext(EscapeDebugExtArgs::ESCAPE_ALL)
+        }
+        // Escaping applied to every code point after the first.
+        // NOTE(review): both quote flags are set here as well - confirm
+        // whether this differs from `ESCAPE_ALL` at all.
+        #[inline]
+        fn escape_rest(char: JavaCodePoint) -> CharEscapeIter {
+            char.escape_debug_ext(EscapeDebugExtArgs {
+                escape_single_quote: true,
+                escape_double_quote: true,
+            })
+        }
+
+        // The helpers are cast to plain `fn` pointers so the iterator type
+        // stored in `EscapeDebug` can be named.
+        let mut chars = self.chars();
+        EscapeDebug {
+            inner: chars
+                .next()
+                .map(escape_first as fn(JavaCodePoint) -> CharEscapeIter)
+                .into_iter()
+                .flatten()
+                .chain(chars.flat_map(escape_rest as fn(JavaCodePoint) -> CharEscapeIter)),
+        }
+    }
+
+    /// See [str::escape_default].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_default().to_string(),
+    ///     "\\u{2764}\\n!"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_default(&self) -> EscapeDefault<'_> {
+        EscapeDefault {
+            // Each code point expands to its own escape sequence; `flat_map`
+            // concatenates them into one stream.
+            inner: self.chars().flat_map(JavaCodePoint::escape_default),
+        }
+    }
+
+    /// See [str::escape_unicode].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("❤\n!").escape_unicode().to_string(),
+    ///     "\\u{2764}\\u{a}\\u{21}"
+    /// );
+    /// ```
+    #[inline]
+    pub fn escape_unicode(&self) -> EscapeUnicode<'_> {
+        EscapeUnicode {
+            // Each code point expands to its own escape sequence; `flat_map`
+            // concatenates them into one stream.
+            inner: self.chars().flat_map(JavaCodePoint::escape_unicode),
+        }
+    }
+
+    /// See [str::find].
+    ///
+    /// Returns the byte index of the first match of `pat`, or `None` if the
+    /// pattern does not occur in this string.
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("Löwe 老虎 Léopard Gepardi");
+    ///
+    /// assert_eq!(s.find('L'), Some(0));
+    /// assert_eq!(s.find('é'), Some(14));
+    /// assert_eq!(s.find("pard"), Some(17));
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(s.find(x), None);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn find<P>(&self, mut pat: P) -> Option<usize>
+    where
+        P: JavaStrPattern,
+    {
+        // `find_in` yields a pair; only the first component (the index) is returned.
+        pat.find_in(self).map(|(index, _)| index)
+    }
+
+    /// See [str::get].
+    ///
+    /// ```
+    /// # use java_string::{JavaStr, JavaString};
+    /// let v = JavaString::from("🗻∈🌏");
+    ///
+    /// assert_eq!(Some(JavaStr::from_str("🗻")), v.get(0..4));
+    ///
+    /// // indices not on UTF-8 sequence boundaries
+    /// assert!(v.get(1..).is_none());
+    /// assert!(v.get(..8).is_none());
+    ///
+    /// // out of bounds
+    /// assert!(v.get(..42).is_none());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn get<I>(&self, i: I) -> Option<&JavaStr>
+    where
+        I: JavaStrSliceIndex,
+    {
+        // All checking is delegated to the index type's `JavaStrSliceIndex` impl.
+        i.get(self)
+    }
+
+    /// See [str::get_mut].
+    ///
+    /// Mutable counterpart of [`JavaStr::get`]; returns `None` when the
+    /// index type's `get_mut` rejects the range.
+    #[inline]
+    #[must_use]
+    pub fn get_mut<I>(&mut self, i: I) -> Option<&mut JavaStr>
+    where
+        I: JavaStrSliceIndex,
+    {
+        i.get_mut(self)
+    }
+
+    /// See [str::get_unchecked].
+    ///
+    /// # Safety
+    ///
+    /// - The starting index must not exceed the ending index
+    /// - Indexes must be within bounds of the original slice
+    /// - Indexes must lie on UTF-8 sequence boundaries
+    #[inline]
+    #[must_use]
+    pub unsafe fn get_unchecked<I>(&self, i: I) -> &JavaStr
+    where
+        I: JavaStrSliceIndex,
+    {
+        // SAFETY: the caller upholds the index requirements documented under
+        // `# Safety`. NOTE(review): this assumes the pointer returned by
+        // `JavaStrSliceIndex::get_unchecked` points into `self` and is valid
+        // for its lifetime - confirm against the trait.
+        unsafe { &*i.get_unchecked(self) }
+    }
+
+    /// See [str::get_unchecked_mut].
+    ///
+    /// # Safety
+    ///
+    /// - The starting index must not exceed the ending index
+    /// - Indexes must be within bounds of the original slice
+    /// - Indexes must lie on UTF-8 sequence boundaries
+    #[inline]
+    #[must_use]
+    pub unsafe fn get_unchecked_mut<I>(&mut self, i: I) -> &mut JavaStr
+    where
+        I: JavaStrSliceIndex,
+    {
+        // SAFETY: the caller upholds the index requirements documented under
+        // `# Safety`. NOTE(review): this assumes the pointer returned by
+        // `JavaStrSliceIndex::get_unchecked_mut` points into `self` and is
+        // valid for its lifetime - confirm against the trait.
+        unsafe { &mut *i.get_unchecked_mut(self) }
+    }
+
+    /// See [str::into_boxed_bytes].
+    ///
+    /// Converts the box in place by re-typing its raw pointer.
+    #[inline]
+    #[must_use]
+    pub fn into_boxed_bytes(self: Box<JavaStr>) -> Box<[u8]> {
+        // SAFETY: the raw pointer comes straight from `Box::into_raw`, and
+        // `JavaStr` is `#[repr(transparent)]` over `[u8]`, so the pointee has
+        // the same layout under both types. Any byte sequence is a valid `[u8]`.
+        unsafe { Box::from_raw(Box::into_raw(self) as *mut [u8]) }
+    }
+
+    /// See [str::into_string].
+    ///
+    /// Converts a `Box<JavaStr>` into a `JavaString` via its boxed bytes.
+    #[inline]
+    #[must_use]
+    pub fn into_string(self: Box<JavaStr>) -> JavaString {
+        let slice = self.into_boxed_bytes();
+        // SAFETY: the bytes were taken unchanged from a `JavaStr`, which is
+        // always semi-valid UTF-8.
+        unsafe { JavaString::from_semi_utf8_unchecked(slice.into_vec()) }
+    }
+
+    /// See [str::is_ascii].
+    ///
+    /// Returns `true` when every byte of the string is ASCII.
+    #[inline]
+    #[must_use]
+    pub fn is_ascii(&self) -> bool {
+        self.as_bytes().is_ascii()
+    }
+
+    /// See [str::is_char_boundary].
+    ///
+    /// Returns `true` when `index` is 0, equals the string length, or points
+    /// at a byte that is not a UTF-8 continuation byte.
+    #[inline]
+    #[must_use]
+    pub fn is_char_boundary(&self, index: usize) -> bool {
+        // The start of the string is always a boundary. Handling it up front
+        // lets the check be optimized out without reading string data, which
+        // `self.get(..index)` relies on.
+        if index == 0 {
+            return true;
+        }
+
+        // No byte at `index`: either `index == self.len()` (a boundary, since
+        // the empty tail is valid) or `index > self.len()` (not a boundary).
+        // The length comparison is kept in this position because it improves
+        // generated code on higher opt-levels. See https://github.com/rust-lang/rust/pull/84751 for more details.
+        let Some(&byte) = self.as_bytes().get(index) else {
+            return index == self.len();
+        };
+
+        // Bit trick equivalent to: byte < 128 || byte >= 192, i.e. the byte is
+        // not a continuation byte.
+        (byte as i8) >= -0x40
+    }
+
+    /// Returns the closest char boundary at or below `index`. An `index` at
+    /// or beyond the end of the string yields `self.len()`.
+    pub(crate) fn floor_char_boundary(&self, index: usize) -> usize {
+        if index >= self.len() {
+            self.len()
+        } else {
+            // An encoded code point is at most four bytes long, so the start
+            // of the one covering `index` lies within `index - 3 ..= index`.
+            let lower_bound = index.saturating_sub(3);
+            // Scan that window from the back for the last non-continuation byte.
+            let new_index = self.as_bytes()[lower_bound..=index].iter().rposition(|b| {
+                // This is bit magic equivalent to: b < 128 || b >= 192
+                (*b as i8) >= -0x40
+            });
+
+            // SAFETY: we know that the character boundary will be within four bytes
+            unsafe { lower_bound + new_index.unwrap_unchecked() }
+        }
+    }
+
+    /// See [str::is_empty].
+    ///
+    /// Returns `true` when the string contains no bytes.
+    #[inline]
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// See [str::len].
+    ///
+    /// Returns the length in bytes, not in code points.
+    #[inline]
+    #[must_use]
+    pub fn len(&self) -> usize {
+        self.inner.len()
+    }
+
+    /// See [str::lines].
+    ///
+    /// Splits after every `'\n'`, then strips one trailing `"\n"` or
+    /// `"\r\n"` from each piece. A `'\r'` not followed by `'\n'` is kept.
+    #[inline]
+    pub fn lines(&self) -> Lines<'_> {
+        Lines {
+            inner: self.split_inclusive('\n').map(|line| {
+                // The `'\r'` is only removed when a `'\n'` was removed first.
+                line.strip_suffix('\n').map_or(line, |without_lf| {
+                    without_lf.strip_suffix('\r').unwrap_or(without_lf)
+                })
+            }),
+        }
+    }
+
+    /// See [str::make_ascii_lowercase].
+    ///
+    /// Lowercases ASCII letters in place; all other bytes are untouched.
+    #[inline]
+    pub fn make_ascii_lowercase(&mut self) {
+        // SAFETY: only ASCII letters are rewritten, and to other ASCII
+        // letters, so the bytes stay semi-valid UTF-8.
+        unsafe { self.as_bytes_mut().make_ascii_lowercase() }
+    }
+
+    /// See [str::make_ascii_uppercase].
+    ///
+    /// Uppercases ASCII letters in place; all other bytes are untouched.
+    #[inline]
+    pub fn make_ascii_uppercase(&mut self) {
+        // SAFETY: only ASCII letters are rewritten, and to other ASCII
+        // letters, so the bytes stay semi-valid UTF-8.
+        unsafe { self.as_bytes_mut().make_ascii_uppercase() }
+    }
+
+    /// See [str::match_indices].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<_> = JavaStr::from_str("abcXXXabcYYYabc")
+    ///     .match_indices("abc")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         (0, JavaStr::from_str("abc")),
+    ///         (6, JavaStr::from_str("abc")),
+    ///         (12, JavaStr::from_str("abc"))
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<_> = JavaStr::from_str("1abcabc2").match_indices("abc").collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [(1, JavaStr::from_str("abc")), (4, JavaStr::from_str("abc"))]
+    /// );
+    ///
+    /// let v: Vec<_> = JavaStr::from_str("ababa").match_indices("aba").collect();
+    /// assert_eq!(v, [(0, JavaStr::from_str("aba"))]); // only the first `aba`
+    /// ```
+    #[inline]
+    pub fn match_indices<P>(&self, pat: P) -> MatchIndices<P>
+    where
+        P: JavaStrPattern,
+    {
+        MatchIndices {
+            str: self,
+            // Presumably the byte offset where the next search begins -
+            // confirm in `MatchIndices`.
+            start: 0,
+            pat,
+        }
+    }
+
+    /// See [str::matches].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXXXabcYYYabc")
+    ///     .matches("abc")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("abc"),
+    ///         JavaStr::from_str("abc"),
+    ///         JavaStr::from_str("abc")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("1abc2abc3")
+    ///     .matches(JavaCodePoint::is_numeric)
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("1"),
+    ///         JavaStr::from_str("2"),
+    ///         JavaStr::from_str("3")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn matches<P>(&self, pat: P) -> Matches<P>
+    where
+        P: JavaStrPattern,
+    {
+        // The iterator only needs the haystack and the pattern.
+        Matches { str: self, pat }
+    }
+
+    /// See [str::parse].
+    ///
+    /// Parsing goes through [`FromStr`], so the string must first convert to
+    /// a `&str`. Surrogate code points make that fail with
+    /// `ParseError::InvalidUtf8`; a failure of `F::from_str` itself is
+    /// reported as `ParseError::Err`.
+    #[inline]
+    pub fn parse<F>(&self) -> Result<F, ParseError<<F as FromStr>::Err>>
+    where
+        F: FromStr,
+    {
+        self.as_str()
+            .map_err(ParseError::InvalidUtf8)
+            .and_then(|str| str.parse().map_err(ParseError::Err))
+    }
+
+    /// See [str::repeat].
+    ///
+    /// Returns a new `JavaString` holding this string's bytes repeated `n`
+    /// times.
+    #[inline]
+    #[must_use]
+    pub fn repeat(&self, n: usize) -> JavaString {
+        // SAFETY: concatenating whole copies of a semi-valid UTF-8 sequence
+        // yields a semi-valid UTF-8 sequence.
+        unsafe { JavaString::from_semi_utf8_unchecked(self.as_bytes().repeat(n)) }
+    }
+
+    /// See [str::replace].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("this is old");
+    ///
+    /// assert_eq!("this is new", s.replace("old", "new"));
+    /// assert_eq!("than an old", s.replace("is", "an"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn replace<P>(&self, from: P, to: &str) -> JavaString
+    where
+        P: JavaStrPattern,
+    {
+        // Convenience wrapper: view the `&str` replacement as a `&JavaStr`
+        // and defer to `replace_java`.
+        self.replace_java(from, JavaStr::from_str(to))
+    }
+
+    /// See [str::replace].
+    ///
+    /// Variant of [`JavaStr::replace`] whose replacement is a `&JavaStr`.
+    /// Every match of `from` is replaced by `to`.
+    #[inline]
+    #[must_use]
+    pub fn replace_java<P>(&self, from: P, to: &JavaStr) -> JavaString
+    where
+        P: JavaStrPattern,
+    {
+        let mut replaced = JavaString::new();
+        // Byte offset just past the previous match; everything before it has
+        // already been copied into `replaced`.
+        let mut cursor = 0;
+        for (match_start, matched) in self.match_indices(from) {
+            // SAFETY: relies on `match_indices` yielding in-bounds matches in
+            // ascending order that start and end on char boundaries.
+            let untouched = unsafe { self.get_unchecked(cursor..match_start) };
+            replaced.push_java_str(untouched);
+            replaced.push_java_str(to);
+            cursor = match_start + matched.len();
+        }
+        // SAFETY: `cursor` is 0 or the end of the last match, and `self.len()`
+        // is always a valid end index.
+        let tail = unsafe { self.get_unchecked(cursor..self.len()) };
+        replaced.push_java_str(tail);
+        replaced
+    }
+
+    /// See [str::replacen].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// let s = JavaStr::from_str("foo foo 123 foo");
+    /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2));
+    /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3));
+    /// assert_eq!(
+    ///     "foo foo new23 foo",
+    ///     s.replacen(JavaCodePoint::is_numeric, "new", 1)
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn replacen<P>(&self, from: P, to: &str, count: usize) -> JavaString
+    where
+        P: JavaStrPattern,
+    {
+        // Convenience wrapper: view the `&str` replacement as a `&JavaStr`
+        // and defer to `replacen_java`.
+        self.replacen_java(from, JavaStr::from_str(to), count)
+    }
+
+    /// See [str::replacen].
+    ///
+    /// Variant of [`JavaStr::replacen`] whose replacement is a `&JavaStr`.
+    /// At most the first `count` matches of `from` are replaced by `to`.
+    #[inline]
+    #[must_use]
+    pub fn replacen_java<P>(&self, from: P, to: &JavaStr, count: usize) -> JavaString
+    where
+        P: JavaStrPattern,
+    {
+        // Start with some capacity in the hope of reducing re-allocations.
+        let mut replaced = JavaString::with_capacity(32);
+        // Byte offset just past the previous match; everything before it has
+        // already been copied into `replaced`.
+        let mut cursor = 0;
+        for (match_start, matched) in self.match_indices(from).take(count) {
+            // SAFETY: relies on `match_indices` yielding in-bounds matches in
+            // ascending order that start and end on char boundaries.
+            let untouched = unsafe { self.get_unchecked(cursor..match_start) };
+            replaced.push_java_str(untouched);
+            replaced.push_java_str(to);
+            cursor = match_start + matched.len();
+        }
+        // SAFETY: `cursor` is 0 or the end of the last match, and `self.len()`
+        // is always a valid end index.
+        let tail = unsafe { self.get_unchecked(cursor..self.len()) };
+        replaced.push_java_str(tail);
+        replaced
+    }
+
+    /// See [str::rfind].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("Löwe 老虎 Léopard Gepardi");
+    ///
+    /// assert_eq!(s.rfind('L'), Some(13));
+    /// assert_eq!(s.rfind('é'), Some(14));
+    /// assert_eq!(s.rfind("pard"), Some(24));
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(s.rfind(x), None);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn rfind<P>(&self, mut pat: P) -> Option<usize>
+    where
+        P: JavaStrPattern,
+    {
+        // `rfind_in` yields a pair; only the first component (the index) is returned.
+        pat.rfind_in(self).map(|(index, _)| index)
+    }
+
+    /// See [str::rmatch_indices].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<_> = JavaStr::from_str("abcXXXabcYYYabc")
+    ///     .rmatch_indices("abc")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         (12, JavaStr::from_str("abc")),
+    ///         (6, JavaStr::from_str("abc")),
+    ///         (0, JavaStr::from_str("abc"))
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<_> = JavaStr::from_str("1abcabc2")
+    ///     .rmatch_indices("abc")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [(4, JavaStr::from_str("abc")), (1, JavaStr::from_str("abc"))]
+    /// );
+    ///
+    /// let v: Vec<_> = JavaStr::from_str("ababa").rmatch_indices("aba").collect();
+    /// assert_eq!(v, [(2, JavaStr::from_str("aba"))]); // only the last `aba`
+    /// ```
+    #[inline]
+    pub fn rmatch_indices<P>(&self, pat: P) -> RMatchIndices<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Wraps the forward iterator. NOTE(review): reverse order is
+        // presumably obtained by `RMatchIndices` driving it from the back -
+        // confirm in `iter.rs`.
+        RMatchIndices {
+            inner: self.match_indices(pat),
+        }
+    }
+
+    /// See [str::rmatches].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXXXabcYYYabc")
+    ///     .rmatches("abc")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("abc"),
+    ///         JavaStr::from_str("abc"),
+    ///         JavaStr::from_str("abc")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("1abc2abc3")
+    ///     .rmatches(JavaCodePoint::is_numeric)
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("3"),
+    ///         JavaStr::from_str("2"),
+    ///         JavaStr::from_str("1")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn rmatches<P>(&self, pat: P) -> RMatches<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Wraps the forward iterator. NOTE(review): reverse order is
+        // presumably obtained by `RMatches` driving it from the back -
+        // confirm in `iter.rs`.
+        RMatches {
+            inner: self.matches(pat),
+        }
+    }
+
+    /// See [str::rsplit].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb")
+    ///     .rsplit(' ')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lamb"),
+    ///         JavaStr::from_str("little"),
+    ///         JavaStr::from_str("a"),
+    ///         JavaStr::from_str("had"),
+    ///         JavaStr::from_str("Mary")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("").rsplit('X').collect();
+    /// assert_eq!(v, [JavaStr::from_str("")]);
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard")
+    ///     .rsplit('X')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("leopard"),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("lion")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard")
+    ///     .rsplit("::")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("leopard"),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str("lion")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn rsplit<P>(&self, pat: P) -> RSplit<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Construction is delegated to the iterator type.
+        RSplit::new(self, pat)
+    }
+
+    /// See [str::rsplit_once].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(JavaStr::from_str("cfg").rsplit_once('='), None);
+    /// assert_eq!(
+    ///     JavaStr::from_str("cfg=foo").rsplit_once('='),
+    ///     Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo")))
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("cfg=foo=bar").rsplit_once('='),
+    ///     Some((JavaStr::from_str("cfg=foo"), JavaStr::from_str("bar")))
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn rsplit_once<P>(&self, mut delimiter: P) -> Option<(&JavaStr, &JavaStr)>
+    where
+        P: JavaStrPattern,
+    {
+        // Locate the last occurrence of the delimiter; bail out if there is none.
+        let (start, delimiter_len) = delimiter.rfind_in(self)?;
+        let end = start + delimiter_len;
+        // SAFETY: pattern is known to return valid indices.
+        let before = unsafe { self.get_unchecked(..start) };
+        // SAFETY: as above; `end` is the end of the matched delimiter.
+        let after = unsafe { self.get_unchecked(end..) };
+        Some((before, after))
+    }
+
+    /// See [str::rsplit_terminator].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B.").rsplit_terminator('.').collect();
+    /// assert_eq!(v, [JavaStr::from_str("B"), JavaStr::from_str("A")]);
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A..B..").rsplit_terminator(".").collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("B"),
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("A")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B:C.D")
+    ///     .rsplit_terminator(&['.', ':'][..])
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("D"),
+    ///         JavaStr::from_str("C"),
+    ///         JavaStr::from_str("B"),
+    ///         JavaStr::from_str("A")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn rsplit_terminator<P>(&self, pat: P) -> RSplitTerminator<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Construction is delegated to the iterator type.
+        RSplitTerminator::new(self, pat)
+    }
+
+    /// See [str::rsplitn].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb")
+    ///     .rsplitn(3, ' ')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lamb"),
+    ///         JavaStr::from_str("little"),
+    ///         JavaStr::from_str("Mary had a")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard")
+    ///     .rsplitn(3, 'X')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("leopard"),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str("lionX")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard")
+    ///     .rsplitn(2, "::")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("leopard"),
+    ///         JavaStr::from_str("lion::tiger")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn rsplitn<P>(&self, n: usize, pat: P) -> RSplitN<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Note the argument order: the constructor takes the pattern before
+        // the count, while this method takes the count first.
+        RSplitN::new(self, pat, n)
+    }
+
+    /// See [str::split].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb")
+    ///     .split(' ')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("Mary"),
+    ///         JavaStr::from_str("had"),
+    ///         JavaStr::from_str("a"),
+    ///         JavaStr::from_str("little"),
+    ///         JavaStr::from_str("lamb")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("").split('X').collect();
+    /// assert_eq!(v, [JavaStr::from_str("")]);
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard")
+    ///     .split('X')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lion"),
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str("leopard")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lion::tiger::leopard")
+    ///     .split("::")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lion"),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str("leopard")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("abc1def2ghi")
+    ///     .split(JavaCodePoint::is_numeric)
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("abc"),
+    ///         JavaStr::from_str("def"),
+    ///         JavaStr::from_str("ghi")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXtigerXleopard")
+    ///     .split(JavaCodePoint::is_uppercase)
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lion"),
+    ///         JavaStr::from_str("tiger"),
+    ///         JavaStr::from_str("leopard")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn split<P>(&self, pat: P) -> Split<P>
+    where
+        P: JavaStrPattern,
+    {
+        // Construction is delegated to the iterator type.
+        Split::new(self, pat)
+    }
+
+    /// See [str::split_ascii_whitespace].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let mut iter = JavaStr::from_str(" Mary   had\ta little  \n\t lamb").split_ascii_whitespace();
+    /// assert_eq!(Some(JavaStr::from_str("Mary")), iter.next());
+    /// assert_eq!(Some(JavaStr::from_str("had")), iter.next());
+    /// assert_eq!(Some(JavaStr::from_str("a")), iter.next());
+    /// assert_eq!(Some(JavaStr::from_str("little")), iter.next());
+    /// assert_eq!(Some(JavaStr::from_str("lamb")), iter.next());
+    ///
+    /// assert_eq!(None, iter.next());
+    /// ```
+    #[inline]
+    pub fn split_ascii_whitespace(&self) -> SplitAsciiWhitespace<'_> {
+        // A plain `fn` (rather than a closure) so it can be cast to a nameable
+        // function-pointer type below.
+        #[inline]
+        fn has_content(piece: &&[u8]) -> bool {
+            !piece.is_empty()
+        }
+
+        // Work on the raw bytes: split at every ASCII whitespace byte, drop
+        // the empty pieces, then view each piece as a `JavaStr` again.
+        SplitAsciiWhitespace {
+            inner: self
+                .as_bytes()
+                .split(u8::is_ascii_whitespace as fn(&u8) -> bool)
+                .filter(has_content as fn(&&[u8]) -> bool)
+                // NOTE(review): no SAFETY justification in the original;
+                // presumably sound because the pieces are delimited by ASCII
+                // bytes, which never occur inside a multi-byte sequence.
+                .map(|piece| unsafe { JavaStr::from_semi_utf8_unchecked(piece) }),
+        }
+    }
+
+    /// See [str::split_at].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("Per Martin-Löf");
+    ///
+    /// let (first, last) = s.split_at(3);
+    ///
+    /// assert_eq!("Per", first);
+    /// assert_eq!(" Martin-Löf", last);
+    /// ```
+    /// ```should_panic
+    /// # use java_string::JavaStr;
+    /// let s = JavaStr::from_str("Per Martin-Löf");
+    /// // Should panic
+    /// let _ = s.split_at(13);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn split_at(&self, mid: usize) -> (&JavaStr, &JavaStr) {
+        // `is_char_boundary` also rejects any `mid` outside `[0, .len()]`.
+        if !self.is_char_boundary(mid) {
+            slice_error_fail(self, 0, mid)
+        }
+        // SAFETY: `mid` was just verified to lie on a char boundary.
+        unsafe { (self.get_unchecked(..mid), self.get_unchecked(mid..)) }
+    }
+
+    /// See [str::split_at_mut].
+    ///
+    /// Splits the string into two mutable halves at byte index `mid`. Fails
+    /// via `slice_error_fail` if `mid` is not on a char boundary (which
+    /// includes being past the end of the string).
+    ///
+    /// ```
+    /// # use java_string::{JavaStr, JavaString};
+    /// let mut s = JavaString::from("Per Martin-Löf");
+    /// let s = s.as_mut_java_str();
+    ///
+    /// let (first, last) = s.split_at_mut(3);
+    ///
+    /// assert_eq!("Per", first);
+    /// assert_eq!(" Martin-Löf", last);
+    /// ```
+    /// ```should_panic
+    /// # use java_string::{JavaStr, JavaString};
+    /// let mut s = JavaString::from("Per Martin-Löf");
+    /// let s = s.as_mut_java_str();
+    /// // Should panic
+    /// let _ = s.split_at_mut(13);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn split_at_mut(&mut self, mid: usize) -> (&mut JavaStr, &mut JavaStr) {
+        // is_char_boundary checks that the index is in [0, .len()]
+        if self.is_char_boundary(mid) {
+            let len = self.len();
+            let ptr = self.as_mut_ptr();
+            // SAFETY: just checked that `mid` is on a char boundary, so both
+            // halves are in bounds and begin/end on char boundaries. The byte
+            // ranges `[0, mid)` and `[mid, len)` are disjoint, so the two
+            // mutable references never alias.
+            unsafe {
+                (
+                    JavaStr::from_semi_utf8_unchecked_mut(slice::from_raw_parts_mut(ptr, mid)),
+                    JavaStr::from_semi_utf8_unchecked_mut(slice::from_raw_parts_mut(
+                        ptr.add(mid),
+                        len - mid,
+                    )),
+                )
+            }
+        } else {
+            slice_error_fail(self, 0, mid)
+        }
+    }
+
+    /// See [str::split_inclusive].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lamb\nlittle lamb\nlittle lamb.\n")
+    ///     .split_inclusive('\n')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("Mary had a little lamb\n"),
+    ///         JavaStr::from_str("little lamb\n"),
+    ///         JavaStr::from_str("little lamb.\n")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn split_inclusive<P: JavaStrPattern>(&self, pat: P) -> SplitInclusive<P> {
+        // This method only hands the haystack and the pattern to
+        // `SplitInclusive::new`.
+        SplitInclusive::new(self, pat)
+    }
+
+    /// See [str::split_once].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(JavaStr::from_str("cfg").split_once('='), None);
+    /// assert_eq!(
+    ///     JavaStr::from_str("cfg=").split_once('='),
+    ///     Some((JavaStr::from_str("cfg"), JavaStr::from_str("")))
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("cfg=foo").split_once('='),
+    ///     Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo")))
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("cfg=foo=bar").split_once('='),
+    ///     Some((JavaStr::from_str("cfg"), JavaStr::from_str("foo=bar")))
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn split_once<P: JavaStrPattern>(
+        &self,
+        mut delimiter: P,
+    ) -> Option<(&JavaStr, &JavaStr)> {
+        // `find_in` yields the match as (offset, length); everything before
+        // the match and everything after it form the two halves.
+        delimiter.find_in(self).map(|(start, match_len)| {
+            // SAFETY: pattern is known to return valid indices.
+            unsafe {
+                (
+                    self.get_unchecked(..start),
+                    self.get_unchecked(start + match_len..),
+                )
+            }
+        })
+    }
+
+    /// See [str::split_terminator].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B.").split_terminator('.').collect();
+    /// assert_eq!(v, [JavaStr::from_str("A"), JavaStr::from_str("B")]);
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A..B..").split_terminator(".").collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("A"),
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("B"),
+    ///         JavaStr::from_str("")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("A.B:C.D")
+    ///     .split_terminator(&['.', ':'][..])
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("A"),
+    ///         JavaStr::from_str("B"),
+    ///         JavaStr::from_str("C"),
+    ///         JavaStr::from_str("D")
+    ///     ]
+    /// );
+    /// ```
+    #[inline]
+    pub fn split_terminator<P: JavaStrPattern>(&self, pat: P) -> SplitTerminator<P> {
+        // This method only hands the haystack and the pattern to
+        // `SplitTerminator::new`.
+        SplitTerminator::new(self, pat)
+    }
+
+    /// Splits on whitespace code points and skips empty pieces. See
+    /// [str::split_whitespace].
+    #[inline]
+    pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
+        SplitWhitespace {
+            inner: self
+                .split(JavaCodePoint::is_whitespace as fn(JavaCodePoint) -> bool)
+                // `split` can yield empty pieces; they are filtered out here.
+                .filter(|piece| !piece.is_empty()),
+        }
+    }
+
+    /// See [str::splitn].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("Mary had a little lambda")
+    ///     .splitn(3, ' ')
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("Mary"),
+    ///         JavaStr::from_str("had"),
+    ///         JavaStr::from_str("a little lambda")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("lionXXtigerXleopard")
+    ///     .splitn(3, "X")
+    ///     .collect();
+    /// assert_eq!(
+    ///     v,
+    ///     [
+    ///         JavaStr::from_str("lion"),
+    ///         JavaStr::from_str(""),
+    ///         JavaStr::from_str("tigerXleopard")
+    ///     ]
+    /// );
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("abcXdef").splitn(1, 'X').collect();
+    /// assert_eq!(v, [JavaStr::from_str("abcXdef")]);
+    ///
+    /// let v: Vec<&JavaStr> = JavaStr::from_str("").splitn(1, 'X').collect();
+    /// assert_eq!(v, [JavaStr::from_str("")]);
+    /// ```
+    #[inline]
+    pub fn splitn<P: JavaStrPattern>(&self, n: usize, pat: P) -> SplitN<P> {
+        // Note the argument order: `SplitN::new` takes the pattern before the
+        // limit.
+        SplitN::new(self, pat, n)
+    }
+
+    /// See [str::starts_with].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// let bananas = JavaStr::from_str("bananas");
+    ///
+    /// assert!(bananas.starts_with("bana"));
+    /// assert!(!bananas.starts_with("nana"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn starts_with<P: JavaStrPattern>(&self, mut pat: P) -> bool {
+        // Only whether a prefix match exists matters; its length is ignored.
+        let matched_len = pat.prefix_len_in(self);
+        matched_len.is_some()
+    }
+
+    /// See [str::strip_prefix].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("foo:bar").strip_prefix("foo:"),
+    ///     Some(JavaStr::from_str("bar"))
+    /// );
+    /// assert_eq!(JavaStr::from_str("foo:bar").strip_prefix("bar"), None);
+    /// assert_eq!(
+    ///     JavaStr::from_str("foofoo").strip_prefix("foo"),
+    ///     Some(JavaStr::from_str("foo"))
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn strip_prefix<P: JavaStrPattern>(&self, mut prefix: P) -> Option<&JavaStr> {
+        prefix.prefix_len_in(self).map(|matched| {
+            // SAFETY: pattern is known to return valid indices.
+            unsafe { self.get_unchecked(matched..) }
+        })
+    }
+
+    /// See [str::strip_suffix].
+    ///
+    /// ```
+    /// # use java_string::JavaStr;
+    /// assert_eq!(
+    ///     JavaStr::from_str("bar:foo").strip_suffix(":foo"),
+    ///     Some(JavaStr::from_str("bar"))
+    /// );
+    /// assert_eq!(JavaStr::from_str("bar:foo").strip_suffix("bar"), None);
+    /// assert_eq!(
+    ///     JavaStr::from_str("foofoo").strip_suffix("foo"),
+    ///     Some(JavaStr::from_str("foo"))
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn strip_suffix<P: JavaStrPattern>(&self, mut suffix: P) -> Option<&JavaStr> {
+        suffix.suffix_len_in(self).map(|matched| {
+            // SAFETY: pattern is known to return valid indices.
+            unsafe { self.get_unchecked(..self.len() - matched) }
+        })
+    }
+
+    /// Copies the string and applies `make_ascii_lowercase` to the copy. See
+    /// [str::to_ascii_lowercase].
+    #[inline]
+    #[must_use]
+    pub fn to_ascii_lowercase(&self) -> JavaString {
+        let mut lowered = self.to_owned();
+        lowered.make_ascii_lowercase();
+        lowered
+    }
+
+    /// Copies the string and applies `make_ascii_uppercase` to the copy. See
+    /// [str::to_ascii_uppercase].
+    #[inline]
+    #[must_use]
+    pub fn to_ascii_uppercase(&self) -> JavaString {
+        let mut raised = self.to_owned();
+        raised.make_ascii_uppercase();
+        raised
+    }
+
+    /// See [str::to_lowercase].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr, JavaString};
+    /// let s = JavaStr::from_str("HELLO");
+    /// assert_eq!("hello", s.to_lowercase());
+    ///
+    /// let odysseus = JavaStr::from_str("ὈΔΥΣΣΕΎΣ");
+    /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase());
+    ///
+    /// let s = JavaString::from("Hello ")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str()
+    ///     + JavaStr::from_str(" World!");
+    /// let expected = JavaString::from("hello ")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str()
+    ///     + JavaStr::from_str(" world!");
+    /// assert_eq!(expected, s.to_lowercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn to_lowercase(&self) -> JavaString {
+        // Valid UTF-8 stretches are lowercased by `str`; the sequences that
+        // `transform_string` reports as invalid are passed through unchanged.
+        self.transform_string(|valid| valid.to_lowercase(), |invalid| invalid)
+    }
+
+    /// See [str::to_uppercase].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr, JavaString};
+    /// let s = JavaStr::from_str("hello");
+    /// assert_eq!("HELLO", s.to_uppercase());
+    ///
+    /// let s = JavaStr::from_str("tschüß");
+    /// assert_eq!("TSCHÜSS", s.to_uppercase());
+    ///
+    /// let s = JavaString::from("Hello ")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str()
+    ///     + JavaStr::from_str(" World!");
+    /// let expected = JavaString::from("HELLO ")
+    ///     + JavaString::from(JavaCodePoint::from_u32(0xd800).unwrap()).as_java_str()
+    ///     + JavaStr::from_str(" WORLD!");
+    /// assert_eq!(expected, s.to_uppercase());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn to_uppercase(&self) -> JavaString {
+        // Valid UTF-8 stretches are uppercased by `str`; the sequences that
+        // `transform_string` reports as invalid are passed through unchanged.
+        self.transform_string(|valid| valid.to_uppercase(), |invalid| invalid)
+    }
+
+    /// Strips whitespace code points from both ends. See [str::trim].
+    #[inline]
+    #[must_use]
+    pub fn trim(&self) -> &JavaStr {
+        self.trim_matches(JavaCodePoint::is_whitespace)
+    }
+
+    /// Strips whitespace code points from the end. See [str::trim_end].
+    #[inline]
+    #[must_use]
+    pub fn trim_end(&self) -> &JavaStr {
+        self.trim_end_matches(JavaCodePoint::is_whitespace)
+    }
+
+    /// See [str::trim_end_matches].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// assert_eq!(
+    ///     JavaStr::from_str("11foo1bar11").trim_end_matches('1'),
+    ///     "11foo1bar"
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("123foo1bar123").trim_end_matches(JavaCodePoint::is_numeric),
+    ///     "123foo1bar"
+    /// );
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(
+    ///     JavaStr::from_str("12foo1bar12").trim_end_matches(x),
+    ///     "12foo1bar"
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn trim_end_matches<P: JavaStrPattern>(&self, mut pat: P) -> &JavaStr {
+        let mut rest = self;
+        loop {
+            match pat.suffix_len_in(rest) {
+                // A zero-length match would never shrink `rest`, so it ends
+                // the loop exactly like "no match" does.
+                None | Some(0) => return rest,
+                Some(matched) => {
+                    // SAFETY: pattern is known to return valid indices.
+                    rest = unsafe { rest.get_unchecked(..rest.len() - matched) };
+                }
+            }
+        }
+    }
+
+    /// See [str::trim_matches].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// assert_eq!(
+    ///     JavaStr::from_str("11foo1bar11").trim_matches('1'),
+    ///     "foo1bar"
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("123foo1bar123").trim_matches(JavaCodePoint::is_numeric),
+    ///     "foo1bar"
+    /// );
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(JavaStr::from_str("12foo1bar12").trim_matches(x), "foo1bar");
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn trim_matches<P: JavaStrPattern>(&self, mut pat: P) -> &JavaStr {
+        let mut rest = self;
+        // First peel matches off the front ...
+        loop {
+            match pat.prefix_len_in(rest) {
+                // Stop on "no match" and on zero-length matches, which would
+                // never make progress.
+                None | Some(0) => break,
+                Some(matched) => {
+                    // SAFETY: pattern is known to return valid indices.
+                    rest = unsafe { rest.get_unchecked(matched..) };
+                }
+            }
+        }
+        // ... then off the back of what is left.
+        loop {
+            match pat.suffix_len_in(rest) {
+                None | Some(0) => break,
+                Some(matched) => {
+                    // SAFETY: pattern is known to return valid indices.
+                    rest = unsafe { rest.get_unchecked(..rest.len() - matched) };
+                }
+            }
+        }
+        rest
+    }
+
+    /// Strips whitespace code points from the start. See [str::trim_start].
+    #[inline]
+    #[must_use]
+    pub fn trim_start(&self) -> &JavaStr {
+        self.trim_start_matches(JavaCodePoint::is_whitespace)
+    }
+
+    /// See [str::trim_start_matches].
+    ///
+    /// ```
+    /// # use java_string::{JavaCodePoint, JavaStr};
+    /// assert_eq!(
+    ///     JavaStr::from_str("11foo1bar11").trim_start_matches('1'),
+    ///     "foo1bar11"
+    /// );
+    /// assert_eq!(
+    ///     JavaStr::from_str("123foo1bar123").trim_start_matches(JavaCodePoint::is_numeric),
+    ///     "foo1bar123"
+    /// );
+    ///
+    /// let x: &[_] = &['1', '2'];
+    /// assert_eq!(
+    ///     JavaStr::from_str("12foo1bar12").trim_start_matches(x),
+    ///     "foo1bar12"
+    /// );
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn trim_start_matches<P: JavaStrPattern>(&self, mut pat: P) -> &JavaStr {
+        let mut rest = self;
+        loop {
+            match pat.prefix_len_in(rest) {
+                // A zero-length match would never shrink `rest`, so it ends
+                // the loop exactly like "no match" does.
+                None | Some(0) => return rest,
+                Some(matched) => {
+                    // SAFETY: pattern is known to return valid indices.
+                    rest = unsafe { rest.get_unchecked(matched..) };
+                }
+            }
+        }
+    }
+
+    /// Applies `string_transformer` to the valid-UTF-8 content of this string.
+    ///
+    /// When the whole string passes full UTF-8 validation it is transformed in
+    /// one call. Otherwise the work is handed to `transform_invalid_string`,
+    /// which also receives `invalid_char_transformer`.
+    #[inline]
+    fn transform_string<SF, ICF>(
+        &self,
+        mut string_transformer: SF,
+        invalid_char_transformer: ICF,
+    ) -> JavaString
+    where
+        SF: FnMut(&str) -> String,
+        ICF: FnMut(&JavaStr) -> &JavaStr,
+    {
+        let bytes = self.as_bytes();
+        if let Err(error) = run_utf8_full_validation_from_semi(bytes) {
+            return self.transform_invalid_string(
+                error,
+                string_transformer,
+                invalid_char_transformer,
+            );
+        }
+        // SAFETY: validation succeeded
+        let valid = unsafe { std::str::from_utf8_unchecked(bytes) };
+        JavaString::from(string_transformer(valid))
+    }
+
+    /// Slow path of `transform_string` for strings that failed full UTF-8
+    /// validation.
+    ///
+    /// `error` is the first validation failure for the whole string. The bytes
+    /// are walked as alternating runs: every valid UTF-8 run goes through
+    /// `string_transformer`, every 3-byte invalid sequence between runs goes
+    /// through `invalid_char_transformer`, and the results are appended to the
+    /// output in their original order.
+    #[inline]
+    fn transform_invalid_string<SF, ICF>(
+        &self,
+        error: Utf8Error,
+        mut string_transformer: SF,
+        mut invalid_char_transformer: ICF,
+    ) -> JavaString
+    where
+        SF: FnMut(&str) -> String,
+        ICF: FnMut(&JavaStr) -> &JavaStr,
+    {
+        let bytes = self.as_bytes();
+        // Seed the result with the transformed valid prefix.
+        let mut result = JavaString::from(string_transformer(unsafe {
+            // SAFETY: validation succeeded up to this index
+            std::str::from_utf8_unchecked(bytes.get_unchecked(..error.valid_up_to))
+        }));
+        result.push_java_str(invalid_char_transformer(unsafe {
+            // SAFETY: any UTF-8 error in semi-valid UTF-8 is a 3 byte long sequence
+            // representing a surrogate code point. We're pushing that sequence now
+            JavaStr::from_semi_utf8_unchecked(
+                bytes.get_unchecked(error.valid_up_to..error.valid_up_to + 3),
+            )
+        }));
+        // `index` is the absolute offset of the first byte not yet processed.
+        let mut index = error.valid_up_to + 3;
+        loop {
+            let remainder = unsafe { bytes.get_unchecked(index..) };
+            // Validation restarts on the remainder, so `valid_up_to` of any
+            // new error is relative to `index`.
+            match run_utf8_full_validation_from_semi(remainder) {
+                Ok(()) => {
+                    // No further invalid sequences: transform the tail and finish.
+                    result.push_str(&string_transformer(unsafe {
+                        // SAFETY: validation succeeded
+                        std::str::from_utf8_unchecked(remainder)
+                    }));
+                    return result;
+                }
+                Err(error) => {
+                    result.push_str(&string_transformer(unsafe {
+                        // SAFETY: validation succeeded up to this index
+                        std::str::from_utf8_unchecked(
+                            bytes.get_unchecked(index..index + error.valid_up_to),
+                        )
+                    }));
+                    result.push_java_str(invalid_char_transformer(unsafe {
+                        // SAFETY: see comment above
+                        JavaStr::from_semi_utf8_unchecked(bytes.get_unchecked(
+                            index + error.valid_up_to..index + error.valid_up_to + 3,
+                        ))
+                    }));
+                    // Skip the valid run and the 3-byte sequence just pushed.
+                    index += error.valid_up_to + 3;
+                }
+            }
+        }
+    }
+}
+
+/// `cow + &java_str`, implemented on top of the `+=` operator.
+impl<'a> Add<&JavaStr> for Cow<'a, JavaStr> {
+    type Output = Cow<'a, JavaStr>;
+
+    #[inline]
+    fn add(mut self, rhs: &JavaStr) -> Self::Output {
+        AddAssign::add_assign(&mut self, rhs);
+        self
+    }
+}
+
+/// `cow += &java_str`. Appending an empty string leaves the `Cow` untouched,
+/// so a borrowed value stays borrowed.
+impl<'a> AddAssign<&JavaStr> for Cow<'a, JavaStr> {
+    #[inline]
+    fn add_assign(&mut self, rhs: &JavaStr) {
+        if rhs.is_empty() {
+            return;
+        }
+        match self {
+            // Already owned: extend the existing buffer in place.
+            Cow::Owned(owned) => owned.push_java_str(rhs),
+            // Borrowed: copy into an owned string first, then append.
+            Cow::Borrowed(borrowed) => {
+                let mut joined = borrowed.to_owned();
+                joined.push_java_str(rhs);
+                *self = Cow::Owned(joined);
+            }
+        }
+    }
+}
+
+/// Exposes the bytes returned by `as_bytes`.
+impl AsRef<[u8]> for JavaStr {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        JavaStr::as_bytes(self)
+    }
+}
+
+/// Cloning a boxed `JavaStr` copies its bytes into a fresh allocation.
+impl Clone for Box<JavaStr> {
+    #[inline]
+    fn clone(&self) -> Self {
+        let copied = Box::<[u8]>::from(self.as_bytes());
+        // SAFETY: the bytes were copied verbatim from an existing `JavaStr`.
+        unsafe { JavaStr::from_boxed_semi_utf8_unchecked(copied) }
+    }
+}
+
+/// Writes the string in double quotes, escaping code points as needed.
+impl Debug for JavaStr {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.write_char('"')?;
+        // Start of the run of code points that can still be written verbatim.
+        let mut pending = 0;
+        for (pos, code_point) in self.char_indices() {
+            let escaped = code_point.escape_debug_ext(EscapeDebugExtArgs {
+                escape_single_quote: false,
+                escape_double_quote: true,
+            });
+            // A code point stays in the verbatim run only if its escape is a
+            // single char and it is representable as a `char`.
+            if escaped.len() == 1 && code_point.as_char().is_some() {
+                continue;
+            }
+            // Flush the backlog so far, then write the escape sequence.
+            // SAFETY: any invalid UTF-8 should have been caught by a previous iteration
+            f.write_str(unsafe { self[pending..pos].as_str_unchecked() })?;
+            for piece in escaped {
+                f.write_char(piece)?;
+            }
+            pending = pos + code_point.len_utf8();
+        }
+        // SAFETY: any invalid UTF-8 should have been caught by the loop above
+        f.write_str(unsafe { self[pending..].as_str_unchecked() })?;
+        f.write_char('"')
+    }
+}
+
+/// The default `&JavaStr` is the empty string.
+impl Default for &JavaStr {
+    #[inline]
+    fn default() -> Self {
+        <&JavaStr>::from("")
+    }
+}
+
+/// The default `Box<JavaStr>` wraps the default (empty) `Box<str>`.
+impl Default for Box<JavaStr> {
+    #[inline]
+    fn default() -> Self {
+        let empty: Box<str> = Box::default();
+        JavaStr::from_boxed_str(empty)
+    }
+}
+
+/// Displays the result of the lossy string conversion (`as_str_lossy`).
+impl Display for JavaStr {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        let lossy = self.as_str_lossy();
+        Display::fmt(&lossy, f)
+    }
+}
+
+/// Wraps the reference without copying.
+impl<'a> From<&'a JavaStr> for Cow<'a, JavaStr> {
+    #[inline]
+    fn from(borrowed: &'a JavaStr) -> Self {
+        Self::Borrowed(borrowed)
+    }
+}
+
+/// Copies the bytes into a new `Arc` allocation.
+impl From<&JavaStr> for Arc<JavaStr> {
+    #[inline]
+    fn from(value: &JavaStr) -> Self {
+        let bytes: Arc<[u8]> = Arc::from(value.as_bytes());
+        let raw = Arc::into_raw(bytes) as *const JavaStr;
+        // SAFETY: `raw` comes straight from `Arc::into_raw`. NOTE(review): the
+        // pointer cast relies on `JavaStr` having the same layout as `[u8]`;
+        // confirm at the type definition.
+        unsafe { Arc::from_raw(raw) }
+    }
+}
+
+/// Copies the bytes into a new boxed `JavaStr`.
+impl From<&JavaStr> for Box<JavaStr> {
+    #[inline]
+    fn from(value: &JavaStr) -> Self {
+        let bytes: Box<[u8]> = value.as_bytes().into();
+        // SAFETY: the bytes were copied verbatim from an existing `JavaStr`.
+        unsafe { JavaStr::from_boxed_semi_utf8_unchecked(bytes) }
+    }
+}
+
+/// Copies the bytes into a new `Rc` allocation.
+impl From<&JavaStr> for Rc<JavaStr> {
+    #[inline]
+    fn from(value: &JavaStr) -> Self {
+        let bytes: Rc<[u8]> = Rc::from(value.as_bytes());
+        let raw = Rc::into_raw(bytes) as *const JavaStr;
+        // SAFETY: `raw` comes straight from `Rc::into_raw`. NOTE(review): the
+        // pointer cast relies on `JavaStr` having the same layout as `[u8]`;
+        // confirm at the type definition.
+        unsafe { Rc::from_raw(raw) }
+    }
+}
+
+/// Copies the bytes of the string into a new vector.
+impl From<&JavaStr> for Vec<u8> {
+    #[inline]
+    fn from(value: &JavaStr) -> Self {
+        value.as_bytes().to_vec()
+    }
+}
+
+/// Boxes either side of the `Cow` through the matching `From` impl.
+impl From<Cow<'_, JavaStr>> for Box<JavaStr> {
+    #[inline]
+    fn from(value: Cow<'_, JavaStr>) -> Self {
+        match value {
+            Cow::Owned(owned) => owned.into(),
+            Cow::Borrowed(borrowed) => borrowed.into(),
+        }
+    }
+}
+
+/// Delegates to `JavaString::into_boxed_str`.
+impl From<JavaString> for Box<JavaStr> {
+    #[inline]
+    fn from(owned: JavaString) -> Self {
+        JavaString::into_boxed_str(owned)
+    }
+}
+
+/// Delegates to `JavaStr::from_str`, keeping the lifetime of the input.
+impl<'a> From<&'a str> for &'a JavaStr {
+    #[inline]
+    fn from(text: &'a str) -> Self {
+        JavaStr::from_str(text)
+    }
+}
+
+/// Hashes the raw bytes followed by a single `0xff` byte.
+impl Hash for JavaStr {
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let bytes = self.as_bytes();
+        state.write(bytes);
+        // NOTE(review): the 0xff terminator presumably mirrors how std hashes
+        // `str`, keeping hashes of equal `str`/`JavaStr` contents identical.
+        state.write_u8(0xff);
+    }
+}
+
+/// `java_str[range]`, forwarded to the range's `JavaStrSliceIndex::index`.
+impl<I: JavaStrSliceIndex> Index<I> for JavaStr {
+    type Output = JavaStr;
+
+    #[inline]
+    fn index(&self, index: I) -> &Self::Output {
+        JavaStrSliceIndex::index(index, self)
+    }
+}
+
+/// `&mut java_str[range]`, forwarded to the range's
+/// `JavaStrSliceIndex::index_mut`.
+impl<I: JavaStrSliceIndex> IndexMut<I> for JavaStr {
+    #[inline]
+    fn index_mut(&mut self, index: I) -> &mut Self::Output {
+        JavaStrSliceIndex::index_mut(index, self)
+    }
+}
+
+/// Strips one reference level and defers to `Cow<str> == JavaStr`.
+impl<'a, 'b> PartialEq<&'b JavaStr> for Cow<'a, str> {
+    #[inline]
+    fn eq(&self, other: &&'b JavaStr) -> bool {
+        <Cow<'a, str> as PartialEq<JavaStr>>::eq(self, *other)
+    }
+}
+
+/// Strips one reference level and defers to `Cow<JavaStr> == JavaStr`.
+impl<'a, 'b> PartialEq<&'b JavaStr> for Cow<'a, JavaStr> {
+    #[inline]
+    fn eq(&self, other: &&'b JavaStr) -> bool {
+        <Cow<'a, JavaStr> as PartialEq<JavaStr>>::eq(self, *other)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == Cow<str>`.
+impl<'a, 'b> PartialEq<Cow<'a, str>> for &'b JavaStr {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, str>) -> bool {
+        <JavaStr as PartialEq<Cow<'a, str>>>::eq(*self, other)
+    }
+}
+
+/// Mirror image of `Cow<str> == JavaStr`: the operands are swapped.
+impl<'a> PartialEq<Cow<'a, str>> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, str>) -> bool {
+        <Cow<'a, str> as PartialEq<JavaStr>>::eq(other, self)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == Cow<JavaStr>`.
+impl<'a, 'b> PartialEq<Cow<'a, JavaStr>> for &'b JavaStr {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, JavaStr>) -> bool {
+        <JavaStr as PartialEq<Cow<'a, JavaStr>>>::eq(*self, other)
+    }
+}
+
+/// Mirror image of `Cow<JavaStr> == JavaStr`: the operands are swapped.
+impl<'a> PartialEq<Cow<'a, JavaStr>> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &Cow<'a, JavaStr>) -> bool {
+        <Cow<'a, JavaStr> as PartialEq<JavaStr>>::eq(other, self)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == String`.
+impl<'a> PartialEq<String> for &'a JavaStr {
+    #[inline]
+    fn eq(&self, other: &String) -> bool {
+        <JavaStr as PartialEq<String>>::eq(*self, other)
+    }
+}
+
+/// Compares against the `String`'s contents via `JavaStr == str`.
+impl PartialEq<String> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &String) -> bool {
+        <JavaStr as PartialEq<str>>::eq(self, other.as_str())
+    }
+}
+
+/// Compares the `String`'s contents via `str == JavaStr`.
+impl PartialEq<JavaStr> for String {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        <str as PartialEq<JavaStr>>::eq(self.as_str(), other)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == JavaString`.
+impl<'a> PartialEq<JavaString> for &'a JavaStr {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        <JavaStr as PartialEq<JavaString>>::eq(*self, other)
+    }
+}
+
+/// Compares against the full slice of the `JavaString` (`other[..]`).
+impl PartialEq<JavaString> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &JavaString) -> bool {
+        <&JavaStr as PartialEq<JavaStr>>::eq(&self, &other[..])
+    }
+}
+
+/// Compares whichever variant the `Cow` holds against the `JavaStr`.
+impl<'a> PartialEq<JavaStr> for Cow<'a, str> {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        match self {
+            Cow::Borrowed(borrowed) => <&str as PartialEq<JavaStr>>::eq(borrowed, other),
+            Cow::Owned(owned) => <String as PartialEq<JavaStr>>::eq(owned, other),
+        }
+    }
+}
+
+/// Compares whichever variant the `Cow` holds against the `JavaStr`.
+impl<'a> PartialEq<JavaStr> for Cow<'a, JavaStr> {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        match self {
+            Cow::Borrowed(borrowed) => <&JavaStr as PartialEq<JavaStr>>::eq(borrowed, other),
+            Cow::Owned(owned) => <JavaString as PartialEq<JavaStr>>::eq(owned, other),
+        }
+    }
+}
+
+/// Views the `str` as a `JavaStr` and compares the two `JavaStr`s.
+impl PartialEq<JavaStr> for str {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        <JavaStr as PartialEq>::eq(JavaStr::from_str(self), other)
+    }
+}
+
+/// Strips one reference level and defers to `str == JavaStr`.
+impl<'a> PartialEq<JavaStr> for &'a str {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        <str as PartialEq<JavaStr>>::eq(*self, other)
+    }
+}
+
+/// Views the `str` as a `JavaStr` and compares the two `JavaStr`s.
+impl PartialEq<str> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &str) -> bool {
+        <JavaStr as PartialEq>::eq(self, JavaStr::from_str(other))
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == str`.
+impl<'a> PartialEq<&'a str> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &&'a str) -> bool {
+        <JavaStr as PartialEq<str>>::eq(self, *other)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == JavaStr`.
+impl<'a> PartialEq<JavaStr> for &'a JavaStr {
+    #[inline]
+    fn eq(&self, other: &JavaStr) -> bool {
+        <JavaStr as PartialEq>::eq(*self, other)
+    }
+}
+
+/// Strips one reference level and defers to `JavaStr == JavaStr`.
+impl<'a> PartialEq<&'a JavaStr> for JavaStr {
+    #[inline]
+    fn eq(&self, other: &&'a JavaStr) -> bool {
+        <JavaStr as PartialEq>::eq(self, *other)
+    }
+}
+
+/// The owned counterpart of `JavaStr` is `JavaString`.
+impl ToOwned for JavaStr {
+    type Owned = JavaString;
+
+    #[inline]
+    fn to_owned(&self) -> Self::Owned {
+        let bytes = self.as_bytes().to_vec();
+        // SAFETY: the bytes were copied verbatim from an existing `JavaStr`.
+        unsafe { JavaString::from_semi_utf8_unchecked(bytes) }
+    }
+}
+
+/// Private home of the `Sealed` supertrait of `JavaStrSliceIndex`.
+mod private_slice_index {
+    use std::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
+
+    /// Marker trait; the module is private, so only the range types listed
+    /// here can implement it.
+    pub trait Sealed {}
+
+    impl Sealed for RangeFull {}
+    impl Sealed for Range<usize> {}
+    impl Sealed for RangeFrom<usize> {}
+    impl Sealed for RangeTo<usize> {}
+    impl Sealed for RangeInclusive<usize> {}
+    impl Sealed for RangeToInclusive<usize> {}
+}
+
+/// Index types that can slice a [`JavaStr`]. The trait is sealed through
+/// `private_slice_index::Sealed`.
+///
+/// # Safety
+///
+/// Implementations' `check_bounds` method must properly check the bounds of the
+/// slice, such that calling `get_unchecked` is not UB.
+pub unsafe trait JavaStrSliceIndex: private_slice_index::Sealed + Sized {
+    /// Reports whether this index may be used on `slice`.
+    fn check_bounds(&self, slice: &JavaStr) -> bool;
+    /// Diverging failure path, called by `index`/`index_mut` after
+    /// `check_bounds` returned `false`.
+    fn check_bounds_fail(self, slice: &JavaStr) -> !;
+
+    /// # Safety
+    ///
+    /// - The input slice must be a valid pointer
+    /// - This index must not be out of bounds of the input slice
+    /// - The indices of this slice must point to char boundaries in the input
+    ///   slice
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr;
+
+    /// # Safety
+    ///
+    /// - The input slice must be a valid pointer
+    /// - This index must not be out of bounds of the input slice
+    /// - The indices of this slice must point to char boundaries in the input
+    ///   slice
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr;
+
+    /// Checked lookup: `None` when `check_bounds` rejects the index.
+    #[inline]
+    fn get(self, slice: &JavaStr) -> Option<&JavaStr> {
+        if !self.check_bounds(slice) {
+            return None;
+        }
+        // SAFETY: `check_bounds` accepted this index for `slice`.
+        Some(unsafe { &*self.get_unchecked(slice) })
+    }
+
+    /// Mutable counterpart of `get`.
+    #[inline]
+    fn get_mut(self, slice: &mut JavaStr) -> Option<&mut JavaStr> {
+        if !self.check_bounds(slice) {
+            return None;
+        }
+        // SAFETY: `check_bounds` accepted this index for `slice`.
+        Some(unsafe { &mut *self.get_unchecked_mut(slice) })
+    }
+
+    /// Checked lookup that diverges through `check_bounds_fail` on a bad index.
+    #[inline]
+    fn index(self, slice: &JavaStr) -> &JavaStr {
+        if !self.check_bounds(slice) {
+            self.check_bounds_fail(slice)
+        }
+        // SAFETY: `check_bounds` accepted this index for `slice`.
+        unsafe { &*self.get_unchecked(slice) }
+    }
+
+    /// Mutable counterpart of `index`.
+    #[inline]
+    fn index_mut(self, slice: &mut JavaStr) -> &mut JavaStr {
+        if !self.check_bounds(slice) {
+            self.check_bounds_fail(slice)
+        }
+        // SAFETY: `check_bounds` accepted this index for `slice`.
+        unsafe { &mut *self.get_unchecked_mut(slice) }
+    }
+}
+
+// `..` selects the whole string, so every check passes and the pointer is
+// returned unchanged.
+unsafe impl JavaStrSliceIndex for RangeFull {
+    #[inline]
+    fn check_bounds(&self, _whole: &JavaStr) -> bool {
+        true
+    }
+
+    // Never called by the trait's default methods: `check_bounds` cannot fail.
+    #[inline]
+    fn check_bounds_fail(self, _whole: &JavaStr) -> ! {
+        unreachable!()
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, whole: *const JavaStr) -> *const JavaStr {
+        whole
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, whole: *mut JavaStr) -> *mut JavaStr {
+        whole
+    }
+}
+
+// `start..end`: the workhorse that the other range impls convert into.
+unsafe impl JavaStrSliceIndex for Range<usize> {
+    #[inline]
+    fn check_bounds(&self, slice: &JavaStr) -> bool {
+        // The range must not be inverted and both ends must sit on char
+        // boundaries.
+        let Range { start, end } = *self;
+        start <= end && slice.is_char_boundary(start) && slice.is_char_boundary(end)
+    }
+
+    #[inline]
+    #[track_caller]
+    fn check_bounds_fail(self, slice: &JavaStr) -> ! {
+        let Range { start, end } = self;
+        slice_error_fail(slice, start, end)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr {
+        let Range { start, end } = self;
+        let base = (slice as *const [u8]).cast::<u8>();
+        // SAFETY: the caller guarantees that `self` is in bounds of `slice`
+        // which satisfies all the conditions for `add`.
+        let first = unsafe { base.add(start) };
+        ptr::slice_from_raw_parts(first, end - start) as *const JavaStr
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr {
+        let Range { start, end } = self;
+        let base = (slice as *mut [u8]).cast::<u8>();
+        // SAFETY: see comments for `get_unchecked`.
+        let first = unsafe { base.add(start) };
+        ptr::slice_from_raw_parts_mut(first, end - start) as *mut JavaStr
+    }
+}
+
+// `..end`: behaves like `0..end`.
+unsafe impl JavaStrSliceIndex for RangeTo<usize> {
+    #[inline]
+    fn check_bounds(&self, slice: &JavaStr) -> bool {
+        // Only the end needs checking here.
+        let upper = self.end;
+        slice.is_char_boundary(upper)
+    }
+
+    #[inline]
+    #[track_caller]
+    fn check_bounds_fail(self, slice: &JavaStr) -> ! {
+        let upper = self.end;
+        slice_error_fail(slice, 0, upper)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr {
+        let as_range = 0..self.end;
+        // SAFETY: the caller upholds the contract for `..end`, which is the
+        // same contract as for `0..end`.
+        unsafe { as_range.get_unchecked(slice) }
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr {
+        let as_range = 0..self.end;
+        // SAFETY: see `get_unchecked`.
+        unsafe { as_range.get_unchecked_mut(slice) }
+    }
+}
+
+// `start..`: behaves like `start..len`.
+unsafe impl JavaStrSliceIndex for RangeFrom<usize> {
+    #[inline]
+    fn check_bounds(&self, slice: &JavaStr) -> bool {
+        // Only the start needs checking here.
+        let lower = self.start;
+        slice.is_char_boundary(lower)
+    }
+
+    #[inline]
+    #[track_caller]
+    fn check_bounds_fail(self, slice: &JavaStr) -> ! {
+        let lower = self.start;
+        slice_error_fail(slice, lower, slice.len())
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr {
+        // The end of the range is the byte length read through the pointer.
+        let byte_len = unsafe { (*(slice as *const [u8])).len() };
+        let as_range = self.start..byte_len;
+        unsafe { as_range.get_unchecked(slice) }
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr {
+        // The end of the range is the byte length read through the pointer.
+        let byte_len = unsafe { (*(slice as *mut [u8])).len() };
+        let as_range = self.start..byte_len;
+        unsafe { as_range.get_unchecked_mut(slice) }
+    }
+}
+
+/// Converts `start..=end` into the half-open range `start..end + 1`.
+///
+/// When `end_bound` reports `Excluded` for the inclusive range, the result
+/// is instead the empty range `end + 1..end + 1`.
+/// NOTE(review): std appears to report `Excluded` only for a range already
+/// exhausted by iteration; confirm against the `RangeInclusive` docs.
+///
+/// Callers must rule out `end == usize::MAX` first, since `end + 1` would
+/// overflow.
+#[inline]
+fn into_slice_range(range: RangeInclusive<usize>) -> Range<usize> {
+    let past_end = *range.end() + 1;
+    let first = match range.end_bound() {
+        Bound::Included(_) => *range.start(),
+        Bound::Excluded(_) => past_end,
+        Bound::Unbounded => unreachable!(),
+    };
+    first..past_end
+}
+
+// `start..=end`: converted with `into_slice_range`, after ruling out
+// `end == usize::MAX` where `end + 1` would overflow.
+unsafe impl JavaStrSliceIndex for RangeInclusive<usize> {
+    #[inline]
+    fn check_bounds(&self, slice: &JavaStr) -> bool {
+        if *self.end() == usize::MAX {
+            return false;
+        }
+        into_slice_range(self.clone()).check_bounds(slice)
+    }
+
+    #[inline]
+    #[track_caller]
+    fn check_bounds_fail(self, slice: &JavaStr) -> ! {
+        // The overflow case gets its own failure function.
+        if *self.end() == usize::MAX {
+            str_end_index_overflow_fail()
+        }
+        into_slice_range(self).check_bounds_fail(slice)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr {
+        let half_open = into_slice_range(self);
+        half_open.get_unchecked(slice)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr {
+        let half_open = into_slice_range(self);
+        half_open.get_unchecked_mut(slice)
+    }
+}
+
+// `..=end`: every method forwards to the `0..=end` implementation.
+unsafe impl JavaStrSliceIndex for RangeToInclusive<usize> {
+    #[inline]
+    fn check_bounds(&self, slice: &JavaStr) -> bool {
+        (0..=self.end).check_bounds(slice)
+    }
+
+    // `#[track_caller]` matches the other range impls, so the failure path
+    // does not report this forwarding function as the panic location.
+    #[inline]
+    #[track_caller]
+    fn check_bounds_fail(self, slice: &JavaStr) -> ! {
+        (0..=self.end).check_bounds_fail(slice)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked(self, slice: *const JavaStr) -> *const JavaStr {
+        (0..=self.end).get_unchecked(slice)
+    }
+
+    #[inline]
+    unsafe fn get_unchecked_mut(self, slice: *mut JavaStr) -> *mut JavaStr {
+        (0..=self.end).get_unchecked_mut(slice)
+    }
+}
diff --git a/crates/java_string/src/validations.rs b/crates/java_string/src/validations.rs
new file mode 100644
index 000000000..102783f55
--- /dev/null
+++ b/crates/java_string/src/validations.rs
@@ -0,0 +1,369 @@
+use std::ops::{Bound, Range, RangeBounds, RangeTo};
+
+use crate::{JavaStr, Utf8Error};
+
+pub(crate) const TAG_CONT: u8 = 0b1000_0000; // tag bits of a continuation byte (10xx_xxxx)
+pub(crate) const TAG_TWO_B: u8 = 0b1100_0000; // tag bits of a 2-byte lead (110x_xxxx)
+pub(crate) const TAG_THREE_B: u8 = 0b1110_0000; // tag bits of a 3-byte lead (1110_xxxx)
+pub(crate) const TAG_FOUR_B: u8 = 0b1111_0000; // tag bits of a 4-byte lead (1111_0xxx)
+pub(crate) const CONT_MASK: u8 = 0b0011_1111; // payload bits of a continuation byte
+
+#[inline] // Keeps the low `7 - width` bits of `byte`: the payload of a lead byte.
+const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
+    (byte & (0x7f >> width)) as u32
+}
+
+#[inline] // Shifts `ch` left by six bits and ORs in the six payload bits of `byte`.
+const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
+    (ch << 6) | (byte & CONT_MASK) as u32
+}
+
+#[inline] // Tests whether `byte` has the continuation-byte bit pattern 10xx_xxxx.
+const fn utf8_is_cont_byte(byte: u8) -> bool {
+    (byte as i8) < -64 // as `i8`, 0x80..=0xBF is -128..=-65, so this is true exactly for them
+}
+
+/// # Safety
+///
+/// `bytes` must produce a semi-valid UTF-8 string
+#[inline]
+pub(crate) unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
+    // Decode one code point from the front; an exhausted iterator yields `None` via `?`.
+    let x = *bytes.next()?;
+    if x < 128 { // single-byte (ASCII) fast path
+        return Some(x as u32);
+    }
+
+    // Multibyte case follows
+    // Decode from a byte combination out of: [[[x y] z] w]
+    // NOTE: Performance is sensitive to the exact formulation here
+    let init = utf8_first_byte(x, 2); // low five bits of the lead byte
+    // SAFETY: `bytes` produces a UTF-8-like string,
+    // so the iterator must produce a value here.
+    let y = unsafe { *bytes.next().unwrap_unchecked() };
+    let mut ch = utf8_acc_cont_byte(init, y); // final value in the two-byte case
+    if x >= 0xe0 {
+        // [[x y z] w] case
+        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
+        // SAFETY: `bytes` produces a UTF-8-like string,
+        // so the iterator must produce a value here.
+        let z = unsafe { *bytes.next().unwrap_unchecked() };
+        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
+        ch = init << 12 | y_z;
+        if x >= 0xf0 {
+            // [x y z w] case
+            // use only the lower 3 bits of `init`
+            // SAFETY: `bytes` produces a UTF-8-like string,
+            // so the iterator must produce a value here.
+            let w = unsafe { *bytes.next().unwrap_unchecked() };
+            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
+        }
+    }
+
+    Some(ch)
+}
+
+/// # Safety
+///
+/// `bytes` must produce a semi-valid UTF-8 string
+#[inline]
+pub(crate) unsafe fn next_code_point_reverse<'a, I: DoubleEndedIterator<Item = &'a u8>>(
+    bytes: &mut I,
+) -> Option<u32> {
+    // Decode one code point from the back; an exhausted iterator yields `None` via `?`.
+    let w = match *bytes.next_back()? {
+        next_byte if next_byte < 128 => return Some(next_byte as u32),
+        back_byte => back_byte,
+    };
+
+    // Multibyte case follows
+    // Decode from a byte combination out of: [x [y [z w]]]
+    let mut ch;
+    // SAFETY: `bytes` produces a UTF-8-like string,
+    // so the iterator must produce a value here.
+    let z = unsafe { *bytes.next_back().unwrap_unchecked() };
+    ch = utf8_first_byte(z, 2); // used as-is when `z` turns out to be the lead byte
+    if utf8_is_cont_byte(z) { // `z` is not the lead byte: keep walking backwards
+        // SAFETY: `bytes` produces a UTF-8-like string,
+        // so the iterator must produce a value here.
+        let y = unsafe { *bytes.next_back().unwrap_unchecked() };
+        ch = utf8_first_byte(y, 3);
+        if utf8_is_cont_byte(y) {
+            // SAFETY: `bytes` produces a UTF-8-like string,
+            // so the iterator must produce a value here.
+            let x = unsafe { *bytes.next_back().unwrap_unchecked() };
+            ch = utf8_first_byte(x, 4);
+            ch = utf8_acc_cont_byte(ch, y);
+        }
+        ch = utf8_acc_cont_byte(ch, z);
+    }
+    ch = utf8_acc_cont_byte(ch, w); // the byte read first is always the last one of the sequence
+
+    Some(ch)
+}
+
+#[inline(always)] // Checks that `v` is UTF-8, additionally tolerating encoded surrogates.
+pub(crate) fn run_utf8_semi_validation(v: &[u8]) -> Result<(), Utf8Error> {
+    let mut index = 0;
+    let len = v.len();
+
+    let usize_bytes = std::mem::size_of::<usize>();
+    let ascii_block_size = 2 * usize_bytes;
+    let blocks_end = if len >= ascii_block_size { // exclusive bound on starts of full blocks
+        len - ascii_block_size + 1
+    } else {
+        0
+    };
+    let align = v.as_ptr().align_offset(usize_bytes); // offset of the first usize-aligned byte
+
+    while index < len {
+        let old_offset = index;
+        macro_rules! err {
+            ($error_len:expr) => {
+                return Err(Utf8Error {
+                    valid_up_to: old_offset, // start of the sequence being decoded
+                    error_len: $error_len,
+                })
+            };
+        }
+
+        macro_rules! next {
+            () => {{
+                index += 1;
+                // we needed data, but there was none: error!
+                if index >= len {
+                    err!(None)
+                }
+                v[index]
+            }};
+        }
+
+        let first = v[index];
+        if first >= 128 {
+            let w = utf8_char_width(first);
+            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
+            //        first  C2 80        last DF BF
+            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
+            //        first  E0 A0 80     last EF BF BF
+            //   INCLUDING surrogates codepoints  \u{d800} to  \u{dfff}
+            //               ED A0 80 to       ED BF BF
+            // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
+            //        first  F0 90 80 80  last F4 8F BF BF
+            //
+            // Use the UTF-8 syntax from the RFC, but additionally accept surrogates,
+            // i.e. %xED %xA0-BF UTF8-tail, which the grammar quoted below excludes:
+            // https://tools.ietf.org/html/rfc3629
+            // UTF8-1      = %x00-7F
+            // UTF8-2      = %xC2-DF UTF8-tail
+            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
+            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
+            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
+            //               %xF4 %x80-8F 2( UTF8-tail )
+            match w {
+                2 => {
+                    if next!() as i8 >= -64 { // not a continuation byte (0x80..=0xBF)
+                        err!(Some(1))
+                    }
+                }
+                3 => {
+                    match (first, next!()) {
+                        (0xe0, 0xa0..=0xbf) | (0xe1..=0xef, 0x80..=0xbf) => {} /* INCLUDING surrogate codepoints here */
+                        _ => err!(Some(1)),
+                    }
+                    if next!() as i8 >= -64 {
+                        err!(Some(2))
+                    }
+                }
+                4 => {
+                    match (first, next!()) {
+                        (0xf0, 0x90..=0xbf) | (0xf1..=0xf3, 0x80..=0xbf) | (0xf4, 0x80..=0x8f) => {}
+                        _ => err!(Some(1)),
+                    }
+                    if next!() as i8 >= -64 {
+                        err!(Some(2))
+                    }
+                    if next!() as i8 >= -64 {
+                        err!(Some(3))
+                    }
+                }
+                _ => err!(Some(1)), // width 0: `first` cannot start a sequence
+            }
+            index += 1; // step past the last byte of the sequence
+        } else {
+            // Ascii case, try to skip forward quickly. When `index` sits on a usize
+            // boundary (`align_offset` reports usize::MAX if none can be reached), read
+            // 2 words of data per iteration until a word contains a non-ascii byte.
+            if align != usize::MAX && align.wrapping_sub(index) % usize_bytes == 0 {
+                let ptr = v.as_ptr();
+                while index < blocks_end {
+                    // SAFETY: since `align - index` and `ascii_block_size` are
+                    // multiples of `usize_bytes`, `block = ptr.add(index)` is
+                    // always aligned with a `usize` so it's safe to dereference
+                    // both `block` and `block.add(1)`.
+                    unsafe {
+                        let block = ptr.add(index) as *const usize;
+                        // break if there is a nonascii byte
+                        let zu = contains_nonascii(*block);
+                        let zv = contains_nonascii(*block.add(1));
+                        if zu || zv {
+                            break;
+                        }
+                    }
+                    index += ascii_block_size;
+                }
+                // step from the point where the wordwise loop stopped
+                while index < len && v[index] < 128 {
+                    index += 1;
+                }
+            } else {
+                index += 1; // not on a word boundary: advance one byte at a time
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[inline(always)]
+pub(crate) const fn run_utf8_full_validation_from_semi(v: &[u8]) -> Result<(), Utf8Error> {
+    // Semi-valid input may still hold surrogate code points (\u{d800} to \u{dfff}),
+    // which are encoded as ED A0 80 to ED BF BF. Three-byte sequences led by ED
+    // span ED 80 80 to ED BF BF, so the surrogates are exactly those whose second
+    // byte is >= A0; the scan below reports the first such ED byte.
+    let mut pos = 0;
+    while pos + 3 <= v.len() {
+        let is_surrogate = v[pos] == 0xed && v[pos + 1] >= 0xa0;
+        if is_surrogate {
+            return Err(Utf8Error {
+                valid_up_to: pos,
+                error_len: Some(1),
+            });
+        }
+        pos += 1;
+    }
+    Ok(())
+}
+
+#[inline] // Sequence length implied by a first byte; 0 means it cannot start a sequence.
+pub(crate) const fn utf8_char_width(first_byte: u8) -> usize {
+    const UTF8_CHAR_WIDTH: [u8; 256] = [ // 00-7F: 1, C2-DF: 2, E0-EF: 3, F0-F4: 4, others: 0
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    ];
+
+    UTF8_CHAR_WIDTH[first_byte as usize] as _ // a `u8` index always fits the 256 entries
+}
+
+#[inline] // True if any byte of the word `x` has its high bit set, i.e. is >= 0x80.
+const fn contains_nonascii(x: usize) -> bool {
+    const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; std::mem::size_of::<usize>()]);
+    (x & NONASCII_MASK) != 0
+}
+
+#[cold] // Panics with the most specific reason why slicing `s` at `begin..end` failed.
+#[track_caller]
+pub(crate) fn slice_error_fail(s: &JavaStr, begin: usize, end: usize) -> ! {
+    const MAX_DISPLAY_LENGTH: usize = 256;
+    let trunc_len = s.floor_char_boundary(MAX_DISPLAY_LENGTH); // cap how much of `s` is echoed
+    let s_trunc = &s[..trunc_len];
+    let ellipsis = if trunc_len < s.len() { "[...]" } else { "" };
+
+    // 1. out of bounds
+    if begin > s.len() || end > s.len() {
+        let oob_index = if begin > s.len() { begin } else { end }; // report `begin` first
+        panic!("byte index {oob_index} is out of bounds of `{s_trunc}`{ellipsis}");
+    }
+
+    // 2. begin must not exceed end
+    assert!(
+        begin <= end,
+        "begin <= end ({} <= {}) when slicing `{}`{}",
+        begin,
+        end,
+        s_trunc,
+        ellipsis
+    );
+
+    // 3. character boundary
+    let index = if !s.is_char_boundary(begin) {
+        begin
+    } else {
+        end
+    };
+    // find the character
+    let char_start = s.floor_char_boundary(index);
+    // `char_start` must be less than len and a char boundary, so a char starts there
+    let ch = s[char_start..].chars().next().unwrap();
+    let char_range = char_start..char_start + ch.len_utf8();
+    panic!(
+        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
+        index, ch, char_range, s_trunc, ellipsis
+    );
+}
+
+#[cold] // Panic path: a range's end `index` exceeds the string length `len`.
+#[track_caller]
+pub(crate) fn str_end_index_len_fail(index: usize, len: usize) -> ! {
+    panic!("range end index {index} out of range for JavaStr of length {len}");
+}
+
+#[cold] // Panic path: a range starts at `index`, which lies after its own `end`.
+#[track_caller]
+pub(crate) fn str_index_order_fail(index: usize, end: usize) -> ! {
+    panic!("JavaStr index starts at {index} but ends at {end}");
+}
+
+#[cold] // Panic path: an excluded start bound of usize::MAX has no successor to start from.
+#[track_caller]
+pub(crate) fn str_start_index_overflow_fail() -> ! {
+    panic!("attempted to index JavaStr from after maximum usize");
+}
+
+#[cold] // Panic path: an inclusive end of usize::MAX has no representable exclusive end.
+#[track_caller]
+pub(crate) fn str_end_index_overflow_fail() -> ! {
+    panic!("attempted to index JavaStr up to maximum usize")
+}
+
+/// Resolves `range` against a string of `bounds.end` bytes into `start..end`.
+///
+/// # Panics
+/// If a bound overflows `usize`, if `start > end`, or if `end > bounds.end`.
+#[inline]
+#[track_caller]
+pub(crate) fn to_range_checked<R>(range: R, bounds: RangeTo<usize>) -> Range<usize>
+where
+    R: RangeBounds<usize>,
+{
+    let len = bounds.end;
+
+    // The overflow helpers are called straight from the match arms: a closure would
+    // not inherit `#[track_caller]`, so the panic would point here, not at the caller.
+    let start = match range.start_bound() {
+        Bound::Included(&start) => start,
+        Bound::Excluded(&start) if start == usize::MAX => str_start_index_overflow_fail(),
+        Bound::Excluded(&start) => start + 1,
+        Bound::Unbounded => 0,
+    };
+    let end = match range.end_bound() {
+        Bound::Included(&end) if end == usize::MAX => str_end_index_overflow_fail(),
+        Bound::Included(&end) => end + 1,
+        Bound::Excluded(&end) => end,
+        Bound::Unbounded => len,
+    };
+
+    if start > end {
+        str_index_order_fail(start, end);
+    }
+    if end > len {
+        str_end_index_len_fail(end, len);
+    }
+    Range { start, end }
+}
diff --git a/typos.toml b/typos.toml
index 8e54543f5..c59189146 100644
--- a/typos.toml
+++ b/typos.toml
@@ -1,5 +1,5 @@
 [files]
-extend-exclude = ["*.svg", "*.json"]
+extend-exclude = ["*.svg", "*.json", "crates/java_string/src/slice.rs"]
 
 [default]
 extend-ignore-re = ['\d+ths', 'CC BY-NC-ND']