Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generify valence_nbt to allow other types of String #546

Merged
merged 9 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions crates/java_string/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,20 @@ impl AsRef<[u8]> for JavaStr {
}
}

/// Lets a plain `str` be borrowed as a [`JavaStr`].
impl AsRef<JavaStr> for str {
/// Returns this string slice viewed as a `JavaStr`.
#[inline]
fn as_ref(&self) -> &JavaStr {
// The conversion itself is delegated to `JavaStr::from_str`.
JavaStr::from_str(self)
}
}

/// Lets an owned `String` be borrowed as a [`JavaStr`].
impl AsRef<JavaStr> for String {
/// Returns this string's contents viewed as a `JavaStr`.
#[inline]
fn as_ref(&self) -> &JavaStr {
// The conversion itself is delegated to `JavaStr::from_str`.
JavaStr::from_str(self)
}
}

impl Clone for Box<JavaStr> {
#[inline]
fn clone(&self) -> Self {
Expand Down Expand Up @@ -1822,6 +1836,13 @@ impl<'a> From<&'a str> for &'a JavaStr {
}
}

/// Converts a borrowed `String` into a borrowed [`JavaStr`] with the same
/// lifetime `'a`.
impl<'a> From<&'a String> for &'a JavaStr {
/// Returns `value` viewed as a `JavaStr`.
#[inline]
fn from(value: &'a String) -> Self {
// The conversion itself is delegated to `JavaStr::from_str`.
JavaStr::from_str(value)
}
}

impl Hash for JavaStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
Expand Down
40 changes: 29 additions & 11 deletions crates/valence_anvil/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
)]

use std::fs::{DirEntry, File};
use std::hash::Hash;
use std::io::{Cursor, ErrorKind, Read, Seek, SeekFrom, Write};
use std::num::NonZeroUsize;
use std::path::{Path, PathBuf};
Expand All @@ -31,6 +32,7 @@ use flate2::bufread::{GzDecoder, ZlibDecoder};
use flate2::write::{GzEncoder, ZlibEncoder};
use lru::LruCache;
use thiserror::Error;
use valence_nbt::binary::{FromModifiedUtf8, ToModifiedUtf8};
use valence_nbt::Compound;

#[cfg(feature = "bevy_plugin")]
Expand Down Expand Up @@ -169,7 +171,14 @@ impl RegionFolder {
/// loading it. Returns `Ok(None)` if the chunk does not exist and no
/// errors occurred attempting to load it. Returns `Err(_)` if an error
/// occurred attempting to load the chunk.
pub fn get_chunk(&mut self, pos_x: i32, pos_z: i32) -> Result<Option<RawChunk>, RegionError> {
pub fn get_chunk<S>(
&mut self,
pos_x: i32,
pos_z: i32,
) -> Result<Option<RawChunk<S>>, RegionError>
where
S: for<'a> FromModifiedUtf8<'a> + Hash + Ord,
{
let region_x = pos_x.div_euclid(32);
let region_z = pos_z.div_euclid(32);

Expand Down Expand Up @@ -201,12 +210,15 @@ impl RegionFolder {

/// Sets the raw chunk at the given position, overwriting the old chunk if
/// it exists.
pub fn set_chunk(
pub fn set_chunk<S>(
&mut self,
pos_x: i32,
pos_z: i32,
chunk: &Compound,
) -> Result<(), RegionError> {
chunk: &Compound<S>,
) -> Result<(), RegionError>
where
S: ToModifiedUtf8 + Hash + Ord,
{
let region_x = pos_x.div_euclid(32);
let region_z = pos_z.div_euclid(32);

Expand Down Expand Up @@ -312,8 +324,8 @@ impl RegionFolder {
}

/// A chunk represented by the raw compound data.
pub struct RawChunk {
pub data: Compound,
pub struct RawChunk<S = String> {
pub data: Compound<S>,
pub timestamp: u32,
}

Expand Down Expand Up @@ -415,13 +427,16 @@ impl Region {
})
}

fn get_chunk(
fn get_chunk<S>(
&mut self,
pos_x: i32,
pos_z: i32,
decompress_buf: &mut Vec<u8>,
region_root: &Path,
) -> Result<Option<RawChunk>, RegionError> {
) -> Result<Option<RawChunk<S>>, RegionError>
where
S: for<'a> FromModifiedUtf8<'a> + Hash + Ord,
{
let chunk_idx = Self::chunk_idx(pos_x, pos_z);

let location = self.locations[chunk_idx];
Expand Down Expand Up @@ -536,15 +551,18 @@ impl Region {
Ok(true)
}

fn set_chunk(
fn set_chunk<S>(
&mut self,
pos_x: i32,
pos_z: i32,
chunk: &Compound,
chunk: &Compound<S>,
options: WriteOptions,
compress_buf: &mut Vec<u8>,
region_root: &Path,
) -> Result<(), RegionError> {
) -> Result<(), RegionError>
where
S: ToModifiedUtf8 + Hash + Ord,
{
// erase the chunk from allocated chunks (not from disk)
self.delete_chunk(pos_x, pos_z, false, region_root)?;

Expand Down
2 changes: 2 additions & 0 deletions crates/valence_nbt/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ repository.workspace = true

[features]
binary = ["dep:byteorder", "dep:cesu8"]
java_string = ["dep:java_string"]
snbt = []
# When enabled, the order of fields in compounds are preserved.
preserve_order = ["dep:indexmap"]
Expand All @@ -20,6 +21,7 @@ serde = ["dep:serde", "dep:thiserror", "indexmap?/serde"]
byteorder = { workspace = true, optional = true }
cesu8 = { workspace = true, optional = true }
indexmap = { workspace = true, optional = true }
java_string = { workspace = true, optional = true }
serde = { workspace = true, features = ["derive"], optional = true }
thiserror = { workspace = true, optional = true }
uuid = { workspace = true, optional = true }
Expand Down
4 changes: 2 additions & 2 deletions crates/valence_nbt/src/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ mod modified_utf8;
#[cfg(test)]
mod tests;

pub use decode::from_binary;
pub use encode::{to_binary, written_size};
pub use decode::{from_binary, FromModifiedUtf8, FromModifiedUtf8Error};
pub use encode::{to_binary, written_size, ToModifiedUtf8};
pub use error::*;

use crate::Tag;
Expand Down
129 changes: 102 additions & 27 deletions crates/valence_nbt/src/binary/decode.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::mem;
use std::borrow::Cow;
use std::hash::Hash;
use std::{fmt, mem};

use byteorder::{BigEndian, ReadBytesExt};
use cesu8::Cesu8DecodingError;

use super::{Error, Result};
use crate::tag::Tag;
Expand All @@ -11,7 +12,10 @@ use crate::{Compound, List, Value};
///
/// The string returned in the tuple is the name of the root compound
/// (typically the empty string).
pub fn from_binary(slice: &mut &[u8]) -> Result<(Compound, String)> {
pub fn from_binary<'de, S>(slice: &mut &'de [u8]) -> Result<(Compound<S>, S)>
where
S: FromModifiedUtf8<'de> + Hash + Ord,
{
let mut state = DecodeState { slice, depth: 0 };

let root_tag = state.read_tag()?;
Expand All @@ -23,7 +27,7 @@ pub fn from_binary(slice: &mut &[u8]) -> Result<(Compound, String)> {
)));
}

let root_name = state.read_string()?;
let root_name = state.read_string::<S>()?;
let root = state.read_compound()?;

debug_assert_eq!(state.depth, 0);
Expand All @@ -34,13 +38,13 @@ pub fn from_binary(slice: &mut &[u8]) -> Result<(Compound, String)> {
/// Maximum recursion depth to prevent overflowing the call stack.
const MAX_DEPTH: usize = 512;

struct DecodeState<'a, 'b> {
slice: &'a mut &'b [u8],
struct DecodeState<'a, 'de> {
slice: &'a mut &'de [u8],
/// Current recursion depth.
depth: usize,
}

impl DecodeState<'_, '_> {
impl<'de> DecodeState<'_, 'de> {
#[inline]
fn check_depth<T>(&mut self, f: impl FnOnce(&mut Self) -> Result<T>) -> Result<T> {
if self.depth >= MAX_DEPTH {
Expand Down Expand Up @@ -72,7 +76,10 @@ impl DecodeState<'_, '_> {
}
}

fn read_value(&mut self, tag: Tag) -> Result<Value> {
fn read_value<S>(&mut self, tag: Tag) -> Result<Value<S>>
where
S: FromModifiedUtf8<'de> + Hash + Ord,
{
match tag {
Tag::End => unreachable!("illegal TAG_End argument"),
Tag::Byte => Ok(self.read_byte()?.into()),
Expand All @@ -82,9 +89,9 @@ impl DecodeState<'_, '_> {
Tag::Float => Ok(self.read_float()?.into()),
Tag::Double => Ok(self.read_double()?.into()),
Tag::ByteArray => Ok(self.read_byte_array()?.into()),
Tag::String => Ok(self.read_string()?.into()),
Tag::List => self.check_depth(|st| Ok(st.read_any_list()?.into())),
Tag::Compound => self.check_depth(|st| Ok(st.read_compound()?.into())),
Tag::String => Ok(Value::String(self.read_string::<S>()?)),
Tag::List => self.check_depth(|st| Ok(st.read_any_list::<S>()?.into())),
Tag::Compound => self.check_depth(|st| Ok(st.read_compound::<S>()?.into())),
Tag::IntArray => Ok(self.read_int_array()?.into()),
Tag::LongArray => Ok(self.read_long_array()?.into()),
}
Expand Down Expand Up @@ -137,7 +144,10 @@ impl DecodeState<'_, '_> {
Ok(array)
}

fn read_string(&mut self) -> Result<String> {
fn read_string<S>(&mut self) -> Result<S>
where
S: FromModifiedUtf8<'de>,
{
let len = self.slice.read_u16::<BigEndian>()?.into();

if len > self.slice.len() {
Expand All @@ -148,18 +158,19 @@ impl DecodeState<'_, '_> {

let (left, right) = self.slice.split_at(len);

match cesu8::from_java_cesu8(left) {
Ok(cow) => {
match S::from_modified_utf8(left) {
Ok(str) => {
*self.slice = right;
Ok(cow.into())
}
Err(Cesu8DecodingError) => {
Err(Error::new_static("could not convert CESU-8 data to UTF-8"))
Ok(str)
}
Err(_) => Err(Error::new_static("could not decode modified UTF-8 data")),
}
}

fn read_any_list(&mut self) -> Result<List> {
fn read_any_list<S>(&mut self) -> Result<List<S>>
where
S: FromModifiedUtf8<'de> + Hash + Ord,
{
match self.read_tag()? {
Tag::End => match self.read_int()? {
0 => Ok(List::End),
Expand All @@ -178,14 +189,17 @@ impl DecodeState<'_, '_> {
Tag::ByteArray => Ok(self
.read_list(Tag::ByteArray, 0, |st| st.read_byte_array())?
.into()),
Tag::String => Ok(self
.read_list(Tag::String, 0, |st| st.read_string())?
.into()),
Tag::List => self
.check_depth(|st| Ok(st.read_list(Tag::List, 0, |st| st.read_any_list())?.into())),
Tag::String => Ok(List::String(
self.read_list(Tag::String, 0, |st| st.read_string::<S>())?,
)),
Tag::List => self.check_depth(|st| {
Ok(st
.read_list(Tag::List, 0, |st| st.read_any_list::<S>())?
.into())
}),
Tag::Compound => self.check_depth(|st| {
Ok(st
.read_list(Tag::Compound, 0, |st| st.read_compound())?
.read_list(Tag::Compound, 0, |st| st.read_compound::<S>())?
.into())
}),
Tag::IntArray => Ok(self
Expand Down Expand Up @@ -237,7 +251,10 @@ impl DecodeState<'_, '_> {
Ok(list)
}

fn read_compound(&mut self) -> Result<Compound> {
fn read_compound<S>(&mut self) -> Result<Compound<S>>
where
S: FromModifiedUtf8<'de> + Hash + Ord,
{
let mut compound = Compound::new();

loop {
Expand All @@ -246,7 +263,7 @@ impl DecodeState<'_, '_> {
return Ok(compound);
}

compound.insert(self.read_string()?, self.read_value(tag)?);
compound.insert(self.read_string::<S>()?, self.read_value::<S>(tag)?);
}
}

Expand Down Expand Up @@ -296,3 +313,61 @@ impl DecodeState<'_, '_> {
Ok(array)
}
}

/// Error reported when a byte sequence cannot be decoded as modified UTF-8.
///
/// The type carries no payload; it only signals that decoding failed.
#[derive(Debug, Clone, Copy)]
pub struct FromModifiedUtf8Error;

impl std::error::Error for FromModifiedUtf8Error {}

impl fmt::Display for FromModifiedUtf8Error {
    /// Writes the fixed, human-readable failure message.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(formatter, "could not decode modified UTF-8 data")
    }
}

/// A string type which can be decoded from Java's [modified UTF-8](https://docs.oracle.com/javase/8/docs/api/java/io/DataInput.html#modified-utf-8).
pub trait FromModifiedUtf8<'de>: Sized {
fn from_modified_utf8(
Earthcomputer marked this conversation as resolved.
Show resolved Hide resolved
modified_utf8: &'de [u8],
) -> std::result::Result<Self, FromModifiedUtf8Error>;
}

/// Decodes modified UTF-8 into a `Cow<'de, str>` using
/// `cesu8::from_java_cesu8`, returning whatever `Cow` that decoder produces.
impl<'de> FromModifiedUtf8<'de> for Cow<'de, str> {
    fn from_modified_utf8(
        modified_utf8: &'de [u8],
    ) -> std::result::Result<Self, FromModifiedUtf8Error> {
        // The decoder's own error value is discarded in favour of this
        // module's error type.
        cesu8::from_java_cesu8(modified_utf8).or(Err(FromModifiedUtf8Error))
    }
}

impl<'de> FromModifiedUtf8<'de> for String {
fn from_modified_utf8(
modified_utf8: &'de [u8],
) -> std::result::Result<Self, FromModifiedUtf8Error> {
match cesu8::from_java_cesu8(modified_utf8) {
Ok(str) => Ok(str.into_owned()),
Err(_) => Err(FromModifiedUtf8Error),
}
}
}

/// Decodes modified UTF-8 into a `Cow<'de, JavaStr>` using
/// `JavaStr::from_modified_utf8`, returning whatever `Cow` that decoder
/// produces. Only compiled when the `java_string` feature is enabled.
#[cfg(feature = "java_string")]
impl<'de> FromModifiedUtf8<'de> for Cow<'de, java_string::JavaStr> {
    fn from_modified_utf8(
        modified_utf8: &'de [u8],
    ) -> std::result::Result<Self, FromModifiedUtf8Error> {
        // The decoder's own error value is discarded in favour of this
        // module's error type.
        java_string::JavaStr::from_modified_utf8(modified_utf8).or(Err(FromModifiedUtf8Error))
    }
}

/// Decodes modified UTF-8 into an owned `JavaString` using
/// `JavaStr::from_modified_utf8`. Only compiled when the `java_string`
/// feature is enabled.
#[cfg(feature = "java_string")]
impl<'de> FromModifiedUtf8<'de> for java_string::JavaString {
    fn from_modified_utf8(
        modified_utf8: &'de [u8],
    ) -> std::result::Result<Self, FromModifiedUtf8Error> {
        // Take ownership of the decoded text on success; on failure, replace
        // the decoder's error with this module's error type.
        java_string::JavaStr::from_modified_utf8(modified_utf8)
            .map(Cow::into_owned)
            .map_err(|_| FromModifiedUtf8Error)
    }
}
Loading
Loading