From 9c4103c0a706b8b94b5dc030980576e20b99cb81 Mon Sep 17 00:00:00 2001 From: Christophe Le Saec <51320496+clesaec@users.noreply.github.com> Date: Mon, 4 Dec 2023 08:57:02 +0100 Subject: [PATCH] AVRO-3912: Fix Big decimal deser (#2599) * AVRO-3912: Fix Big decimal deser Co-authored-by: Martin Tzvetanov Grigorov --- lang/rust/avro/src/bigdecimal.rs | 203 +++++++++++++++++++++++++++++++ lang/rust/avro/src/decimal.rs | 85 +------------ lang/rust/avro/src/decode.rs | 3 +- lang/rust/avro/src/encode.rs | 6 +- lang/rust/avro/src/lib.rs | 1 + lang/rust/avro/src/types.rs | 3 +- lang/rust/avro/tests/bigdec.avro | Bin 0 -> 189 bytes lang/rust/avro/tests/shared.rs | 13 +- 8 files changed, 219 insertions(+), 95 deletions(-) create mode 100644 lang/rust/avro/src/bigdecimal.rs create mode 100644 lang/rust/avro/tests/bigdec.avro diff --git a/lang/rust/avro/src/bigdecimal.rs b/lang/rust/avro/src/bigdecimal.rs new file mode 100644 index 00000000000..93ddae48de4 --- /dev/null +++ b/lang/rust/avro/src/bigdecimal.rs @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::{ + decode::{decode_len, decode_long}, + encode::{encode_bytes, encode_long}, + types::Value, + Error, +}; +use bigdecimal::BigDecimal; +use num_bigint::BigInt; +use std::io::Read; + +pub(crate) fn serialize_big_decimal(decimal: &BigDecimal) -> Vec { + // encode big decimal, without global size + let mut buffer: Vec = Vec::new(); + let (big_int, exponent): (BigInt, i64) = decimal.as_bigint_and_exponent(); + let big_endian_value: Vec = big_int.to_signed_bytes_be(); + encode_bytes(&big_endian_value, &mut buffer); + encode_long(exponent, &mut buffer); + + // encode global size and content + let mut final_buffer: Vec = Vec::new(); + encode_bytes(&buffer, &mut final_buffer); + final_buffer +} + +pub(crate) fn deserialize_big_decimal(bytes: &Vec) -> Result { + let mut bytes: &[u8] = bytes.as_slice(); + let mut big_decimal_buffer = match decode_len(&mut bytes) { + Ok(size) => vec![0u8; size], + Err(err) => return Err(Error::BigDecimalLen(Box::new(err))), + }; + + bytes + .read_exact(&mut big_decimal_buffer[..]) + .map_err(Error::ReadDouble)?; + + match decode_long(&mut bytes) { + Ok(Value::Long(scale_value)) => { + let big_int: BigInt = BigInt::from_signed_bytes_be(&big_decimal_buffer); + let decimal = BigDecimal::new(big_int, scale_value); + Ok(decimal) + } + _ => Err(Error::BigDecimalScale), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{ + types::{Record, Value}, + Codec, Reader, Schema, Writer, + }; + use apache_avro_test_helper::TestResult; + use bigdecimal::{One, Zero}; + use pretty_assertions::assert_eq; + use std::{ + fs::File, + io::BufReader, + ops::{Div, Mul}, + str::FromStr, + }; + + #[test] + fn test_avro_3779_bigdecimal_serial() -> TestResult { + let value: BigDecimal = + bigdecimal::BigDecimal::from(-1421).div(bigdecimal::BigDecimal::from(2)); + let mut current: BigDecimal = BigDecimal::one(); + + for iter in 1..180 { + let buffer: Vec = serialize_big_decimal(¤t); + + let mut as_slice = buffer.as_slice(); + decode_long(&mut as_slice)?; + + let mut result: Vec = Vec::new(); + result.extend_from_slice(as_slice); + + let deserialize_big_decimal: Result = + deserialize_big_decimal(&result); + assert!( + deserialize_big_decimal.is_ok(), + "can't deserialize for iter {iter}" + ); + assert_eq!(current, deserialize_big_decimal?, "not equals for {iter}"); + current = current.mul(&value); + } + + let buffer: Vec = serialize_big_decimal(&BigDecimal::zero()); + let mut as_slice = buffer.as_slice(); + decode_long(&mut as_slice)?; + + let mut result: Vec = Vec::new(); + result.extend_from_slice(as_slice); + + let deserialize_big_decimal: Result = deserialize_big_decimal(&result); + assert!( + deserialize_big_decimal.is_ok(), + "can't deserialize for zero" + ); + assert_eq!( + BigDecimal::zero(), + deserialize_big_decimal?, + "not equals for zero" + ); + + Ok(()) + } + + #[test] + fn test_avro_3779_record_with_bg() -> TestResult { + let schema_str = r#" + { + "type": "record", + "name": "test", + "fields": [ + { + "name": "field_name", + "type": "bytes", + "logicalType": "big-decimal" + } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + + // build record with big decimal value + let mut record = Record::new(&schema).unwrap(); + let val = BigDecimal::new(BigInt::from(12), 2); + record.put("field_name", val.clone()); + + // write a record + let codec = Codec::Null; + let mut writer = Writer::builder() + .schema(&schema) + .codec(codec) + .writer(Vec::new()) + .build(); + + writer.append(record.clone())?; + writer.flush()?; + + // read record + let wrote_data = writer.into_inner()?; + let mut reader = Reader::new(&wrote_data[..])?; + + let value = reader.next().unwrap()?; + + // extract field value + let big_decimal_value: &Value = match value { + Value::Record(ref fields) => Ok(&fields[0].1), + other => Err(format!("Expected a Value::Record, got: {other:?}")), + }?; + + let x1res: &BigDecimal = match big_decimal_value { + Value::BigDecimal(ref s) => Ok(s), + other => Err(format!("Expected Value::BigDecimal, got: {other:?}")), + }?; + assert_eq!(&val, x1res); + + Ok(()) + } + + #[test] + fn test_avro_3779_from_java_file() -> TestResult { + // Open file generated with Java code to ensure compatibility + // with Java big decimal logical type. + let file: File = File::open("./tests/bigdec.avro")?; + let mut reader = Reader::new(BufReader::new(&file))?; + let next_element = reader.next(); + assert!(next_element.is_some()); + let value = next_element.unwrap()?; + let bg = match value { + Value::Record(ref fields) => Ok(&fields[0].1), + other => Err(format!("Expected a Value::Record, got: {other:?}")), + }?; + let value_big_decimal = match bg { + Value::BigDecimal(val) => Ok(val), + other => Err(format!("Expected a Value::BigDecimal, got: {other:?}")), + }?; + + let ref_value = BigDecimal::from_str("2.24")?; + assert_eq!(&ref_value, value_big_decimal); + + Ok(()) + } +} diff --git a/lang/rust/avro/src/decimal.rs b/lang/rust/avro/src/decimal.rs index 7188127f620..0139e371929 100644 --- a/lang/rust/avro/src/decimal.rs +++ b/lang/rust/avro/src/decimal.rs @@ -15,15 +15,8 @@ // specific language governing permissions and limitations // under the License. -use crate::{ - decode::{decode_len, decode_long}, - encode::{encode_bytes, encode_long}, - types::Value, - AvroResult, Error, -}; -use bigdecimal::BigDecimal; +use crate::{AvroResult, Error}; use num_bigint::{BigInt, Sign}; -use std::io::Read; #[derive(Debug, Clone)] pub struct Decimal { @@ -112,47 +105,12 @@ impl> From for Decimal { } } -pub(crate) fn serialize_big_decimal(decimal: &BigDecimal) -> Vec { - let mut buffer: Vec = Vec::new(); - let (big_int, exponent): (BigInt, i64) = decimal.as_bigint_and_exponent(); - let big_endian_value: Vec = big_int.to_signed_bytes_be(); - encode_bytes(&big_endian_value, &mut buffer); - encode_long(exponent, &mut buffer); - - buffer -} - -pub(crate) fn deserialize_big_decimal(bytes: &Vec) -> Result { - let mut bytes: &[u8] = bytes.as_slice(); - let mut big_decimal_buffer = match decode_len(&mut bytes) { - Ok(size) => vec![0u8; size], - Err(err) => return Err(Error::BigDecimalLen(Box::new(err))), - }; - - bytes - .read_exact(&mut big_decimal_buffer[..]) - .map_err(Error::ReadDouble)?; - - match decode_long(&mut bytes) { - Ok(Value::Long(scale_value)) => { - let big_int: BigInt = BigInt::from_signed_bytes_be(&big_decimal_buffer); - let decimal = BigDecimal::new(big_int, scale_value); - Ok(decimal) - } - _ => Err(Error::BigDecimalScale), - } -} - #[cfg(test)] mod tests { use super::*; use apache_avro_test_helper::TestResult; - use bigdecimal::{One, Zero}; use pretty_assertions::assert_eq; - use std::{ - convert::TryFrom, - ops::{Div, Mul}, - }; + use std::convert::TryFrom; #[test] fn test_decimal_from_bytes_from_ref_decimal() -> TestResult { @@ -175,43 +133,4 @@ mod tests { Ok(()) } - - #[test] - fn test_avro_3779_bigdecimal_serial() -> TestResult { - let value: bigdecimal::BigDecimal = - bigdecimal::BigDecimal::from(-1421).div(bigdecimal::BigDecimal::from(2)); - let mut current: bigdecimal::BigDecimal = bigdecimal::BigDecimal::one(); - - for iter in 1..180 { - let result: Vec = serialize_big_decimal(¤t); - - let deserialize_big_decimal: Result = - deserialize_big_decimal(&result); - assert!( - deserialize_big_decimal.is_ok(), - "can't deserialize for iter {iter}" - ); - assert_eq!( - current, - deserialize_big_decimal.unwrap(), - "not equals for ${iter}" - ); - current = current.mul(&value); - } - - let result: Vec = serialize_big_decimal(&BigDecimal::zero()); - let deserialize_big_decimal: Result = - deserialize_big_decimal(&result); - assert!( - deserialize_big_decimal.is_ok(), - "can't deserialize for zero" - ); - assert_eq!( - BigDecimal::zero(), - deserialize_big_decimal.unwrap(), - "not equals for zero" - ); - - Ok(()) - } } diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs index 7857bbec565..8c50e77026a 100644 --- a/lang/rust/avro/src/decode.rs +++ b/lang/rust/avro/src/decode.rs @@ -16,7 +16,8 @@ // under the License. use crate::{ - decimal::{deserialize_big_decimal, Decimal}, + bigdecimal::deserialize_big_decimal, + decimal::Decimal, duration::Duration, schema::{ DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs index 1f7e40da964..5dd08777177 100644 --- a/lang/rust/avro/src/encode.rs +++ b/lang/rust/avro/src/encode.rs @@ -16,7 +16,7 @@ // under the License. use crate::{ - decimal::serialize_big_decimal, + bigdecimal::serialize_big_decimal, schema::{ DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, Schema, SchemaKind, UnionSchema, @@ -130,8 +130,8 @@ pub(crate) fn encode_internal>( buffer, ), Value::BigDecimal(bg) => { - let mut buf: Vec = serialize_big_decimal(bg); - buffer.append(&mut buf); + let buf: Vec = serialize_big_decimal(bg); + buffer.extend_from_slice(buf.as_slice()); } Value::Bytes(bytes) => match *schema { Schema::Bytes => encode_bytes(bytes, buffer), diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index cb49555d567..a613d93c0a0 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -763,6 +763,7 @@ //! assert!(SchemaCompatibility::can_read(&writers_schema, &readers_schema).is_err()); //! ``` +mod bigdecimal; mod codec; mod de; mod decimal; diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs index 060326f2e6c..49fb9e6a35a 100644 --- a/lang/rust/avro/src/types.rs +++ b/lang/rust/avro/src/types.rs @@ -17,7 +17,8 @@ //! Logic handling the intermediate representation of Avro values. use crate::{ - decimal::{deserialize_big_decimal, serialize_big_decimal, Decimal}, + bigdecimal::{deserialize_big_decimal, serialize_big_decimal}, + decimal::Decimal, duration::Duration, schema::{ DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision, RecordField, diff --git a/lang/rust/avro/tests/bigdec.avro b/lang/rust/avro/tests/bigdec.avro new file mode 100644 index 0000000000000000000000000000000000000000..641db7e2a47553371ce17679e3ad14d6be08e1e1 GIT binary patch literal 189 zcmeZI%3@>^ODrqO*DFrWNX<=bVX9UtsVqoUvQjEaP0lY$QPNS$OUy;#r{pICr9q^Q zQd(wePD-(oRdh8>8CWD9tVaiG6x_(9%97M#pst+!^vvYMoDhgbNtx-oDXGbsxrsSS kwY9Oe49_Y)-d~aba?y@Qi|4#zbx~pB;$UHTz=AFe0E!w$Q2+n{ literal 0 HcmV?d00001 diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs index 9790ddfe424..3915d5d12aa 100644 --- a/lang/rust/avro/tests/shared.rs +++ b/lang/rust/avro/tests/shared.rs @@ -49,7 +49,7 @@ fn test_schema() -> TestResult { ROOT_DIRECTORY.to_owned() + "/" + entry.file_name().to_str().unwrap(); let dir_result = test_folder(sub_folder.as_str()); - if let Result::Err(ed) = dir_result { + if let Err(ed) = dir_result { result = match result { Ok(()) => Err(ed), Err(e) => Err(e.merge(&ed)), @@ -58,9 +58,8 @@ fn test_schema() -> TestResult { } } } - if let Err(e) = result { - core::panic!("{}", e) - } + result?; + Ok(()) } @@ -99,16 +98,16 @@ impl fmt::Display for ErrorsDesc { fn test_folder(folder: &str) -> Result<(), ErrorsDesc> { let file_name = folder.to_owned() + "/schema.json"; - let content = std::fs::read_to_string(file_name).expect("Unable to find schema.jon file"); + let content = std::fs::read_to_string(file_name).expect("Unable to find schema.json file"); let schema: Schema = Schema::parse_str(content.as_str()).expect("Can't read schema"); let data_file_name = folder.to_owned() + "/data.avro"; let data_path: &Path = Path::new(data_file_name.as_str()); - let mut result = Result::Ok(()); + let mut result = Ok(()); if !data_path.exists() { log::error!("{}", format!("folder {folder} does not exist")); - return Result::Err(ErrorsDesc::new( + return Err(ErrorsDesc::new( format!("folder {folder} does not exist").as_str(), )); } else {