diff --git a/.github/workflows/codeql-csharp-analysis.yml b/.github/workflows/codeql-csharp-analysis.yml index 48af1be55c1..93fab0c861a 100644 --- a/.github/workflows/codeql-csharp-analysis.yml +++ b/.github/workflows/codeql-csharp-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. @@ -67,6 +67,7 @@ jobs: 3.1.x 5.0.x 6.0.x + 7.0.x # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL diff --git a/.github/workflows/codeql-java-analysis.yml b/.github/workflows/codeql-java-analysis.yml index c42c627a0cd..5117e2404a9 100644 --- a/.github/workflows/codeql-java-analysis.yml +++ b/.github/workflows/codeql-java-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/codeql-js-analysis.yml b/.github/workflows/codeql-js-analysis.yml index 21af5443bb0..6d042b13606 100644 --- a/.github/workflows/codeql-js-analysis.yml +++ b/.github/workflows/codeql-js-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/codeql-py-analysis.yml b/.github/workflows/codeql-py-analysis.yml index 95f986ae7ea..11582400d66 100644 --- a/.github/workflows/codeql-py-analysis.yml +++ b/.github/workflows/codeql-py-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/java-publish-snapshot.yml b/.github/workflows/java-publish-snapshot.yml new file mode 100644 index 00000000000..279a0aff4bc --- /dev/null +++ b/.github/workflows/java-publish-snapshot.yml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. 
+ +name: "Publish Snapshot to Maven" +on: + workflow_dispatch: + push: + branches: [ master ] + paths: + - .github/workflows/java-publish-snapshot.yml + - lang/java/** + - pom.xml + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +defaults: + run: + working-directory: lang/java + +jobs: + publish-snapshot: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Local Maven Repository + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Setup Java + uses: actions/setup-java@v3 + with: + distribution: 'adopt' + java-version: 8 + + - name: Deploy Maven snapshots + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + run: | + echo "apache.snapshots.https$ASF_USERNAME$ASF_PASSWORD" > settings.xml + mvn --settings settings.xml -U -B -e -fae -ntp -DskipTests deploy diff --git a/.github/workflows/maven4.yml b/.github/workflows/maven4.yml new file mode 100644 index 00000000000..c3a04c0e11c --- /dev/null +++ b/.github/workflows/maven4.yml @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: 'Maven 4' +on: + workflow_dispatch: + push: + branches: [ master ] + pull_request: + branches: [ master ] + paths: + - .github/workflows/maven4.yml + - lang/java/** + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + maven4: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Cache Local Maven Repository + uses: actions/cache@v3 + with: + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Cache Maven 4 Build Cache + uses: actions/cache@v3 + with: + path: ~/.m2/build-cache + key: ${{ runner.os }}-maven-build-cache-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven-build-cache + + - name: Setup Java + uses: actions/setup-java@v3 + with: + distribution: 'adopt' + java-version: '11' + + - name: Setup Maven 4 + uses: stCarolas/setup-maven@v4.5 + with: + maven-version: 4.0.0-alpha-3 + + - name: Test + run: mvn clean package diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml index 97c59e88259..43cc19a2ced 100644 --- a/.github/workflows/rat.yml +++ b/.github/workflows/rat.yml @@ -29,7 +29,7 @@ jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v2 diff --git a/.github/workflows/spotless.yml b/.github/workflows/spotless.yml index 37b40e903b3..03a0b81dea1 100644 --- a/.github/workflows/spotless.yml +++ b/.github/workflows/spotless.yml @@ -32,7 +32,7 @@ jobs: spotless: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v2 diff --git a/.github/workflows/test-lang-c++.yml b/.github/workflows/test-lang-c++.yml index d9a2ca2c1a1..605ad2227a5 100644 --- a/.github/workflows/test-lang-c++.yml +++ b/.github/workflows/test-lang-c++.yml @@ -36,7 +36,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev cmake diff --git a/.github/workflows/test-lang-c.yml b/.github/workflows/test-lang-c.yml index 614728ede1e..cf146f8529a 100644 --- a/.github/workflows/test-lang-c.yml +++ b/.github/workflows/test-lang-c.yml @@ -36,7 +36,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt-get install -qqy libjansson-dev libsnappy-dev @@ -76,7 +76,7 @@ jobs: interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install Dependencies run: | diff --git a/.github/workflows/test-lang-csharp.yml b/.github/workflows/test-lang-csharp.yml index b436f270c88..579e5c30b3e 100644 --- a/.github/workflows/test-lang-csharp.yml +++ b/.github/workflows/test-lang-csharp.yml @@ -36,21 +36,22 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Add libzstd shell: bash run: sudo apt-get install -y libzstd-dev - name: Install .NET SDKs - uses: actions/setup-dotnet@v1 + uses: actions/setup-dotnet@v3 with: dotnet-version: | 3.1.x 5.0.x 6.0.x + 7.0.x - - uses: actions/cache@v2 + - uses: actions/cache@v3 with: path: ~/.nuget/packages key: ${{ runner.os }}-nuget-${{ hashFiles('**/packages.lock.json') }} @@ -63,38 +64,26 @@ jobs: - name: Test run: ./build.sh test - # Build and test against .NET 
7 - # .NET 7 is not released yet, however this is a good way to test if the project is ready for the release - # Once .NET 7 is officially released, this can be removed and 7.0.x can be used instead above - - name: Install .NET SDK 7.0 (pre-release) - uses: actions/setup-dotnet@v1 - with: - include-prerelease: true - dotnet-version: | - 7.0.x - - - name: Test .NET 7.0 (pre-release) - run: ./build.sh test - interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Add libzstd shell: bash run: sudo apt-get install -y libzstd-dev - name: Install .NET SDKs - uses: actions/setup-dotnet@v1 + uses: actions/setup-dotnet@v3 with: dotnet-version: | 3.1.x 5.0.x 6.0.x + 7.0.x - name: Cache Local Maven Repository - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: ~/.m2/repository key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} diff --git a/.github/workflows/test-lang-java.yml b/.github/workflows/test-lang-java.yml index 01b9a597ef0..ddfde4bc6be 100644 --- a/.github/workflows/test-lang-java.yml +++ b/.github/workflows/test-lang-java.yml @@ -46,7 +46,7 @@ jobs: - '18' - '19' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v2 @@ -87,7 +87,7 @@ jobs: - '11' - '17' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v2 @@ -115,7 +115,7 @@ jobs: - name: Install Python Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox python3 -m pip install python-snappy zstandard - name: Setup C# for Generating Interop Data @@ -125,6 +125,7 @@ jobs: 3.1.x 5.0.x 6.0.x + 7.0.x - name: Install Java Avro for Interop Test working-directory: . 
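As background for the interop steps above: each SDK job writes an Avro object container file that the other SDKs read back. A minimal Java sketch of what that generation boils down to, with an inline stand-in schema and output path (the real jobs drive `share/test/schemas/interop.avsc` through the build):

```java
import java.io.File;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class InteropDataSketch {
    public static void main(String[] args) throws Exception {
        // Stand-in schema; the CI jobs use the shared interop schema instead.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"Rec\",\"fields\":[{\"name\":\"s\",\"type\":\"string\"}]}");

        GenericRecord rec = new GenericData.Record(schema);
        rec.put("s", "interop");

        // Container files embed the schema and sync markers, so any SDK can read them back.
        try (DataFileWriter<GenericRecord> writer =
                 new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.create(schema, new File("interop-sample.avro"));
            writer.append(rec);
        }
    }
}
```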
diff --git a/.github/workflows/test-lang-js.yml b/.github/workflows/test-lang-js.yml index 5efb677b710..3ab7d3d98f0 100644 --- a/.github/workflows/test-lang-js.yml +++ b/.github/workflows/test-lang-js.yml @@ -43,7 +43,7 @@ jobs: - 14 - 16 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v2 with: @@ -72,7 +72,7 @@ jobs: - 14 - 16 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v2 with: @@ -96,7 +96,7 @@ jobs: libzstd-dev - name: Install Python Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox python3 -m pip install python-snappy zstandard - name: Create Interop Data Directory diff --git a/.github/workflows/test-lang-perl.yml b/.github/workflows/test-lang-perl.yml index f5243340516..8e3a3cbab40 100644 --- a/.github/workflows/test-lang-perl.yml +++ b/.github/workflows/test-lang-perl.yml @@ -41,7 +41,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: @@ -87,7 +87,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: diff --git a/.github/workflows/test-lang-php.yml b/.github/workflows/test-lang-php.yml index 15c1ee7d6fe..23987fd9f16 100644 --- a/.github/workflows/test-lang-php.yml +++ b/.github/workflows/test-lang-php.yml @@ -44,7 +44,7 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 @@ -53,7 +53,7 @@ jobs: - name: Get Composer Cache Directory id: composer-cache - run: echo "::set-output name=dir::$(composer config cache-files-dir)" + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT - uses: actions/cache@v2 with: @@ -79,7 +79,7 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 @@ -106,7 +106,7 @@ jobs: working-directory: lang/java/avro run: mvn -B -P interop-data-generate generate-resources - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-zstd path: lang/php/php-ext-zstd @@ -122,7 +122,7 @@ jobs: echo "extension=zstd.so" | sudo tee -a /etc/php/${{ matrix.php }}/cli/conf.d/10-zstd.ini php -m - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-snappy path: lang/php/php-ext-snappy diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml index 48b793ede8f..833b7bf48bc 100644 --- a/.github/workflows/test-lang-py.yml +++ b/.github/workflows/test-lang-py.yml @@ -50,7 +50,7 @@ jobs: - 'pypy-3.6' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v2 @@ -67,7 +67,7 @@ jobs: - name: Install Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox - name: Lint if: ${{ matrix.python == '3.10' }} @@ -97,7 +97,7 @@ jobs: - 'pypy-3.6' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v2 @@ -114,7 +114,7 @@ jobs: - name: Install Dependencies run: | - python3 -m pip install --upgrade pip setuptools tox-wheel + python3 -m pip install --upgrade pip setuptools tox python3 -m pip install python-snappy zstandard - name: Cache Local Maven 
Repository diff --git a/.github/workflows/test-lang-ruby.yml b/.github/workflows/test-lang-ruby.yml index b846781ea2f..e9d84469373 100644 --- a/.github/workflows/test-lang-ruby.yml +++ b/.github/workflows/test-lang-ruby.yml @@ -43,15 +43,16 @@ jobs: - '2.7' - '3.0' - '3.1' + - '3.2' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get install -qqy libsnappy-dev - uses: actions/cache@v2 with: @@ -83,15 +84,16 @@ jobs: - '2.7' - '3.0' - '3.1' + - '3.2' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby }} - name: Install Dependencies - run: sudo apt-get install -qqy bundler libsnappy-dev + run: sudo apt-get install -qqy libsnappy-dev - uses: actions/cache@v2 with: diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml index 73f348d4f50..9ca10b0b538 100644 --- a/.github/workflows/test-lang-rust-audit.yml +++ b/.github/workflows/test-lang-rust-audit.yml @@ -27,6 +27,12 @@ on: - lang/rust/Cargo.toml - lang/rust/Cargo.lock +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -40,13 +46,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 - # Currently does not work. See https://github.com/actions-rs/audit-check/issues/194 - #- name: Rust Audit - # uses: actions-rs/audit-check@v1 - # with: - # token: ${{ secrets.GITHUB_TOKEN }} - # Install it manually + uses: actions/checkout@v4 - name: Dependency Review if: github.event_name == 'pull_request' uses: actions/dependency-review-action@v3 diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml index 12c6ad273c9..fb13754d19f 100644 --- a/.github/workflows/test-lang-rust-ci.yml +++ b/.github/workflows/test-lang-rust-ci.yml @@ -26,6 +26,12 @@ on: - .github/workflows/test-lang-rust-ci.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -40,17 +46,17 @@ jobs: strategy: matrix: rust: - - stable - - beta - - nightly - - 1.60.0 # MSRV + - 'stable' + - 'beta' + - 'nightly' + - '1.65.0' # MSRV target: - x86_64-unknown-linux-gnu - wasm32-unknown-unknown steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Cache Cargo uses: actions/cache@v2 @@ -68,62 +74,59 @@ jobs: key: ${{ runner.os }}-target-cache1-${{ matrix.rust }}- - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: ${{ matrix.rust }} - override: true components: rustfmt - target: ${{ matrix.target }} + targets: ${{ matrix.target }} + + - name: Cache cargo-rdme + if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + uses: actions/cache@v3 + with: + path: ~/.cargo-${{ matrix.rust }}/cargo-rdme + key: cargo-rdme- + + # Check if the doc cumment in avro/src/lib.rs and avro/README.md are in sync. + - name: Run cargo-rdme + # The result is environment independent so one test pattern is enough. 
+ if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + run: | + cargo install --root ~/.cargo-${{ matrix.rust }}/cargo-rdme --locked cargo-rdme + export PATH=$PATH:~/.cargo-${{ matrix.rust }}/cargo-rdme/bin + cargo rdme --check - name: Rust Format if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --manifest-path lang/rust/Cargo.toml --all -- --check + run: cargo fmt --all -- --check - name: Rust Build - uses: actions-rs/cargo@v1 - with: - command: build - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets + run: cargo build --all-features --all-targets - name: Rust Test if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --all-features --target ${{ matrix.target }} + run: cargo test --all-features --target ${{ matrix.target }} - name: Rust Test AVRO-3549 if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec + run: cargo test --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec # because of https://github.com/rust-lang/cargo/issues/6669 - name: Rust Test docs if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --doc + run: cargo test --doc interop: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: stable - override: true - name: Cache Cargo uses: actions/cache@v2 @@ -202,15 +205,13 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: stable - override: true - target: wasm32-unknown-unknown + targets: wasm32-unknown-unknown - name: Cache Cargo uses: actions/cache@v2 diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml index c5bc52a5aca..e0287863160 100644 --- a/.github/workflows/test-lang-rust-clippy.yml +++ b/.github/workflows/test-lang-rust-clippy.yml @@ -26,6 +26,12 @@ on: - .github/workflows/test-lang-rust-clippy.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -37,14 +43,15 @@ concurrency: jobs: clippy_check: runs-on: ubuntu-latest + strategy: + matrix: + rust: + - 'stable' + - '1.65.0' # MSRV steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly with: - toolchain: stable + toolchain: ${{ matrix.rust }} components: clippy - override: true - - uses: actions-rs/clippy-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets -- -Dclippy::all -Dunused_imports + - run: cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml new file mode 100644 index 00000000000..c6000969d38 --- /dev/null +++ b/.mvn/extensions.xml @@ -0,0 +1,25 @@ + + + + + org.apache.maven.extensions + maven-build-cache-extension + 1.0.0 + + diff --git a/BUILD.md b/BUILD.md 
index 61d26b2cd0f..ce0939e2732 100644 --- a/BUILD.md +++ b/BUILD.md @@ -6,7 +6,7 @@ The following packages must be installed before Avro can be built: - Java: JDK 1.8, Maven 3 or better, protobuf-compile - PHP: php7, phpunit, php7-gmp - - Python 3: 3.6 or greater + - Python 3: 3.7 or greater, tox (tox will install other dependencies as needed) - C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config - C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev - C#: .NET Core 2.2 SDK @@ -17,6 +17,7 @@ The following packages must be installed before Avro can be built: Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode, IO::String, Object::Tiny, Compress::ZLib, Error::Simple, Test::More, Test::Exception, Test::Pod + - Rust: rustc and Cargo 1.65.0 or greater - Apache Ant 1.7 - md5sum, sha1sum, used by top-level dist target diff --git a/build.sh b/build.sh index 52ee9cd30c3..572b4e1cb50 100755 --- a/build.sh +++ b/build.sh @@ -56,6 +56,9 @@ DOCKER_BUILD_XTRA_ARGS=${DOCKER_BUILD_XTRA_ARGS-} # Override the docker image name used. DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME-} +# When building a docker container, these are the files that will sent and available. +DOCKER_EXTRA_CONTEXT="lang/ruby/Gemfile lang/ruby/avro.gemspec lang/ruby/Manifest share/VERSION.txt" + usage() { echo "Usage: $0 {lint|test|dist|sign|clean|veryclean|docker [--args \"docker-args\"]|rat|githooks|docker-test}" exit 1 @@ -208,7 +211,13 @@ do \! -name '*.asc' \! -name '*.txt' ); do (cd "${f%/*}" && shasum -a 512 "${f##*/}") > "$f.sha512" - gpg --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + + if [ -z "$GPG_LOCAL_USER" ]; then + gpg --pinentry-mode loopback --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + else + gpg --pinentry-mode loopback --local-user="$GPG_LOCAL_USER" --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + fi + done set -x @@ -300,8 +309,9 @@ do echo "RUN getent group $GROUP_ID || groupadd -g $GROUP_ID $USER_NAME" echo "RUN getent passwd $USER_ID || useradd -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME" } > Dockerfile + # Include the ruby gemspec for preinstallation. # shellcheck disable=SC2086 - tar -cf- lang/ruby/Gemfile Dockerfile | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - + tar -cf- Dockerfile $DOCKER_EXTRA_CONTEXT | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - rm Dockerfile # By mapping the .m2 directory you can do an mvn install from # within the container and use the result on your normal @@ -336,7 +346,7 @@ do ;; docker-test) - tar -cf- share/docker/Dockerfile lang/ruby/Gemfile | + tar -cf- share/docker/Dockerfile $DOCKER_EXTRA_CONTEXT | docker build -t avro-test -f share/docker/Dockerfile - docker run --rm -v "${PWD}:/avro${DOCKER_MOUNT_FLAG}" --env "JAVA=${JAVA:-8}" avro-test /avro/share/docker/run-tests.sh ;; diff --git a/doc/README.md b/doc/README.md index db2a8454996..31f167d8712 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,6 +1,11 @@ # Apache Avro website This website is base on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme. +Before building the website, you need to initialize submodules. 
+
+```
+git submodule update --init --recursive
+```
 
 ## Previewing the website locally
diff --git a/doc/content/en/blog/news/new-committer-christophe-le-saec.md b/doc/content/en/blog/news/new-committer-christophe-le-saec.md
new file mode 100755
index 00000000000..1522c1722b9
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-christophe-le-saec.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: Christophe Le Saec"
+linkTitle: "New committer: Christophe Le Saec"
+date: 2023-08-09
+---
+
+The Project Management Committee (PMC) for Apache Avro has invited Christophe
+Le Saec to become a committer and we are pleased to announce that
+he has accepted.
+
+Christophe definitely puts in the work and has an impressive breadth of
+knowledge about the languages of the Avro SDK!
+
+As an ASF project, we tend to be very conservative about making changes, and
+Christophe brings in fresh ideas and very quickly proposes concrete
+implementations to prove them. He has a good understanding of Avro, the
+motivation to move things forward, and the expertise to make changes! At the
+same time, he's easy to talk to and flexible in coming to a consensus.
+
+Thanks for all your hard work!
diff --git a/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md
new file mode 100755
index 00000000000..535a2d88185
--- /dev/null
+++ b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md
@@ -0,0 +1,41 @@
+---
+title: "New committer: Oscar Westra van Holthe - Kind"
+linkTitle: "New committer: Oscar Westra van Holthe - Kind"
+date: 2023-08-09
+---
+
+The Project Management Committee (PMC) for Apache Avro has invited Oscar
+Westra van Holthe - Kind to become a committer and we are pleased to announce
+that he has accepted.
+
+Oscar has done some really solid work on the IDL and JavaCC parts of the Java
+SDK. We trust his work and think it's exceptionally high quality. From the
+start, he has already been doing much of the work of a committer, as
+demonstrated by his continuous presence commenting on JIRA, reviewing PRs, and
+offering encouraging and insightful words on the mailing list.
+
+As a bonus, in his spare time, Oscar also maintains the IntelliJ plugin for
+[IDL support](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)!
+
+Thanks for all your hard work, and welcome!
diff --git a/doc/content/en/blog/news/new-pmc-michael-a-smith.md b/doc/content/en/blog/news/new-pmc-michael-a-smith.md
new file mode 100755
index 00000000000..2d203128eca
--- /dev/null
+++ b/doc/content/en/blog/news/new-pmc-michael-a-smith.md
@@ -0,0 +1,34 @@
+---
+title: "New PMC member: Michael A. Smith"
+linkTitle: "New PMC member: Michael A. Smith"
+date: 2023-08-09
+---
+
+The Project Management Committee (PMC) for Apache Avro has invited Michael A.
+Smith to the PMC and we are pleased to announce that he has accepted.
+
+Notably, Michael has taken a leadership role in ensuring the quality of the
+Python SDK, lending his expertise to ensure that Avro has a place in the
+Python community, while keeping our implementation up-to-date with standards
+and modern versions. It's not an easy task, and we appreciate all he does!
diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md index 6ca0deb7b82..8fe3d0592ac 100644 --- a/doc/content/en/docs/++version++/IDL Language/_index.md +++ b/doc/content/en/docs/++version++/IDL Language/_index.md @@ -174,6 +174,7 @@ Some of the logical types supported by Avro's JSON format are also supported by * _date_ (logical type [date]({{< relref "specification#date" >}})) * _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}})) * _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}})) +* _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}})) For example: ```java @@ -183,6 +184,7 @@ record Job { time_ms submitTime; timestamp_ms finishTime; decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; } ``` @@ -203,7 +205,7 @@ record Card { Suit suit; // refers to the enum Card defined above int number; } -``` +``` ### Default Values Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "specification#schema-record" >}}). @@ -270,7 +272,7 @@ Comments that begin with _/**_ are used as the documentation string for the type Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: ```java void `error`(); -``` +``` This syntax is allowed anywhere an identifier is expected. ### Annotations for Ordering and Namespaces @@ -283,13 +285,13 @@ record MyRecord { string @order("descending") myDescendingField; string @order("ignore") myIgnoredField; } -``` +``` A field's type (with the exception of type references) may also be preceded by annotations, e.g.: ```java record MyRecord { @java-class("java.util.ArrayList") array myStrings; } -``` +``` This can be used to support java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: ```java record MyRecord { diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md index 6b28010ad0e..7cc5a17547e 100755 --- a/doc/content/en/docs/++version++/Specification/_index.md +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -75,6 +75,8 @@ Records use the type name "record" and support the following attributes: * _name_: a JSON string providing the name of the field (required), and * _doc_: a JSON string describing this field for users (optional). * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. 
Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. *field default values* @@ -93,9 +95,6 @@ Records use the type name "record" and support the following attributes: | map | object | `{"a": 1}` | | fixed | string | `"\u00ff"` | - * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. - * _aliases_: a JSON array of strings, providing alternate names for this field (optional). - For example, a linked-list of 64-bit values may be defined with: ```jsonc { @@ -394,12 +393,12 @@ For example, the union schema `["null","string","Foo"]`, where Foo is a record n Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. -#### Single-object encoding +### Single-object encoding In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. -##### Single object encoding specification +#### Single object encoding specification Single Avro objects are encoded as follows: 1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). @@ -460,7 +459,7 @@ A file header is thus described by the following schema: "fields" : [ {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, {"name": "meta", "type": {"type": "map", "values": "bytes"}}, - {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} ] } ``` @@ -472,7 +471,18 @@ A file data block consists of: * The serialized objects. If a codec is specified, this is compressed by that codec. * The file's 16-byte sync marker. -Thus, each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enable detection of corrupt blocks and help ensure data integrity. 
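To make the single-object framing above concrete, here is a minimal Java sketch of assembling the 10-byte header; it leans on the Java SDK's `SchemaNormalization.parsingFingerprint64` for the CRC-64-AVRO fingerprint, while the class name and scaffolding are illustrative:

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import org.apache.avro.Schema;
import org.apache.avro.SchemaNormalization;

public class SingleObjectHeader {
    // Two-byte marker C3 01, followed by the 8-byte little-endian
    // CRC-64-AVRO fingerprint of the writer schema.
    static byte[] header(Schema writerSchema) {
        long fingerprint = SchemaNormalization.parsingFingerprint64(writerSchema);
        return ByteBuffer.allocate(10)
                .order(ByteOrder.LITTLE_ENDIAN) // fingerprint bytes go out little-endian
                .put((byte) 0xC3)
                .put((byte) 0x01)
                .putLong(fingerprint)
                .array();
    }
}
```

The Avro-binary-encoded body follows immediately after this header, and a reader uses the fingerprint to look the writer schema up in whatever schema store it keeps.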
### Required Codecs diff --git a/doc/content/en/docs/++version++/api-py.md b/doc/content/en/docs/++version++/api-py.md new file mode 100644 index 00000000000..fb4f4ba13b5 --- /dev/null +++ b/doc/content/en/docs/++version++/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/++version++/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/examples/java-example/pom.xml b/doc/examples/java-example/pom.xml index e0ed51f6358..73602f96fb8 100644 --- a/doc/examples/java-example/pom.xml +++ b/doc/examples/java-example/pom.xml @@ -38,7 +38,7 @@ org.apache.avro avro - 1.11.1 + 1.11.3 @@ -55,7 +55,7 @@ org.apache.avro avro-maven-plugin - 1.11.1 + 1.11.3 generate-sources @@ -72,7 +72,7 @@ org.apache.maven.plugins maven-plugin - 1.11.1 + 1.11.3 1.8 1.8 @@ -92,7 +92,7 @@ org.apache.avro avro-maven-plugin - [1.11.1,) + [1.11.3,) schema diff --git a/doc/examples/mr-example/pom.xml b/doc/examples/mr-example/pom.xml index 7eafd21d35d..3cc50dbbd8d 100644 --- a/doc/examples/mr-example/pom.xml +++ b/doc/examples/mr-example/pom.xml @@ -45,7 +45,7 @@ org.apache.avro avro-maven-plugin - 1.11.1 + 1.11.3 generate-sources @@ -73,7 +73,7 @@ org.apache.avro avro-maven-plugin - [1.11.1,) + [1.11.3,) schema @@ -94,12 +94,12 @@ org.apache.avro avro - 1.11.1 + 1.11.3 org.apache.avro avro-mapred - 1.11.1 + 1.11.3 org.apache.hadoop diff --git a/lang/c++/api/Reader.hh b/lang/c++/api/Reader.hh index ca6a719e31c..588a912648a 100644 --- a/lang/c++/api/Reader.hh +++ b/lang/c++/api/Reader.hh @@ -84,7 +84,7 @@ public: union { double d; uint64_t i; - } v; + } v = { 0 }; reader_.read(v.i); val = v.d; } diff --git a/lang/c++/api/buffer/Buffer.hh b/lang/c++/api/buffer/Buffer.hh index bc3baf12330..45c439d6d43 100644 --- a/lang/c++/api/buffer/Buffer.hh +++ b/lang/c++/api/buffer/Buffer.hh @@ -145,7 +145,7 @@ public: **/ size_type wroteTo(size_type size) { - int wrote = 0; + size_type wrote = 0; if (size) { if (size > freeSpace()) { throw std::length_error("Impossible to write more data than free space"); diff --git a/lang/c++/impl/avrogencpp.cc b/lang/c++/impl/avrogencpp.cc index 01c4cdbf3c3..94f7befdc37 100644 --- a/lang/c++/impl/avrogencpp.cc +++ b/lang/c++/impl/avrogencpp.cc @@ -53,12 +53,6 @@ using boost::lexical_cast; using avro::compileJsonSchema; using avro::ValidSchema; -#if __cplusplus >= 201703L -#define ANY_NS "std" -#else -#define ANY_NS "boost" -#endif - struct PendingSetterGetter { string structName; string type; @@ -84,6 +78,8 @@ class CodeGen { const std::string headerFile_; const std::string includePrefix_; const bool noUnion_; + const bool useCpp17_; + std::string anyNs; const std::string guardString_; boost::mt19937 random_; @@ -108,16 +104,25 @@ class CodeGen { void generateRecordTraits(const NodePtr &n); void generateUnionTraits(const NodePtr &n); void emitCopyright(); + void emitGeneratedWarning(); public: CodeGen(std::ostream &os, std::string ns, std::string schemaFile, std::string headerFile, std::string guardString, - std::string includePrefix, bool noUnion) : unionNumber_(0), os_(os), inNamespace_(false), ns_(std::move(ns)), + std::string includePrefix, bool noUnion, bool useCpp17) : unionNumber_(0), os_(os), inNamespace_(false), ns_(std::move(ns)), schemaFile_(std::move(schemaFile)), headerFile_(std::move(headerFile)), - includePrefix_(std::move(includePrefix)), noUnion_(noUnion), + includePrefix_(std::move(includePrefix)), noUnion_(noUnion), useCpp17_(useCpp17), guardString_(std::move(guardString)), - 
random_(static_cast(::time(nullptr))) {} + random_(static_cast(::time(nullptr))) + { +#if __cplusplus >= 201703L + anyNs = "std"; +#else + anyNs = (useCpp17) ? "std" : "boost"; +#endif + } + void generate(const ValidSchema &schema); }; @@ -318,7 +323,7 @@ string CodeGen::unionName() { static void generateGetterAndSetter(ostream &os, const string &structName, const string &type, const string &name, - size_t idx) { + size_t idx, const std::string& anyNs) { string sn = " " + structName + "::"; os << "inline\n"; @@ -326,9 +331,9 @@ static void generateGetterAndSetter(ostream &os, os << type << sn << "get_" << name << "() const {\n" << " if (idx_ != " << idx << ") {\n" << " throw avro::Exception(\"Invalid type for " - << "union\");\n" + << "union " << structName << "\");\n" << " }\n" - << " return " << ANY_NS << "::any_cast<" << type << " >(value_);\n" + << " return " << anyNs << "::any_cast<" << type << " >(value_);\n" << "}\n\n"; os << "inline\n" @@ -385,7 +390,7 @@ string CodeGen::generateUnionType(const NodePtr &n) { os_ << "struct " << result << " {\n" << "private:\n" << " size_t idx_;\n" - << " " << ANY_NS << "::any value_;\n" + << " " << anyNs << "::any value_;\n" << "public:\n" << " size_t idx() const { return idx_; }\n"; @@ -397,7 +402,7 @@ string CodeGen::generateUnionType(const NodePtr &n) { << " }\n" << " void set_null() {\n" << " idx_ = " << i << ";\n" - << " value_ = " << ANY_NS << "::any();\n" + << " value_ = " << anyNs << "::any();\n" << " }\n"; } else { const string &type = types[i]; @@ -702,30 +707,38 @@ void CodeGen::emitCopyright() { " * See the License for the specific language governing " "permissions and\n" " * limitations under the License.\n" - " */\n\n\n"; + " */\n\n"; +} + +void CodeGen::emitGeneratedWarning() { + os_ << "/* This code was generated by avrogencpp " << AVRO_VERSION << ". Do not edit.*/\n\n"; } string CodeGen::guard() { string h = headerFile_; makeCanonical(h, true); - return h + "_" + lexical_cast(random_()) + "__H_"; + return h + "_" + lexical_cast(random_()) + "_H"; } void CodeGen::generate(const ValidSchema &schema) { emitCopyright(); + emitGeneratedWarning(); string h = guardString_.empty() ? 
guard() : guardString_; os_ << "#ifndef " << h << "\n"; os_ << "#define " << h << "\n\n\n"; - os_ << "#include \n" + os_ << "#include \n"; #if __cplusplus >= 201703L - << "#include \n" + os_ << "#include \n"; #else - << "#include \"boost/any.hpp\"\n" + if (useCpp17_) + os_ << "#include \n"; + else + os_ << "#include \"boost/any.hpp\"\n"; #endif - << "#include \"" << includePrefix_ << "Specific.hh\"\n" + os_ << "#include \"" << includePrefix_ << "Specific.hh\"\n" << "#include \"" << includePrefix_ << "Encoder.hh\"\n" << "#include \"" << includePrefix_ << "Decoder.hh\"\n" << "\n"; @@ -748,7 +761,7 @@ void CodeGen::generate(const ValidSchema &schema) { pendingGettersAndSetters.begin(); it != pendingGettersAndSetters.end(); ++it) { generateGetterAndSetter(os_, it->structName, it->type, it->name, - it->idx); + it->idx, anyNs); } for (vector::const_iterator it = @@ -810,8 +823,14 @@ int main(int argc, char **argv) { const string NO_UNION_TYPEDEF("no-union-typedef"); po::options_description desc("Allowed options"); - desc.add_options()("help,h", "produce help message")("version,V", "produce version information")("include-prefix,p", po::value()->default_value("avro"), - "prefix for include headers, - for none, default: avro")("no-union-typedef,U", "do not generate typedefs for unions in records")("namespace,n", po::value(), "set namespace for generated code")("input,i", po::value(), "input file")("output,o", po::value(), "output file to generate"); + desc.add_options()("help,h", "produce help message") + ("version,V", "produce version information") + ("include-prefix,p", po::value()->default_value("avro"), "prefix for include headers, - for none, default: avro") + ("no-union-typedef,U", "do not generate typedefs for unions in records") + ("namespace,n", po::value(), "set namespace for generated code") + ("cpp17", "use c++17 instead of boost") + ("input,i", po::value(), "input file") + ("output,o", po::value(), "output file to generate"); po::variables_map vm; po::store(po::parse_command_line(argc, argv, desc), vm); @@ -837,6 +856,8 @@ int main(int argc, char **argv) { string inf = vm.count(IN_FILE) > 0 ? 
vm[IN_FILE].as() : string(); string incPrefix = vm[INCLUDE_PREFIX].as(); bool noUnion = vm.count(NO_UNION_TYPEDEF) != 0; + bool useCpp17 = vm.count("cpp17") != 0; + if (incPrefix == "-") { incPrefix.clear(); } else if (*incPrefix.rbegin() != '/') { @@ -856,9 +877,9 @@ int main(int argc, char **argv) { if (!outf.empty()) { string g = readGuard(outf); ofstream out(outf.c_str()); - CodeGen(out, ns, inf, outf, g, incPrefix, noUnion).generate(schema); + CodeGen(out, ns, inf, outf, g, incPrefix, noUnion, useCpp17).generate(schema); } else { - CodeGen(std::cout, ns, inf, outf, "", incPrefix, noUnion).generate(schema); + CodeGen(std::cout, ns, inf, outf, "", incPrefix, noUnion, useCpp17).generate(schema); } return 0; } catch (std::exception &e) { diff --git a/lang/c/tests/CMakeLists.txt b/lang/c/tests/CMakeLists.txt index 2e84a06a31c..1413a3f376f 100644 --- a/lang/c/tests/CMakeLists.txt +++ b/lang/c/tests/CMakeLists.txt @@ -64,6 +64,7 @@ add_avro_executable(test_interop_data) add_avro_test_checkmem(test_data_structures) add_avro_test_checkmem(test_avro_schema) +add_avro_test_checkmem(test_avro_commons_schema) add_avro_test_checkmem(test_avro_schema_names) add_avro_test_checkmem(test_avro_values) add_avro_test_checkmem(test_avro_766) diff --git a/lang/c/tests/test_avro_commons_schema.c b/lang/c/tests/test_avro_commons_schema.c new file mode 100644 index 00000000000..e3751e9836a --- /dev/null +++ b/lang/c/tests/test_avro_commons_schema.c @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "avro.h" +#include "avro_private.h" +#include +#include +#include +#include +#ifdef _WIN32 + #include "msdirent.h" +#else + #include +#endif + +avro_writer_t avro_stderr; + +static avro_schema_t read_common_schema_test(const char *dirpath) { + char schemafilepath[1024]; + char jsontext[4096]; + + avro_schema_t schema; + int n = snprintf(schemafilepath, sizeof(schemafilepath), "%s/schema.json", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s !\n", dirpath); + exit(EXIT_FAILURE); + } + FILE* fp = fopen(schemafilepath, "r"); + if (!fp) { + fprintf(stderr, "can't open file %s !\n", schemafilepath); + exit(EXIT_FAILURE); + } + int rval = fread(jsontext, 1, sizeof(jsontext) - 1, fp); + fclose(fp); + jsontext[rval] = '\0'; + + int test_rval = avro_schema_from_json(jsontext, 0, &schema, NULL); + if (test_rval != 0) { + fprintf(stderr, "fail! Can' read schema from file %s\n", schemafilepath); + exit(EXIT_FAILURE); + } + return schema; +} + +static void create_writer(avro_schema_t schema, avro_file_writer_t* writer) +{ + // create / reset copy.avro file. 
+ FILE* copyFile = fopen("./copy.avro", "w"); + if (!copyFile) { + fprintf(stderr, "can't create file copy.avro !\n"); + exit(EXIT_FAILURE); + } + fclose(copyFile); + + // create avro writer on file. + if (avro_file_writer_create("./copy.avro", schema, writer)) { + fprintf(stdout, "\nThere was an error creating db: %s", avro_strerror()); + exit(EXIT_FAILURE); + } +} + +static void read_data(const char *dirpath, avro_schema_t schema) { + char datafilepath[1024]; + int n = snprintf(datafilepath, sizeof(datafilepath), "%s/data.avro", dirpath); + if (n < 0) { + fprintf(stderr, "Size of dir path is too long %s/data.avro !\n", dirpath); + exit(EXIT_FAILURE); + } + + avro_file_reader_t reader; + avro_datum_t datum; + int rval = avro_file_reader(datafilepath, &reader); + if (rval) { + exit(EXIT_FAILURE); + } + + avro_file_writer_t writer; + create_writer(schema, &writer); + + int records_read = 0; + while ((rval = avro_file_reader_read(reader, schema, &datum)) == 0) { + records_read++; + if (avro_file_writer_append(writer, datum)) { + fprintf(stdout, "\nCan't write record: %s\n", avro_strerror()); + exit(EXIT_FAILURE); + } + + avro_datum_decref(datum); + } + fprintf(stdout, "\nExit run test OK => %d records", records_read); + remove("./copy.avro"); + fflush(stdout); + avro_file_reader_close(reader); + avro_file_writer_close(writer); +} + +static void run_tests(const char *dirpath) +{ + fprintf(stdout, "\nRun test for path '%s'", dirpath); + avro_schema_t schema = read_common_schema_test(dirpath); + read_data(dirpath, schema); + avro_schema_decref(schema); +} + + + +int main(int argc, char *argv[]) +{ + char *srcdir = "../../../share/test/data/schemas"; + AVRO_UNUSED(argc); + AVRO_UNUSED(argv); + + avro_stderr = avro_writer_file(stderr); + + DIR* dir = opendir(srcdir); + if (dir == NULL) { + fprintf(stdout, "Unable to open '%s'\n", srcdir); + fflush(stdout); + exit(EXIT_FAILURE); + } + struct dirent *dent; + do { + dent = readdir(dir); + + if (dent && dent->d_name[0] != '.' 
&& dent->d_type == DT_DIR) { + char filepath[1024]; + snprintf(filepath, sizeof(filepath), "%s/%s", srcdir, dent->d_name); + run_tests(filepath); + } + } + while(dent != NULL); + closedir(dir); + + avro_writer_free(avro_stderr); + return EXIT_SUCCESS; +} diff --git a/lang/csharp/README.md b/lang/csharp/README.md index 70fc90e86da..6f923fe0273 100644 --- a/lang/csharp/README.md +++ b/lang/csharp/README.md @@ -17,20 +17,20 @@ Install-Package Apache.Avro ## Project Target Frameworks -| Project | Published to nuget.org | Type | .NET Standard 2.0 | .NET Standard 2.1 | .NET Core 3.1 | .NET 5.0 | .NET 6.0 | -|:-------------------:|:--------------------------:|:----------:|:------------------:|:-----------------:|:-------------:|:---------:|:---------:| -| Avro.main | Apache.Avro | Library | ✔️ | ✔️ | | | | -| Avro.File.Snappy | Apache.Avro.File.Snappy | Library | ✔️ | ✔️ | | | | -| Avro.File.BZip2 | Apache.Avro.File.BZip2 | Library | ✔️ | ✔️ | | | | -| Avro.File.XZ | Apache.Avro.File.XZ | Library | ✔️ | ✔️ | | | | -| Avro.File.Zstandard | Apache.Avro.File.Zstandard | Library | ✔️ | ✔️ | | | | -| Avro.codegen | Apache.Avro.Tools | Exe | | | ✔️ |✔️ |✔️ | -| Avro.ipc | | Library | ✔️ | ✔️ | | | | -| Avro.ipc.test | | Unit Tests | | | ✔️ |✔️ |✔️ | -| Avro.msbuild | | Library | ✔️ | ✔️ | | | | -| Avro.perf | | Exe | | | ✔️ |✔️ |✔️ | -| Avro.test | | Unit Tests | | | ✔️ |✔️ |✔️ | -| Avro.benchmark | | Exe | | | ✔️ |✔️ |✔️ | +| Project | Published to nuget.org | Type | .NET Standard 2.0 | .NET Standard 2.1 | .NET Core 3.1 | .NET 5.0 | .NET 6.0 | .NET 7.0 | +|:-------------------:|:--------------------------:|:----------:|:------------------:|:-----------------:|:-------------:|:---------:|:---------:|:---------:| +| Avro.main | Apache.Avro | Library | ✔️ | ✔️ | | | | | +| Avro.File.Snappy | Apache.Avro.File.Snappy | Library | ✔️ | ✔️ | | | | | +| Avro.File.BZip2 | Apache.Avro.File.BZip2 | Library | ✔️ | ✔️ | | | | | +| Avro.File.XZ | Apache.Avro.File.XZ | Library | ✔️ | ✔️ | | | | | +| Avro.File.Zstandard | Apache.Avro.File.Zstandard | Library | ✔️ | ✔️ | | | | | +| Avro.codegen | Apache.Avro.Tools | Exe | | | ✔️ |✔️ |✔️ |✔️ | +| Avro.ipc | | Library | ✔️ | ✔️ | | | | | +| Avro.ipc.test | | Unit Tests | | | ✔️ |✔️ |✔️ |✔️ | +| Avro.msbuild | | Library | ✔️ | ✔️ | | | | | +| Avro.perf | | Exe | | | ✔️ |✔️ |✔️ |✔️ | +| Avro.test | | Unit Tests | | | ✔️ |✔️ |✔️ |✔️ | +| Avro.benchmark | | Exe | | | ✔️ |✔️ |✔️ |✔️ | ## Dependency package version strategy diff --git a/lang/csharp/build.sh b/lang/csharp/build.sh index c65e08d5df1..623ef03a353 100755 --- a/lang/csharp/build.sh +++ b/lang/csharp/build.sh @@ -42,7 +42,7 @@ do perf) pushd ./src/apache/perf/ - dotnet run --configuration Release --framework net6.0 + dotnet run --configuration Release --framework net7.0 ;; dist) @@ -77,7 +77,7 @@ do ;; interop-data-generate) - dotnet run --project src/apache/test/Avro.test.csproj --framework net6.0 ../../share/test/schemas/interop.avsc ../../build/interop/data + dotnet run --project src/apache/test/Avro.test.csproj --framework net7.0 ../../share/test/schemas/interop.avsc ../../build/interop/data ;; interop-data-test) diff --git a/lang/csharp/common.props b/lang/csharp/common.props index 72a79fda086..1874ea2a100 100644 --- a/lang/csharp/common.props +++ b/lang/csharp/common.props @@ -37,9 +37,7 @@ - - netcoreapp3.1;net5.0;net6.0 - net7.0 + netcoreapp3.1;net5.0;net6.0;net7.0 netstandard2.0;netstandard2.1 @@ -51,6 +49,7 @@ Copyright © 2019 The Apache Software Foundation. 
logo.png LICENSE + README.md https://avro.apache.org/ Avro;Apache;Serialization;Binary;Json;Schema https://github.com/apache/avro.git @@ -58,9 +57,16 @@ + + + + false + true + + false true diff --git a/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj index 5b38895a18c..b944de3c2d4 100644 --- a/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj +++ b/lang/csharp/src/apache/benchmark/Avro.benchmark.csproj @@ -31,6 +31,12 @@ false + + + + $(NoWarn);CS8981 + + diff --git a/lang/csharp/src/apache/benchmark/Program.cs b/lang/csharp/src/apache/benchmark/Program.cs index 4381897e24e..5b63517623c 100644 --- a/lang/csharp/src/apache/benchmark/Program.cs +++ b/lang/csharp/src/apache/benchmark/Program.cs @@ -21,8 +21,8 @@ namespace Avro.Benchmark { public class Program { - // dotnet run -c Release -f net6.0 - // dotnet run -c Release -f net6.0 --runtimes netcoreapp3.1 net5.0 net6.0 + // dotnet run -c Release -f net7.0 + // dotnet run -c Release -f net7.0 --runtimes netcoreapp3.1 net5.0 net6.0 net7.0 public static void Main(string[] args) { BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args); diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs index e579d8bb07c..7e793627201 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs @@ -1136,6 +1136,50 @@ public virtual void WriteCompileUnit(string outputFile) } } + /// + /// Gets names and generated code of the schema(s) types + /// + /// + public virtual IDictionary GetTypes() + { + using (var cscp = new CSharpCodeProvider()) + { + var opts = new CodeGeneratorOptions + { + BracingStyle = "C", IndentString = "\t", BlankLinesBetweenMembers = false + }; + CodeNamespaceCollection nsc = CompileUnit.Namespaces; + + var sourceCodeByName = new Dictionary(); + for (int i = 0; i < nsc.Count; i++) + { + var ns = nsc[i]; + + var new_ns = new CodeNamespace(ns.Name); + new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); + foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { + new_ns.Imports.Add(nci); + } + + var types = ns.Types; + for (int j = 0; j < types.Count; j++) + { + var ctd = types[j]; + using (var writer = new StringWriter()) + { + new_ns.Types.Add(ctd); + cscp.GenerateCodeFromNamespace(new_ns, writer, opts); + new_ns.Types.Remove(ctd); + sourceCodeByName[ctd.Name] = writer.ToString(); + } + } + } + + return sourceCodeByName; + } + } + /// /// Writes each types in each namespaces into individual files. /// diff --git a/lang/csharp/src/apache/main/IO/Encoder.cs b/lang/csharp/src/apache/main/IO/Encoder.cs index 84a2099a195..0c1712af430 100644 --- a/lang/csharp/src/apache/main/IO/Encoder.cs +++ b/lang/csharp/src/apache/main/IO/Encoder.cs @@ -187,5 +187,10 @@ public interface Encoder /// Position within data where the contents start. /// Number of bytes to write. void WriteFixed(byte[] data, int start, int len); + + /// + /// Flushes the encoder. + /// + void Flush(); } } diff --git a/lang/csharp/src/apache/main/IO/JsonEncoder.cs b/lang/csharp/src/apache/main/IO/JsonEncoder.cs index 48415d7cddd..c159a013e8c 100644 --- a/lang/csharp/src/apache/main/IO/JsonEncoder.cs +++ b/lang/csharp/src/apache/main/IO/JsonEncoder.cs @@ -28,7 +28,7 @@ namespace Avro.IO /// An for Avro's JSON data encoding. /// /// JsonEncoder buffers output, and data may not appear on the output until - /// is called. + /// is called. /// /// JsonEncoder is not thread-safe. 
/// diff --git a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs index bfc88847176..c823253692d 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs @@ -176,6 +176,7 @@ public void WriteArrayValues(object array, WriteItem valueWriter, Encoder encode var list = (IList) array; for (int i = 0; i < list.Count; i++ ) { + encoder.StartItem(); valueWriter(list[i], encoder); } } diff --git a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs index e514347206e..f8eef4a9aba 100644 --- a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs +++ b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs @@ -18,6 +18,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.RegularExpressions; using Microsoft.CodeAnalysis.CSharp; using NUnit.Framework; @@ -81,6 +82,33 @@ public void TestGenerateNamesException() Protocol protocol = null; Assert.Throws(() => this.GenerateNames(protocol)); } + + + [Test] + public void GetTypesShouldReturnTypes() + { + AddSchema(@" +{ + ""name"": ""PlanetEnum"", + ""namespace"": ""Space.Models"", + ""type"": ""enum"", + ""symbols"": [ + ""Earth"", + ""Mars"", + ""Jupiter"", + ""Saturn"", + ""Uranus"", + ""Neptune"" + ] +} +"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasPlanetEnumCode = types.TryGetValue("PlanetEnum", out string planetEnumCode); + Assert.That(hasPlanetEnumCode); + Assert.That(Regex.Matches(planetEnumCode, "public enum PlanetEnum").Count, Is.EqualTo(1)); + } } } } diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs index 58d047f5ef4..28aab10e70c 100644 --- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -17,12 +17,14 @@ */ using System; +using System.Collections.Generic; using NUnit.Framework; using System.IO; using System.Linq; using System.Text; using Avro.Generic; using Avro.IO; +using Avro.Specific; using Newtonsoft.Json; using Newtonsoft.Json.Linq; @@ -235,7 +237,6 @@ public void TestJsonUnionWithLogicalTypes(String value) public void TestJsonUnionWithRecord(String value) { Schema schema = Schema.Parse( - "[\"null\",\n" + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + " {\"type\":\"record\",\"name\":\"myrecord\", \"namespace\":\"com\"," + @@ -285,14 +286,38 @@ public void TestJsonDecoderReorderFields() decoder.SkipArray(); } + [Test] + public void TestJsonDecoderSpecificWithArray() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + + DatumWriter writer = new SpecificDatumWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}]}", output); + } + } + private byte[] fromJsonToAvro(string json, Schema schema) { DatumReader reader = new GenericDatumReader(schema, schema); GenericDatumWriter writer = new GenericDatumWriter(schema); MemoryStream output = new MemoryStream(); - JsonDecoder decoder = new JsonDecoder(schema, json); - BinaryEncoder encoder = new 
BinaryEncoder(output); + Decoder decoder = new JsonDecoder(schema, json); + Encoder encoder = new BinaryEncoder(output); object datum = reader.Read(null, decoder); @@ -326,4 +351,85 @@ private string fromDatumToJson(object datum, Schema schema, bool includeNamespac return Encoding.UTF8.GetString(output.ToArray()); } } + + public partial class Root : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Root\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"myarray" + + "\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avr" + + "o.Test\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}}}]}"); + + private IList _myarray; + + public virtual global::Avro.Schema Schema + { + get { return Root._SCHEMA; } + } + + public IList myarray + { + get { return this._myarray; } + set { this._myarray = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.myarray; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.myarray = (IList)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } + + public partial class Item : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"id\",\"ty" + + "pe\":\"long\"}]}"); + + private long _id; + + public virtual global::Avro.Schema Schema + { + get { return Item._SCHEMA; } + } + + public long id + { + get { return this._id; } + set { this._id = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.id; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.id = (System.Int64)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } } diff --git a/lang/csharp/versions.props b/lang/csharp/versions.props index 170e609db36..4acdaa9d759 100644 --- a/lang/csharp/versions.props +++ b/lang/csharp/versions.props @@ -26,14 +26,14 @@ !!! SHIPPED CLASS LIBRARIES SHOULD USE MINIMUMVERSIONs FOR SOME LIBRARIES. SEE BELOW !!! 
--> - 12.0.3 - 6.0.0 + 13.0.1 + 7.0.0 4.3.0 4.7.0 4.7.0 - 1.3.3 + 1.4.1 1.3.0 4.1.0 1.1.7 @@ -55,21 +55,19 @@ Please sort the packages alphabetically --> - 0.13.1 - 3.1.2 - 3.1.2 - 17.1.0 - 17.1.0 - 4.1.0 - 4.1.0 - 4.1.0 - - 6.0.0 - 7.0.0-preview* - 17.1.0 - 3.13.2 - 3.15.0 - 4.2.1 + 0.13.2 + 3.2.0 + 3.2.0 + 17.4.0 + 17.4.0 + 4.3.1 + 4.3.1 + 4.3.1 + 7.0.1 + 17.4.0 + 3.13.3 + 3.15.2 + 4.3.0 1.1.118 diff --git a/lang/java/android/pom.xml b/lang/java/android/pom.xml index 1ec1f8eb107..c05c14e5682 100644 --- a/lang/java/android/pom.xml +++ b/lang/java/android/pom.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../pom.xml diff --git a/lang/java/archetypes/avro-service-archetype/pom.xml b/lang/java/archetypes/avro-service-archetype/pom.xml index a1e160f450e..72a773bb627 100644 --- a/lang/java/archetypes/avro-service-archetype/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/pom.xml @@ -23,7 +23,7 @@ avro-archetypes-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../pom.xml diff --git a/lang/java/archetypes/pom.xml b/lang/java/archetypes/pom.xml index 922ff619612..cb35096f038 100644 --- a/lang/java/archetypes/pom.xml +++ b/lang/java/archetypes/pom.xml @@ -22,7 +22,7 @@ org.apache.avro avro-parent - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../pom.xml diff --git a/lang/java/avro/pom.xml b/lang/java/avro/pom.xml index 0b148a140b8..ee7b525c94b 100644 --- a/lang/java/avro/pom.xml +++ b/lang/java/avro/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java index 4ae75f4a5cb..934672e7d30 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java @@ -21,6 +21,9 @@ import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; +import java.util.ServiceLoader; + +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; @@ -28,23 +31,33 @@ /** * Conversion between generic and logical type instances. *

- * Instances of this class are added to GenericData to convert a logical type to
- * a particular representation.
+ * Instances of this class can be added to GenericData to convert a logical type
+ * to a particular representation. This can be done manually, using
+ * {@link GenericData#addLogicalTypeConversion(Conversion)}, or automatically.
+ * This last option uses the Java {@link ServiceLoader}, and requires the
+ * implementation to be a public class with a public no-arg constructor, be
+ * named in a file called {@code /META-INF/services/org.apache.avro.Conversion},
+ * and both must be available on the classpath.
 *

- * Implementations must provide: * {@link #getConvertedType()}: get the Java
- * class used for the logical type * {@link #getLogicalTypeName()}: get the
- * logical type this implements
+ * Implementations must provide:
+ * <ul>
+ * <li>{@link #getConvertedType()}: get the Java class used for the logical
+ * type</li>
+ * <li>{@link #getLogicalTypeName()}: get the logical type this implements</li>
+ * </ul>
*

- * Subclasses must also override all of the conversion methods for Avro's base - * types that are valid for the logical type, or else risk causing + * Subclasses must also override the conversion methods for Avro's base types + * that are valid for the logical type, or else risk causing * {@code UnsupportedOperationException} at runtime. *
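Taken together, the registration and override requirements above can be sketched as follows. This is an illustrative example, not part of this patch: the package, class name, and the choice of the "uuid" logical type are assumptions (Avro ships its own UUID conversion in Conversions), but the overridden methods are the actual Conversion API.

// Hypothetical conversion mapping the "uuid" logical type onto java.util.UUID.
// Public with a public no-arg constructor, so it qualifies for ServiceLoader.
package com.example.avro;

import java.util.UUID;

import org.apache.avro.Conversion;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;

public class UuidConversion extends Conversion<UUID> {
  @Override
  public Class<UUID> getConvertedType() {
    return UUID.class; // the Java class used for the logical type
  }

  @Override
  public String getLogicalTypeName() {
    return "uuid"; // the logical type this conversion implements
  }

  // Override only the base-type conversions that are valid for this logical
  // type; the inherited implementations throw UnsupportedOperationException.
  @Override
  public UUID fromCharSequence(CharSequence value, Schema schema, LogicalType type) {
    return UUID.fromString(value.toString());
  }

  @Override
  public CharSequence toCharSequence(UUID value, Schema schema, LogicalType type) {
    return value.toString();
  }
}

Automatic registration would then need a file META-INF/services/org.apache.avro.Conversion containing the single line com.example.avro.UuidConversion; the manual route is GenericData.get().addLogicalTypeConversion(new UuidConversion()).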

* Optionally, use {@link #getRecommendedSchema()} to provide a Schema that will - * be used when a Schema is generated for the class returned by - * {@code getConvertedType}. + * be used when generating a Schema for the class. This is useful when using + * {@code ReflectData} or {@code ProtobufData}, for example. * - * @param a Java type that generic data is converted to + * @param a Java type that can represent the named logical type + * @see ServiceLoader */ +@SuppressWarnings("unused") public abstract class Conversion { /** @@ -65,9 +78,9 @@ public abstract class Conversion { * Certain logical types may require adjusting the code within the "setter" * methods to make sure the data that is set is properly formatted. This method * allows the Conversion to generate custom setter code if required. - * - * @param varName - * @param valParamName + * + * @param varName the name of the variable holding the converted value + * @param valParamName the name of the parameter with the new converted value * @return a String for the body of the setter method */ public String adjustAndSetValue(String varName, String valParamName) { @@ -102,7 +115,7 @@ public T fromCharSequence(CharSequence value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromCharSequence is not supported for " + type.getName()); } - public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { + public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("fromEnumSymbol is not supported for " + type.getName()); } @@ -150,7 +163,7 @@ public CharSequence toCharSequence(T value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("toCharSequence is not supported for " + type.getName()); } - public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { + public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) { throw new UnsupportedOperationException("toEnumSymbol is not supported for " + type.getName()); } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java index 1c28c9adb81..7d01fc62a37 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java @@ -106,11 +106,12 @@ public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) { byte fillByte = (byte) (value.signum() < 0 ? 0xFF : 0x00); byte[] unscaled = value.unscaledValue().toByteArray(); byte[] bytes = new byte[schema.getFixedSize()]; - int offset = bytes.length - unscaled.length; + int unscaledLength = unscaled.length; + int offset = bytes.length - unscaledLength; - // Fill the front of the array and copy remaining with unscaled values + // Fill the front with the filler and copy the unscaled value into the remainder Arrays.fill(bytes, 0, offset, fillByte); - System.arraycopy(unscaled, 0, bytes, offset, bytes.length - offset); + System.arraycopy(unscaled, 0, bytes, offset, unscaledLength); return new GenericData.Fixed(schema, bytes); } @@ -147,7 +148,7 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima } /** - * Convert a underlying representation of a logical type (such as a ByteBuffer) + * Convert an underlying representation of a logical type (such as a ByteBuffer) * to a higher level object (such as a BigDecimal). * * @param datum The object to be converted. 
@@ -157,9 +158,9 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima * @param conversion The tool used to finish the conversion. Cannot be null if * datum is not null. * @return The result object, which is a high level object of the logical type. - * If a null datum is passed in, a null value will be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * The null datum always converts to a null value. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. */ public static Object convertToLogicalType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -176,9 +177,9 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy case RECORD: return conversion.fromRecord((IndexedRecord) datum, schema, type); case ENUM: - return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); + return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type); case ARRAY: - return conversion.fromArray((Collection) datum, schema, type); + return conversion.fromArray((Collection) datum, schema, type); case MAP: return conversion.fromMap((Map) datum, schema, type); case FIXED: @@ -201,13 +202,13 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected generic type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected generic type", e); } } /** * Convert a high level representation of a logical type (such as a BigDecimal) - * to the its underlying representation object (such as a ByteBuffer) + * to its underlying representation object (such as a ByteBuffer) * * @param datum The object to be converted. * @param schema The schema of datum. Cannot be null if datum is not null. @@ -218,8 +219,8 @@ public static Object convertToLogicalType(Object datum, Schema schema, LogicalTy * @return The result object, which is an underlying representation object of * the logical type. If the input param datum is null, a null value will * be returned. - * @throws IllegalArgumentException if a null schema, type or conversion is - * passed in while datum is not null. + * @throws IllegalArgumentException if datum is not null, but schema, type or + * conversion is. 
*/ public static Object convertToRawType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -262,7 +263,7 @@ public static Object convertToRawType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected logical type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected logical type", e); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java index ff996889517..e01a3c73ea9 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java @@ -274,15 +274,29 @@ public Protocol(Protocol p) { public Protocol(String name, String doc, String namespace) { super(PROTOCOL_RESERVED); - this.name = name; + setName(name, namespace); this.doc = doc; - this.namespace = namespace; } public Protocol(String name, String namespace) { this(name, null, namespace); } + private void setName(String name, String namespace) { + int lastDot = name.lastIndexOf('.'); + if (lastDot < 0) { + this.name = name; + this.namespace = namespace; + } else { + this.name = name.substring(lastDot + 1); + this.namespace = name.substring(0, lastDot); + } + if (this.namespace != null && this.namespace.isEmpty()) { + this.namespace = null; + } + types.space(this.namespace); + } + /** The name of this protocol. */ public String getName() { return name; @@ -488,20 +502,22 @@ private static Protocol parse(JsonParser parser) { } private void parse(JsonNode json) { - parseNamespace(json); - parseName(json); + parseNameAndNamespace(json); parseTypes(json); parseMessages(json); parseDoc(json); parseProps(json); } - private void parseNamespace(JsonNode json) { - JsonNode nameNode = json.get("namespace"); - if (nameNode == null) - return; // no namespace defined - this.namespace = nameNode.textValue(); - types.space(this.namespace); + private void parseNameAndNamespace(JsonNode json) { + JsonNode nameNode = json.get("protocol"); + if (nameNode == null) { + throw new SchemaParseException("No protocol name specified: " + json); + } + JsonNode namespaceNode = json.get("namespace"); + String namespace = namespaceNode == null ? 
null : namespaceNode.textValue(); + + setName(nameNode.textValue(), namespace); } private void parseDoc(JsonNode json) { @@ -515,13 +531,6 @@ private String parseDocNode(JsonNode json) { return nameNode.textValue(); } - private void parseName(JsonNode json) { - JsonNode nameNode = json.get("protocol"); - if (nameNode == null) - throw new SchemaParseException("No protocol name specified: " + json); - this.name = nameNode.textValue(); - } - private void parseTypes(JsonNode json) { JsonNode defs = json.get("types"); if (defs == null) diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index f6c3de7684e..ad6bf0ca61c 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -26,11 +26,13 @@ import com.fasterxml.jackson.databind.node.DoubleNode; import com.fasterxml.jackson.databind.node.NullNode; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -1285,8 +1287,7 @@ private static class FixedSchema extends NamedSchema { public FixedSchema(Name name, String doc, int size) { super(Type.FIXED, name, doc); - if (size < 0) - throw new IllegalArgumentException("Invalid fixed size: " + size); + SystemLimitException.checkMaxBytesLength(size); this.size = size; } @@ -1429,7 +1430,7 @@ public boolean getValidateDefaults() { * names known to this parser. */ public Schema parse(File file) throws IOException { - return parse(FACTORY.createParser(file)); + return parse(FACTORY.createParser(file), false); } /** @@ -1437,7 +1438,7 @@ public Schema parse(File file) throws IOException { * names known to this parser. The input stream stays open after the parsing. */ public Schema parse(InputStream in) throws IOException { - return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE)); + return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE), true); } /** Read a schema from one or more json strings */ @@ -1454,19 +1455,36 @@ public Schema parse(String s, String... 
more) { */ public Schema parse(String s) { try { - return parse(FACTORY.createParser(s)); + return parse(FACTORY.createParser(s), false); } catch (IOException e) { throw new SchemaParseException(e); } } - private Schema parse(JsonParser parser) throws IOException { + private Schema parse(JsonParser parser, boolean allowDanglingContent) throws IOException { boolean saved = validateNames.get(); boolean savedValidateDefaults = VALIDATE_DEFAULTS.get(); try { validateNames.set(validate); VALIDATE_DEFAULTS.set(validateDefaults); - return Schema.parse(MAPPER.readTree(parser), names); + JsonNode jsonNode = MAPPER.readTree(parser); + Schema schema = Schema.parse(jsonNode, names); + if (!allowDanglingContent) { + String dangling; + StringWriter danglingWriter = new StringWriter(); + int numCharsReleased = parser.releaseBuffered(danglingWriter); + if (numCharsReleased == -1) { + ByteArrayOutputStream danglingOutputStream = new ByteArrayOutputStream(); + parser.releaseBuffered(danglingOutputStream); // if input isn't chars above it must be bytes + dangling = new String(danglingOutputStream.toByteArray(), StandardCharsets.UTF_8).trim(); + } else { + dangling = danglingWriter.toString().trim(); + } + if (!dangling.isEmpty()) { + throw new SchemaParseException("dangling content after end of schema: " + dangling); + } + } + return schema; } catch (JsonParseException e) { throw new SchemaParseException(e); } finally { diff --git a/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java new file mode 100644 index 00000000000..a96f812d84d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import org.slf4j.LoggerFactory; + +/** + * Thrown to prevent making large allocations when reading potentially + * pathological input data from an untrusted source. + *

+ * The following system properties can be set to limit the size of bytes, + * strings and collection types to be allocated: + *

+ * <ul>
+ * <li>{@code org.apache.avro.limits.bytes.maxLength} limits the maximum
+ * size of byte types.</li>
+ * <li>{@code org.apache.avro.limits.collectionItems.maxLength} limits the
+ * maximum number of map and list items that can be read in a single
+ * sequence.</li>
+ * <li>{@code org.apache.avro.limits.string.maxLength} limits the maximum
+ * size of string types.</li>
+ * </ul>
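The three limits are read once, in the static initializer further down this file (resetLimits() is package-private, exposed for tests), so they must be in place before this class first loads. A minimal sketch, assuming a hypothetical application entry point; in practice the equivalent JVM flags (-Dorg.apache.avro.limits.string.maxLength=65536 and so on) are the safer route:

// Hypothetical bootstrap; the property names are the constants defined in
// this class, the values and the surrounding class are illustrative only.
public final class UntrustedDecodeBootstrap {
  public static void main(String[] args) {
    // Must run before SystemLimitException is first loaded, because the
    // limits are captured in its static initializer.
    System.setProperty("org.apache.avro.limits.string.maxLength", "65536");
    System.setProperty("org.apache.avro.limits.bytes.maxLength", "1048576");
    System.setProperty("org.apache.avro.limits.collectionItems.maxLength", "100000");
    // ... construct decoders and read the untrusted data; oversized strings,
    // bytes or collections now fail fast with SystemLimitException.
  }
}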
+ * + * The default is to permit sizes up to {@link #MAX_ARRAY_VM_LIMIT}. + */ +public class SystemLimitException extends AvroRuntimeException { + + /** + * The maximum length of array to allocate (unless necessary). Some VMs reserve + * some header words in an array. Attempts to allocate larger arrays may result + * in {@code OutOfMemoryError: Requested array size exceeds VM limit} + * + * @see JDK-8246725 + */ + // VisibleForTesting + static final int MAX_ARRAY_VM_LIMIT = Integer.MAX_VALUE - 8; + + public static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; + public static final String MAX_COLLECTION_LENGTH_PROPERTY = "org.apache.avro.limits.collectionItems.maxLength"; + public static final String MAX_STRING_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; + + private static int maxBytesLength = MAX_ARRAY_VM_LIMIT; + private static int maxCollectionLength = MAX_ARRAY_VM_LIMIT; + private static int maxStringLength = MAX_ARRAY_VM_LIMIT; + + static { + resetLimits(); + } + + public SystemLimitException(String message) { + super(message); + } + + /** + * Get an integer value stored in a system property, used to configure the + * system behaviour of decoders + * + * @param property The system property to fetch + * @param defaultValue The value to use if the system property is not present or + * parsable as an int + * @return The value from the system property + */ + private static int getLimitFromProperty(String property, int defaultValue) { + String o = System.getProperty(property); + int i = defaultValue; + if (o != null) { + try { + i = Integer.parseUnsignedInt(o); + } catch (NumberFormatException nfe) { + LoggerFactory.getLogger(SystemLimitException.class).warn("Could not parse property " + property + ": " + o, + nfe); + } + } + return i; + } + + /** + * Check to ensure that reading the bytes is within the specified limits. + * + * @param length The proposed size of the bytes to read + * @return The size of the bytes if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the datum would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxBytesLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException( + "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + " bytes in Java library"); + } + if (length > maxBytesLength) { + throw new SystemLimitException("Bytes length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the specified number of items remains within the + * specified limits. + * + * @param existing The number of elements items read in the collection + * @param items The next number of items to read. In normal usage, this is + * always a positive, permitted value. Negative and zero values + * have a special meaning in Avro decoding. + * @return The total number of items in the collection if and only if it is + * within the limit and non-negative. 
+ * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxCollectionLength(long existing, long items) { + long length = existing + items; + if (existing < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + existing); + } + if (items < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + items); + } + if (length > MAX_ARRAY_VM_LIMIT || length < existing) { + throw new UnsupportedOperationException( + "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library"); + } + if (length > maxCollectionLength) { + throw new SystemLimitException("Collection length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the string size is within the specified limits. + * + * @param length The proposed size of the string to read + * @return The size of the string if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxStringLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"); + } + if (length > maxStringLength) { + throw new SystemLimitException("String length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** Reread the limits from the system properties. 
*/ + // VisibleForTesting + static void resetLimits() { + maxBytesLength = getLimitFromProperty(MAX_BYTES_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxCollectionLength = getLimitFromProperty(MAX_COLLECTION_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxStringLength = getLimitFromProperty(MAX_STRING_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java index d33f8bbf018..875abc7d70d 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java @@ -26,15 +26,15 @@ import java.util.AbstractList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.IdentityHashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.ServiceLoader; import java.util.UUID; -import java.util.WeakHashMap; +import java.util.concurrent.ConcurrentMap; import org.apache.avro.AvroMissingFieldException; import org.apache.avro.AvroRuntimeException; @@ -59,6 +59,9 @@ import org.apache.avro.util.internal.Accessor; import com.fasterxml.jackson.databind.JsonNode; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap; + +import static org.apache.avro.util.springframework.ConcurrentReferenceHashMap.ReferenceType.WEAK; /** * Utilities for generic Java data. See {@link GenericRecordBuilder} for a @@ -115,6 +118,7 @@ public GenericData() { /** For subclasses. GenericData does not use a ClassLoader. */ public GenericData(ClassLoader classLoader) { this.classLoader = (classLoader != null) ? classLoader : getClass().getClassLoader(); + loadConversions(); } /** Return the class loader that's used (by subclasses). */ @@ -122,6 +126,17 @@ public ClassLoader getClassLoader() { return classLoader; } + /** + * Use the Java 6 ServiceLoader to load conversions. + * + * @see #addLogicalTypeConversion(Conversion) + */ + private void loadConversions() { + for (Conversion conversion : ServiceLoader.load(Conversion.class, classLoader)) { + addLogicalTypeConversion(conversion); + } + } + private Map> conversions = new HashMap<>(); private Map, Map>> conversionsByClass = new IdentityHashMap<>(); @@ -132,19 +147,17 @@ public Collection> getConversions() { /** * Registers the given conversion to be used when reading and writing with this - * data model. + * data model. Conversions can also be registered automatically, as documented + * on the class {@link Conversion Conversion<T>}. * * @param conversion a logical type Conversion. 
*/ public void addLogicalTypeConversion(Conversion conversion) { conversions.put(conversion.getLogicalTypeName(), conversion); Class type = conversion.getConvertedType(); - Map> conversions = conversionsByClass.get(type); - if (conversions == null) { - conversions = new LinkedHashMap<>(); - conversionsByClass.put(type, conversions); - } - conversions.put(conversion.getLogicalTypeName(), conversion); + Map> conversionsForClass = conversionsByClass.computeIfAbsent(type, + k -> new LinkedHashMap<>()); + conversionsForClass.put(conversion.getLogicalTypeName(), conversion); } /** @@ -185,11 +198,11 @@ public Conversion getConversionByClass(Class datumClass, LogicalType l * @return the conversion for the logical type, or null */ @SuppressWarnings("unchecked") - public Conversion getConversionFor(LogicalType logicalType) { + public Conversion getConversionFor(LogicalType logicalType) { if (logicalType == null) { return null; } - return (Conversion) conversions.get(logicalType.getName()); + return (Conversion) conversions.get(logicalType.getName()); } public static final String FAST_READER_PROP = "org.apache.avro.fastread"; @@ -1137,6 +1150,73 @@ public int compare(Object o1, Object o2, Schema s) { return compare(o1, o2, s, false); } + protected int compareMaps(final Map m1, final Map m2) { + if (m1 == m2) { + return 0; + } + + if (m1.isEmpty() && m2.isEmpty()) { + return 0; + } + + if (m1.size() != m2.size()) { + return 1; + } + + /** + * Peek at keys, assuming they're all the same type within a Map + */ + final Object key1 = m1.keySet().iterator().next(); + final Object key2 = m2.keySet().iterator().next(); + boolean utf8ToString = false; + boolean stringToUtf8 = false; + + if (key1 instanceof Utf8 && key2 instanceof String) { + utf8ToString = true; + } else if (key1 instanceof String && key2 instanceof Utf8) { + stringToUtf8 = true; + } + + try { + for (Map.Entry e : m1.entrySet()) { + final Object key = e.getKey(); + Object lookupKey = key; + if (utf8ToString) { + lookupKey = key.toString(); + } else if (stringToUtf8) { + lookupKey = new Utf8((String) lookupKey); + } + final Object value = e.getValue(); + if (value == null) { + if (!(m2.get(lookupKey) == null && m2.containsKey(lookupKey))) { + return 1; + } + } else { + final Object value2 = m2.get(lookupKey); + if (value instanceof Utf8 && value2 instanceof String) { + if (!value.toString().equals(value2)) { + return 1; + } + } else if (value instanceof String && value2 instanceof Utf8) { + if (!new Utf8((String) value).equals(value2)) { + return 1; + } + } else { + if (!value.equals(value2)) { + return 1; + } + } + } + } + } catch (ClassCastException unused) { + return 1; + } catch (NullPointerException unused) { + return 1; + } + + return 0; + } + /** * Comparison implementation. When equals is true, only checks for equality, not * for order. @@ -1173,7 +1253,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) { return e1.hasNext() ? 1 : (e2.hasNext() ? -1 : 0); case MAP: if (equals) - return o1.equals(o2) ? 0 : 1; + return compareMaps((Map) o1, (Map) o2); throw new AvroRuntimeException("Can't compare maps!"); case UNION: int i1 = resolveUnion(s, o1); @@ -1190,7 +1270,7 @@ protected int compare(Object o1, Object o2, Schema s, boolean equals) { } } - private final Map defaultValueCache = Collections.synchronizedMap(new WeakHashMap<>()); + private final ConcurrentMap defaultValueCache = new ConcurrentReferenceHashMap<>(128, WEAK); /** * Gets the default value of the given field, if any. 
@@ -1210,28 +1290,20 @@ public Object getDefaultValue(Field field) { } // Check the cache - Object defaultValue = defaultValueCache.get(field); - // If not cached, get the default Java value by encoding the default JSON // value and then decoding it: - if (defaultValue == null) + return defaultValueCache.computeIfAbsent(field, fieldToGetValueFor -> { try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); - Accessor.encode(encoder, field.schema(), json); + Accessor.encode(encoder, fieldToGetValueFor.schema(), json); encoder.flush(); BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(baos.toByteArray(), null); - defaultValue = createDatumReader(field.schema()).read(null, decoder); - - // this MAY result in two threads creating the same defaultValue - // and calling put. The last thread will win. However, - // that's not an issue. - defaultValueCache.put(field, defaultValue); + return createDatumReader(fieldToGetValueFor.schema()).read(null, decoder); } catch (IOException e) { throw new AvroRuntimeException(e); } - - return defaultValue; + }); } private static final Schema STRINGS = Schema.create(Type.STRING); diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java index 051563abaef..3fa675d793a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java @@ -26,8 +26,8 @@ import org.apache.avro.AvroRuntimeException; import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; import org.apache.avro.util.Utf8; -import org.slf4j.LoggerFactory; /** * An {@link Decoder} for binary-format data. @@ -39,27 +39,20 @@ * can be accessed by inputStream().remaining(), if the BinaryDecoder is not * 'direct'. *
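The collectionCount field introduced just below guards Avro's blocked encoding for arrays and maps: readArrayStart() returns the first block's item count, each arrayNext() returns the next block's, and the running total is validated by SystemLimitException.checkMaxCollectionLength. A minimal sketch of that read loop, assuming an array-of-longs schema and a hypothetical untrustedBytes input:

import java.io.IOException;

import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

class ArrayReadSketch { // hypothetical holder class
  static void readLongs(byte[] untrustedBytes) throws IOException {
    BinaryDecoder in = DecoderFactory.get().binaryDecoder(untrustedBytes, null);
    long items = in.readArrayStart(); // first block, checked against the limit
    while (items > 0) {
      for (long i = 0; i < items; i++) {
        long value = in.readLong(); // item type assumed to be long here
      }
      items = in.arrayNext(); // next block, running total re-checked
    }
  }
}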

- * To prevent this class from making large allocations when handling potentially - * pathological input data, set Java properties - * org.apache.avro.limits.string.maxLength and - * org.apache.avro.limits.bytes.maxLength before instantiating this - * class to limit the maximum sizes of string and bytes types - * handled. The default is to permit sizes up to Java's maximum array length. * * @see Encoder + * @see SystemLimitException */ public class BinaryDecoder extends Decoder { /** - * The maximum size of array to allocate. Some VMs reserve some header words in - * an array. Attempts to allocate larger arrays may result in OutOfMemoryError: - * Requested array size exceeds VM limit + * When reading a collection (MAP or ARRAY), this keeps track of the number of + * elements to ensure that the + * {@link SystemLimitException#checkMaxCollectionLength} constraint is + * respected. */ - static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L; - - private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; - protected final int maxBytesLength; + private long collectionCount = 0L; private ByteSource source = null; // we keep the buffer and its state variables in this class and not in a @@ -99,17 +92,6 @@ void clearBuf() { /** protected constructor for child classes */ protected BinaryDecoder() { super(); - String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(BinaryDecoder.class) - .warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe); - } - } - maxBytesLength = i; } BinaryDecoder(InputStream in, int bufferSize) { @@ -300,17 +282,11 @@ public double readDouble() throws IOException { @Override public Utf8 readString(Utf8 old) throws IOException { - long length = readLong(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxStringLength(readLong()); Utf8 result = (old != null ? old : new Utf8()); - result.setByteLength((int) length); - if (0L != length) { - doReadBytes(result.getBytes(), 0, (int) length); + result.setByteLength(length); + if (0 != length) { + doReadBytes(result.getBytes(), 0, length); } return result; } @@ -329,25 +305,16 @@ public void skipString() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. 
Length is negative: " + length); - } + int length = SystemLimitException.checkMaxBytesLength(readLong()); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; ((Buffer) result).clear(); } else { - result = ByteBuffer.allocate(length); + result = ByteBuffer.allocate((int) length); } - doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + doReadBytes(result.array(), result.position(), (int) length); + ((Buffer) result).limit((int) length); return result; } @@ -443,7 +410,6 @@ protected long doReadItemCount() throws IOException { * @return Zero if there are no more items to skip and end of array/map is * reached. Positive number if some items are found that cannot be * skipped and the client needs to skip them individually. - * * @throws IOException If the first byte cannot be read for any reason other * than the end of the file, if the input stream has been * closed, or if some other I/O error occurs. @@ -460,12 +426,15 @@ private long doSkipItems() throws IOException { @Override public long readArrayStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long arrayNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -475,12 +444,15 @@ public long skipArray() throws IOException { @Override public long readMapStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long mapNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -932,7 +904,6 @@ public void close() throws IOException { /** * This byte source is special. It will avoid copying data by using the source's * byte[] as a buffer in the decoder. 
- * */ private static class ByteArrayByteSource extends ByteSource { private static final int MIN_SIZE = 16; diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java index d9bbe93534c..6f07b13eee2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java @@ -22,8 +22,8 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import org.apache.avro.AvroRuntimeException; import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; import org.apache.avro.util.ByteBufferInputStream; /** @@ -40,30 +40,17 @@ class DirectBinaryDecoder extends BinaryDecoder { private class ByteReader { public ByteBuffer read(ByteBuffer old, int length) throws IOException { - this.checkLength(length); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; result.clear(); } else { - result = ByteBuffer.allocate(length); + result = ByteBuffer.allocate((int) length); } - doReadBytes(result.array(), result.position(), length); - result.limit(length); + doReadBytes(result.array(), result.position(), (int) length); + result.limit((int) length); return result; } - - protected final void checkLength(int length) { - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - } } private class ReuseByteReader extends ByteReader { @@ -75,14 +62,12 @@ public ReuseByteReader(ByteBufferInputStream bbi) { @Override public ByteBuffer read(ByteBuffer old, int length) throws IOException { - this.checkLength(length); if (old != null) { return super.read(old, length); } else { - return bbi.readBuffer(length); + return bbi.readBuffer((int) length); } } - } private ByteReader byteReader; @@ -170,8 +155,8 @@ public double readDouble() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - return byteReader.read(old, length); + long length = readLong(); + return byteReader.read(old, SystemLimitException.checkMaxBytesLength(length)); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java index a220cedd1e2..66ff6d91b8e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/specific/SpecificData.java @@ -90,6 +90,8 @@ public class SpecificData extends GenericData { public static final String KEY_CLASS_PROP = "java-key-class"; public static final String ELEMENT_PROP = "java-element-class"; + public static final char RESERVED_WORD_ESCAPE_CHAR = '$'; + /** * Reserved words from * https://docs.oracle.com/javase/specs/jls/se16/html/jls-3.html require @@ -328,8 +330,26 @@ public static String getClassName(Schema schema) { String name = schema.getName(); if (namespace == null || "".equals(namespace)) return name; - String dot = namespace.endsWith("$") ? 
"" : "."; // back-compatibly handle $ - return namespace + dot + name; + + StringBuilder classNameBuilder = new StringBuilder(); + String[] words = namespace.split("\\."); + + for (int i = 0; i < words.length; i++) { + String word = words[i]; + classNameBuilder.append(word); + + if (RESERVED_WORDS.contains(word)) { + classNameBuilder.append(RESERVED_WORD_ESCAPE_CHAR); + } + + if (i != words.length - 1 || !word.endsWith("$")) { // back-compatibly handle $ + classNameBuilder.append("."); + } + } + + classNameBuilder.append(name); + + return classNameBuilder.toString(); } // cache for schemas created from Class objects. Use ClassValue to avoid diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index f54b6e2062b..9238fd78c65 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -24,9 +24,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import org.apache.avro.AvroRuntimeException; +import org.apache.avro.SystemLimitException; import org.apache.avro.io.BinaryData; -import org.slf4j.LoggerFactory; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -34,22 +33,8 @@ * as a single instance may be reused. */ public class Utf8 implements Comparable, CharSequence, Externalizable { - private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; - private static final int MAX_LENGTH; - private static final byte[] EMPTY = new byte[0]; - static { - String o = System.getProperty(MAX_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(Utf8.class).warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe); - } - } - MAX_LENGTH = i; - } + private static final byte[] EMPTY = new byte[0]; private byte[] bytes; private int hash; @@ -63,7 +48,7 @@ public Utf8() { public Utf8(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -78,7 +63,7 @@ public Utf8(Utf8 other) { public Utf8(byte[] bytes) { int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; } @@ -121,7 +106,7 @@ public Utf8 setLength(int newLength) { * length does not change, as this also clears the cached String. 
*/ public Utf8 setByteLength(int newLength) { - checkLength(newLength); + SystemLimitException.checkMaxStringLength(newLength); if (this.bytes.length < newLength) { this.bytes = Arrays.copyOf(this.bytes, newLength); } @@ -135,7 +120,7 @@ public Utf8 setByteLength(int newLength) { public Utf8 set(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -215,12 +200,6 @@ public CharSequence subSequence(int start, int end) { return toString().subSequence(start, end); } - private static void checkLength(int length) { - if (length > MAX_LENGTH) { - throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed"); - } - } - /** Gets the UTF-8 bytes for a String */ public static byte[] getBytesFor(String str) { return str.getBytes(StandardCharsets.UTF_8); diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java new file mode 100644 index 00000000000..70e2e9f3b30 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/Assert.java @@ -0,0 +1,121 @@ +/* + * Copyright 2002-2020 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +/** + * Assertion utility class that assists in validating arguments. + * + *

+ * Useful for identifying programmer errors early and clearly at runtime. + * + *

+ * For example, if the contract of a public method states it does not allow + * {@code null} arguments, {@code Assert} can be used to validate that contract. + * Doing this clearly indicates a contract violation when it occurs and protects + * the class's invariants. + * + *

+ * Typically used to validate method arguments rather than configuration + * properties, to check for cases that are usually programmer errors rather than + * configuration errors. In contrast to configuration initialization code, there + * is usually no point in falling back to defaults in such methods. + * + *

+ * This class is similar to JUnit's assertion library. If an argument value is + * deemed invalid, an {@link IllegalArgumentException} is thrown (typically). + * For example: + * + *

+ * Assert.notNull(clazz, "The class must not be null");
+ * Assert.isTrue(i > 0, "The value must be greater than zero");
+ * 
+ * + *

+ * Mainly for internal use within the framework; for a more comprehensive suite + * of assertion utilities consider {@code org.apache.commons.lang3.Validate} + * from Apache Commons + * Lang, Google Guava's Preconditions, + * or similar third-party libraries. + * + * @author Keith Donald + * @author Juergen Hoeller + * @author Sam Brannen + * @author Colin Sampaleanu + * @author Rob Harrop + * @since 1.1.2 + */ +class Assert { + private Assert() { + } + + /** + * Assert a boolean expression, throwing an {@code IllegalStateException} if the + * expression evaluates to {@code false}. + * + *

+   * Assert.state(id == null, "The id property must not already be initialized");
+   * 
+ * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalStateException if {@code expression} is {@code false} + */ + public static void state(boolean expression, String message) { + if (!expression) { + throw new IllegalStateException(message); + } + } + + /** + * Assert a boolean expression, throwing an {@code IllegalArgumentException} if + * the expression evaluates to {@code false}. + * + *
+   * Assert.isTrue(i > 0, "The value must be greater than zero");
+   * 
+ * + * @param expression a boolean expression + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if {@code expression} is {@code false} + */ + public static void isTrue(boolean expression, String message) { + if (!expression) { + throw new IllegalArgumentException(message); + } + } + + /** + * Assert that an object is not {@code null}. + * + *
+   * Assert.notNull(clazz, "The class must not be null");
+   * 
+ * + * @param object the object to check + * @param message the exception message to use if the assertion fails + * @throws IllegalArgumentException if the object is {@code null} + */ + public static void notNull(@Nullable Object object, String message) { + if (object == null) { + throw new IllegalArgumentException(message); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..1a137cf2101 --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ConcurrentReferenceHashMap.java @@ -0,0 +1,1111 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.SoftReference; +import java.lang.ref.WeakReference; +import java.lang.reflect.Array; +import java.util.AbstractMap; +import java.util.AbstractSet; +import java.util.Collections; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.ReentrantLock; + +/** + * A {@link ConcurrentHashMap} that uses {@link ReferenceType#SOFT soft} or + * {@linkplain ReferenceType#WEAK weak} references for both {@code keys} and + * {@code values}. + * + *

+ * This class can be used as an alternative to + * {@code Collections.synchronizedMap(new WeakHashMap>())} in + * order to support better performance when accessed concurrently. This + * implementation follows the same design constraints as + * {@link ConcurrentHashMap} with the exception that {@code null} values and + * {@code null} keys are supported. + * + *

+ * NOTE: The use of references means that there is no guarantee that + * items placed into the map will be subsequently available. The garbage + * collector may discard references at any time, so it may appear that an + * unknown thread is silently removing entries. + * + *
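This weak-reference behavior is what the defaultValueCache change in GenericData above relies on: an entry can disappear between calls, so lookups go through computeIfAbsent and may recompute. A minimal usage sketch; the key and value types, the capacity, and the expensiveDefault helper are hypothetical:

import java.util.concurrent.ConcurrentMap;

import org.apache.avro.util.springframework.ConcurrentReferenceHashMap;

class WeakCacheSketch { // hypothetical holder class
  private final ConcurrentMap<String, Object> cache =
      new ConcurrentReferenceHashMap<>(128, ConcurrentReferenceHashMap.ReferenceType.WEAK);

  Object lookup(String key) {
    // May recompute if the garbage collector has discarded the weak entry.
    return cache.computeIfAbsent(key, this::expensiveDefault);
  }

  private Object expensiveDefault(String key) { // hypothetical stand-in
    return new Object();
  }
}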

+ * If not explicitly specified, this implementation will use + * {@linkplain SoftReference soft entry references}. + * + * @param the key type + * @param the value type + * @author Phillip Webb + * @author Juergen Hoeller + * @since 3.2 + */ +public class ConcurrentReferenceHashMap extends AbstractMap implements ConcurrentMap { + + private static final int DEFAULT_INITIAL_CAPACITY = 16; + + private static final float DEFAULT_LOAD_FACTOR = 0.75f; + + private static final int DEFAULT_CONCURRENCY_LEVEL = 16; + + private static final ReferenceType DEFAULT_REFERENCE_TYPE = ReferenceType.SOFT; + + private static final int MAXIMUM_CONCURRENCY_LEVEL = 1 << 16; + + private static final int MAXIMUM_SEGMENT_SIZE = 1 << 30; + + /** + * Array of segments indexed using the high order bits from the hash. + */ + private final Segment[] segments; + + /** + * When the average number of references per table exceeds this value resize + * will be attempted. + */ + private final float loadFactor; + + /** + * The reference type: SOFT or WEAK. + */ + private final ReferenceType referenceType; + + /** + * The shift value used to calculate the size of the segments array and an index + * from the hash. + */ + private final int shift; + + /** + * Late binding entry set. + */ + @Nullable + private volatile Set> entrySet; + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + */ + public ConcurrentReferenceHashMap() { + this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + */ + public ConcurrentReferenceHashMap(int initialCapacity) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of references + * per table exceeds this value resize will be attempted + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor) { + this(initialCapacity, loadFactor, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, int concurrencyLevel) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param referenceType the reference type used for entries (soft or weak) + */ + public ConcurrentReferenceHashMap(int initialCapacity, ReferenceType referenceType) { + this(initialCapacity, DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, referenceType); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of + * references per table exceeds this value, resize will + * be attempted. 
+ * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + */ + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel) { + this(initialCapacity, loadFactor, concurrencyLevel, DEFAULT_REFERENCE_TYPE); + } + + /** + * Create a new {@code ConcurrentReferenceHashMap} instance. + * + * @param initialCapacity the initial capacity of the map + * @param loadFactor the load factor. When the average number of + * references per table exceeds this value, resize will + * be attempted. + * @param concurrencyLevel the expected number of threads that will concurrently + * write to the map + * @param referenceType the reference type used for entries (soft or weak) + */ + @SuppressWarnings("unchecked") + public ConcurrentReferenceHashMap(int initialCapacity, float loadFactor, int concurrencyLevel, + ReferenceType referenceType) { + + Assert.isTrue(initialCapacity >= 0, "Initial capacity must not be negative"); + Assert.isTrue(loadFactor > 0f, "Load factor must be positive"); + Assert.isTrue(concurrencyLevel > 0, "Concurrency level must be positive"); + Assert.notNull(referenceType, "Reference type must not be null"); + this.loadFactor = loadFactor; + this.shift = calculateShift(concurrencyLevel, MAXIMUM_CONCURRENCY_LEVEL); + int size = 1 << this.shift; + this.referenceType = referenceType; + int roundedUpSegmentCapacity = (int) ((initialCapacity + size - 1L) / size); + int initialSize = 1 << calculateShift(roundedUpSegmentCapacity, MAXIMUM_SEGMENT_SIZE); + Segment[] segments = (Segment[]) Array.newInstance(Segment.class, size); + int resizeThreshold = (int) (initialSize * getLoadFactor()); + for (int i = 0; i < segments.length; i++) { + segments[i] = new Segment(initialSize, resizeThreshold); + } + this.segments = segments; + } + + protected final float getLoadFactor() { + return this.loadFactor; + } + + protected final int getSegmentsSize() { + return this.segments.length; + } + + protected final Segment getSegment(int index) { + return this.segments[index]; + } + + /** + * Factory method that returns the {@link ReferenceManager}. This method will be + * called once for each {@link Segment}. + * + * @return a new reference manager + */ + protected ReferenceManager createReferenceManager() { + return new ReferenceManager(); + } + + /** + * Get the hash for a given object, apply an additional hash function to reduce + * collisions. This implementation uses the same Wang/Jenkins algorithm as + * {@link ConcurrentHashMap}. Subclasses can override to provide alternative + * hashing. + * + * @param o the object to hash (may be null) + * @return the resulting hash code + */ + protected int getHash(@Nullable Object o) { + int hash = (o != null ? o.hashCode() : 0); + hash += (hash << 15) ^ 0xffffcd7d; + hash ^= (hash >>> 10); + hash += (hash << 3); + hash ^= (hash >>> 6); + hash += (hash << 2) + (hash << 14); + hash ^= (hash >>> 16); + return hash; + } + + @Override + @Nullable + public V get(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? entry.getValue() : null); + } + + @Override + @Nullable + public V getOrDefault(@Nullable Object key, @Nullable V defaultValue) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null ? 
entry.getValue() : defaultValue); + } + + @Override + public boolean containsKey(@Nullable Object key) { + Reference ref = getReference(key, Restructure.WHEN_NECESSARY); + Entry entry = (ref != null ? ref.get() : null); + return (entry != null && ObjectUtils.nullSafeEquals(entry.getKey(), key)); + } + + /** + * Return a {@link Reference} to the {@link Entry} for the specified + * {@code key}, or {@code null} if not found. + * + * @param key the key (can be {@code null}) + * @param restructure types of restructure allowed during this call + * @return the reference, or {@code null} if not found + */ + @Nullable + protected final Reference getReference(@Nullable Object key, Restructure restructure) { + int hash = getHash(key); + return getSegmentForHash(hash).getReference(key, hash, restructure); + } + + @Override + @Nullable + public V put(@Nullable K key, @Nullable V value) { + return put(key, value, true); + } + + @Override + @Nullable + public V putIfAbsent(@Nullable K key, @Nullable V value) { + return put(key, value, false); + } + + @Nullable + private V put(@Nullable final K key, @Nullable final V value, final boolean overwriteExisting) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.RESIZE) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + if (entry != null) { + V oldValue = entry.getValue(); + if (overwriteExisting) { + entry.setValue(value); + } + return oldValue; + } + Assert.state(entries != null, "No entries segment"); + entries.add(value); + return null; + } + }); + } + + @Override + @Nullable + public V remove(@Nullable Object key) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + if (ref != null) { + ref.release(); + } + return entry.value; + } + return null; + } + }); + } + + @Override + public boolean remove(@Nullable Object key, final @Nullable Object value) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_AFTER, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), value)) { + if (ref != null) { + ref.release(); + } + return true; + } + return false; + } + }); + return (Boolean.TRUE.equals(result)); + } + + @Override + public boolean replace(@Nullable K key, final @Nullable V oldValue, final @Nullable V newValue) { + Boolean result = doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + protected Boolean execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null && ObjectUtils.nullSafeEquals(entry.getValue(), oldValue)) { + entry.setValue(newValue); + return true; + } + return false; + } + }); + return (Boolean.TRUE.equals(result)); + } + + @Override + @Nullable + public V replace(@Nullable K key, final @Nullable V value) { + return doTask(key, new Task(TaskOption.RESTRUCTURE_BEFORE, TaskOption.SKIP_IF_EMPTY) { + @Override + @Nullable + protected V execute(@Nullable Reference ref, @Nullable Entry entry) { + if (entry != null) { + V oldValue = entry.getValue(); + entry.setValue(value); + return oldValue; + } + return null; + } + }); + } + + @Override + public void clear() { + for (Segment segment : this.segments) { + segment.clear(); + } + } + + /** + * Remove any entries that have been garbage collected and are 
no longer + * referenced. Under normal circumstances garbage collected entries are + * automatically purged as items are added or removed from the Map. This method + * can be used to force a purge, and is useful when the Map is read frequently + * but updated less often. + */ + public void purgeUnreferencedEntries() { + for (Segment segment : this.segments) { + segment.restructureIfNecessary(false); + } + } + + @Override + public int size() { + int size = 0; + for (Segment segment : this.segments) { + size += segment.getCount(); + } + return size; + } + + @Override + public boolean isEmpty() { + for (Segment segment : this.segments) { + if (segment.getCount() > 0) { + return false; + } + } + return true; + } + + @Override + public Set> entrySet() { + Set> entrySet = this.entrySet; + if (entrySet == null) { + entrySet = new EntrySet(); + this.entrySet = entrySet; + } + return entrySet; + } + + @Nullable + private T doTask(@Nullable Object key, Task task) { + int hash = getHash(key); + return getSegmentForHash(hash).doTask(hash, key, task); + } + + private Segment getSegmentForHash(int hash) { + return this.segments[(hash >>> (32 - this.shift)) & (this.segments.length - 1)]; + } + + /** + * Calculate a shift value that can be used to create a power-of-two value + * between the specified maximum and minimum values. + * + * @param minimumValue the minimum value + * @param maximumValue the maximum value + * @return the calculated shift (use {@code 1 << shift} to obtain a value) + */ + protected static int calculateShift(int minimumValue, int maximumValue) { + int shift = 0; + int value = 1; + while (value < minimumValue && value < maximumValue) { + value <<= 1; + shift++; + } + return shift; + } + + /** + * Various reference types supported by this map. + */ + public enum ReferenceType { + + /** + * Use {@link SoftReference SoftReferences}. + */ + SOFT, + + /** + * Use {@link WeakReference WeakReferences}. + */ + WEAK + } + + /** + * A single segment used to divide the map to allow better concurrent + * performance. + */ + @SuppressWarnings("serial") + protected final class Segment extends ReentrantLock { + + private final ReferenceManager referenceManager; + + private final int initialSize; + + /** + * Array of references indexed using the low order bits from the hash. This + * property should only be set along with {@code resizeThreshold}. + */ + private volatile Reference[] references; + + /** + * The total number of references contained in this segment. This includes + * chained references and references that have been garbage collected but not + * purged. + */ + private final AtomicInteger count = new AtomicInteger(); + + /** + * The threshold when resizing of the references should occur. When + * {@code count} exceeds this value references will be resized. 
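+ * The threshold is computed as {@code references.length * loadFactor} and is recalculated whenever the table is resized or cleared.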
+ */ + private int resizeThreshold; + + public Segment(int initialSize, int resizeThreshold) { + this.referenceManager = createReferenceManager(); + this.initialSize = initialSize; + this.references = createReferenceArray(initialSize); + this.resizeThreshold = resizeThreshold; + } + + @Nullable + public Reference getReference(@Nullable Object key, int hash, Restructure restructure) { + if (restructure == Restructure.WHEN_NECESSARY) { + restructureIfNecessary(false); + } + if (this.count.get() == 0) { + return null; + } + // Use a local copy to protect against other threads writing + Reference[] references = this.references; + int index = getIndex(hash, references); + Reference head = references[index]; + return findInChain(head, key, hash); + } + + /** + * Apply an update operation to this segment. The segment will be locked during + * the update. + * + * @param hash the hash of the key + * @param key the key + * @param task the update operation + * @return the result of the operation + */ + @Nullable + public T doTask(final int hash, @Nullable final Object key, final Task task) { + boolean resize = task.hasOption(TaskOption.RESIZE); + if (task.hasOption(TaskOption.RESTRUCTURE_BEFORE)) { + restructureIfNecessary(resize); + } + if (task.hasOption(TaskOption.SKIP_IF_EMPTY) && this.count.get() == 0) { + return task.execute(null, null, null); + } + lock(); + try { + final int index = getIndex(hash, this.references); + final Reference head = this.references[index]; + Reference ref = findInChain(head, key, hash); + Entry entry = (ref != null ? ref.get() : null); + Entries entries = value -> { + @SuppressWarnings("unchecked") + Entry newEntry = new Entry<>((K) key, value); + Reference newReference = Segment.this.referenceManager.createReference(newEntry, hash, head); + Segment.this.references[index] = newReference; + Segment.this.count.incrementAndGet(); + }; + return task.execute(ref, entry, entries); + } finally { + unlock(); + if (task.hasOption(TaskOption.RESTRUCTURE_AFTER)) { + restructureIfNecessary(resize); + } + } + } + + /** + * Clear all items from this segment. + */ + public void clear() { + if (this.count.get() == 0) { + return; + } + lock(); + try { + this.references = createReferenceArray(this.initialSize); + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + this.count.set(0); + } finally { + unlock(); + } + } + + /** + * Restructure the underlying data structure when it becomes necessary. This + * method can increase the size of the references table as well as purge any + * references that have been garbage collected. 
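+ * When a resize does occur, the table size is doubled, up to {@code MAXIMUM_SEGMENT_SIZE}.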
+ * + * @param allowResize if resizing is permitted + */ + private void restructureIfNecessary(boolean allowResize) { + int currCount = this.count.get(); + boolean needsResize = allowResize && (currCount > 0 && currCount >= this.resizeThreshold); + Reference ref = this.referenceManager.pollForPurge(); + if (ref != null || (needsResize)) { + restructure(allowResize, ref); + } + } + + private void restructure(boolean allowResize, @Nullable Reference ref) { + boolean needsResize; + lock(); + try { + int countAfterRestructure = this.count.get(); + Set> toPurge = Collections.emptySet(); + if (ref != null) { + toPurge = new HashSet<>(); + while (ref != null) { + toPurge.add(ref); + ref = this.referenceManager.pollForPurge(); + } + } + countAfterRestructure -= toPurge.size(); + + // Recalculate taking into account count inside lock and items that + // will be purged + needsResize = (countAfterRestructure > 0 && countAfterRestructure >= this.resizeThreshold); + boolean resizing = false; + int restructureSize = this.references.length; + if (allowResize && needsResize && restructureSize < MAXIMUM_SEGMENT_SIZE) { + restructureSize <<= 1; + resizing = true; + } + + // Either create a new table or reuse the existing one + Reference[] restructured = (resizing ? createReferenceArray(restructureSize) : this.references); + + // Restructure + for (int i = 0; i < this.references.length; i++) { + ref = this.references[i]; + if (!resizing) { + restructured[i] = null; + } + while (ref != null) { + if (!toPurge.contains(ref)) { + Entry entry = ref.get(); + if (entry != null) { + int index = getIndex(ref.getHash(), restructured); + restructured[index] = this.referenceManager.createReference(entry, ref.getHash(), restructured[index]); + } + } + ref = ref.getNext(); + } + } + + // Replace volatile members + if (resizing) { + this.references = restructured; + this.resizeThreshold = (int) (this.references.length * getLoadFactor()); + } + this.count.set(Math.max(countAfterRestructure, 0)); + } finally { + unlock(); + } + } + + @Nullable + private Reference findInChain(Reference ref, @Nullable Object key, int hash) { + Reference currRef = ref; + while (currRef != null) { + if (currRef.getHash() == hash) { + Entry entry = currRef.get(); + if (entry != null) { + K entryKey = entry.getKey(); + if (ObjectUtils.nullSafeEquals(entryKey, key)) { + return currRef; + } + } + } + currRef = currRef.getNext(); + } + return null; + } + + @SuppressWarnings({ "unchecked" }) + private Reference[] createReferenceArray(int size) { + return new Reference[size]; + } + + private int getIndex(int hash, Reference[] references) { + return (hash & (references.length - 1)); + } + + /** + * Return the size of the current references array. + */ + public int getSize() { + return this.references.length; + } + + /** + * Return the total number of references in this segment. + */ + public int getCount() { + return this.count.get(); + } + } + + /** + * A reference to an {@link Entry} contained in the map. Implementations are + * usually wrappers around specific Java reference implementations (e.g., + * {@link SoftReference}). + * + * @param the key type + * @param the value type + */ + protected interface Reference { + + /** + * Return the referenced entry, or {@code null} if the entry is no longer + * available. + */ + @Nullable + Entry get(); + + /** + * Return the hash for the reference. + */ + int getHash(); + + /** + * Return the next reference in the chain, or {@code null} if none. 
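+ * Lookups traverse this chain, comparing hashes and then keys, until a matching entry is found.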
+ */ + @Nullable + Reference getNext(); + + /** + * Release this entry and ensure that it will be returned from + * {@code ReferenceManager#pollForPurge()}. + */ + void release(); + } + + /** + * A single map entry. + * + * @param the key type + * @param the value type + */ + protected static final class Entry implements Map.Entry { + + @Nullable + private final K key; + + @Nullable + private volatile V value; + + public Entry(@Nullable K key, @Nullable V value) { + this.key = key; + this.value = value; + } + + @Override + @Nullable + public K getKey() { + return this.key; + } + + @Override + @Nullable + public V getValue() { + return this.value; + } + + @Override + @Nullable + public V setValue(@Nullable V value) { + V previous = this.value; + this.value = value; + return previous; + } + + @Override + public String toString() { + return (this.key + "=" + this.value); + } + + @Override + @SuppressWarnings("rawtypes") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof Map.Entry)) { + return false; + } + Map.Entry otherEntry = (Map.Entry) other; + return (ObjectUtils.nullSafeEquals(getKey(), otherEntry.getKey()) + && ObjectUtils.nullSafeEquals(getValue(), otherEntry.getValue())); + } + + @Override + public int hashCode() { + return (ObjectUtils.nullSafeHashCode(this.key) ^ ObjectUtils.nullSafeHashCode(this.value)); + } + } + + /** + * A task that can be {@link Segment#doTask run} against a {@link Segment}. + */ + private abstract class Task { + + private final EnumSet options; + + public Task(TaskOption... options) { + this.options = (options.length == 0 ? EnumSet.noneOf(TaskOption.class) : EnumSet.of(options[0], options)); + } + + public boolean hasOption(TaskOption option) { + return this.options.contains(option); + } + + /** + * Execute the task. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @param entries access to the underlying entries + * @return the result of the task + * @see #execute(Reference, Entry) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry, @Nullable Entries entries) { + return execute(ref, entry); + } + + /** + * Convenience method that can be used for tasks that do not need access to + * {@link Entries}. + * + * @param ref the found reference (or {@code null}) + * @param entry the found entry (or {@code null}) + * @return the result of the task + * @see #execute(Reference, Entry, Entries) + */ + @Nullable + protected T execute(@Nullable Reference ref, @Nullable Entry entry) { + return null; + } + } + + /** + * Various options supported by a {@code Task}. + */ + private enum TaskOption { + + RESTRUCTURE_BEFORE, RESTRUCTURE_AFTER, SKIP_IF_EMPTY, RESIZE + } + + /** + * Allows a task access to {@link Segment} entries. + */ + private interface Entries { + + /** + * Add a new entry with the specified value. + * + * @param value the value to add + */ + void add(@Nullable V value); + } + + /** + * Internal entry-set implementation. + */ + private class EntrySet extends AbstractSet> { + + @Override + public Iterator> iterator() { + return new EntryIterator(); + } + + @Override + public boolean contains(@Nullable Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + Reference ref = ConcurrentReferenceHashMap.this.getReference(entry.getKey(), Restructure.NEVER); + Entry otherEntry = (ref != null ? 
ref.get() : null); + if (otherEntry != null) { + return ObjectUtils.nullSafeEquals(entry.getValue(), otherEntry.getValue()); + } + } + return false; + } + + @Override + public boolean remove(Object o) { + if (o instanceof Map.Entry) { + Map.Entry entry = (Map.Entry) o; + return ConcurrentReferenceHashMap.this.remove(entry.getKey(), entry.getValue()); + } + return false; + } + + @Override + public int size() { + return ConcurrentReferenceHashMap.this.size(); + } + + @Override + public void clear() { + ConcurrentReferenceHashMap.this.clear(); + } + } + + /** + * Internal entry iterator implementation. + */ + private class EntryIterator implements Iterator> { + + private int segmentIndex; + + private int referenceIndex; + + @Nullable + private Reference[] references; + + @Nullable + private Reference reference; + + @Nullable + private Entry next; + + @Nullable + private Entry last; + + public EntryIterator() { + moveToNextSegment(); + } + + @Override + public boolean hasNext() { + getNextIfNecessary(); + return (this.next != null); + } + + @Override + public Entry next() { + getNextIfNecessary(); + if (this.next == null) { + throw new NoSuchElementException(); + } + this.last = this.next; + this.next = null; + return this.last; + } + + private void getNextIfNecessary() { + while (this.next == null) { + moveToNextReference(); + if (this.reference == null) { + return; + } + this.next = this.reference.get(); + } + } + + private void moveToNextReference() { + if (this.reference != null) { + this.reference = this.reference.getNext(); + } + while (this.reference == null && this.references != null) { + if (this.referenceIndex >= this.references.length) { + moveToNextSegment(); + this.referenceIndex = 0; + } else { + this.reference = this.references[this.referenceIndex]; + this.referenceIndex++; + } + } + } + + private void moveToNextSegment() { + this.reference = null; + this.references = null; + if (this.segmentIndex < ConcurrentReferenceHashMap.this.segments.length) { + this.references = ConcurrentReferenceHashMap.this.segments[this.segmentIndex].references; + this.segmentIndex++; + } + } + + @Override + public void remove() { + Assert.state(this.last != null, "No element to remove"); + ConcurrentReferenceHashMap.this.remove(this.last.getKey()); + this.last = null; + } + } + + /** + * The types of restructuring that can be performed. + */ + protected enum Restructure { + + WHEN_NECESSARY, NEVER + } + + /** + * Strategy class used to manage {@link Reference References}. This class can be + * overridden if alternative reference types need to be supported. + */ + protected class ReferenceManager { + + private final ReferenceQueue> queue = new ReferenceQueue<>(); + + /** + * Factory method used to create a new {@link Reference}. + * + * @param entry the entry contained in the reference + * @param hash the hash + * @param next the next reference in the chain, or {@code null} if none + * @return a new {@link Reference} + */ + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (ConcurrentReferenceHashMap.this.referenceType == ReferenceType.WEAK) { + return new WeakEntryReference<>(entry, hash, next, this.queue); + } + return new SoftEntryReference<>(entry, hash, next, this.queue); + } + + /** + * Return any reference that has been garbage collected and can be purged from + * the underlying structure or {@code null} if no references need purging. This + * method must be thread safe and ideally should not block when returning + * {@code null}. 
References should be returned once and only once. + * + * @return a reference to purge or {@code null} + */ + @SuppressWarnings("unchecked") + @Nullable + public Reference pollForPurge() { + return (Reference) this.queue.poll(); + } + } + + /** + * Internal {@link Reference} implementation for {@link SoftReference + * SoftReferences}. + */ + private static final class SoftEntryReference extends SoftReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public SoftEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + + /** + * Internal {@link Reference} implementation for {@link WeakReference + * WeakReferences}. + */ + private static final class WeakEntryReference extends WeakReference> implements Reference { + + private final int hash; + + @Nullable + private final Reference nextReference; + + public WeakEntryReference(Entry entry, int hash, @Nullable Reference next, + ReferenceQueue> queue) { + + super(entry, queue); + this.hash = hash; + this.nextReference = next; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + @Nullable + public Reference getNext() { + return this.nextReference; + } + + @Override + public void release() { + enqueue(); + clear(); + } + } + +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java new file mode 100644 index 00000000000..a8e0c45180e --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/util/springframework/ObjectUtils.java @@ -0,0 +1,320 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.apache.avro.util.ClassUtils; + +import java.util.Arrays; + +/** + * Miscellaneous object utility methods. + * + *
+ * <p>
+ * Mainly for internal use within the framework. + * + *
+ * <p>
+ * Thanks to Alex Ruiz for contributing several enhancements to this class! + * + * @author Juergen Hoeller + * @author Keith Donald + * @author Rod Johnson + * @author Rob Harrop + * @author Chris Beams + * @author Sam Brannen + * @see ClassUtils see CollectionUtils see StringUtils + * @since 19.03.2004 + */ +class ObjectUtils { + private ObjectUtils() { + } + + private static final int INITIAL_HASH = 7; + private static final int MULTIPLIER = 31; + + /** + * Determine whether the given array is empty: i.e. {@code null} or of zero + * length. + * + * @param array the array to check + */ + public static boolean isEmpty(@Nullable Object[] array) { + return (array == null || array.length == 0); + } + + // --------------------------------------------------------------------- + // Convenience methods for content-based equality/hash-code handling + // --------------------------------------------------------------------- + + /** + * Determine if the given objects are equal, returning {@code true} if both are + * {@code null} or {@code false} if only one is {@code null}. + *
+ * <p>
+ * Compares arrays with {@code Arrays.equals}, performing an equality check + * based on the array elements rather than the array reference. + * + * @param o1 first Object to compare + * @param o2 second Object to compare + * @return whether the given objects are equal + * @see Object#equals(Object) + * @see Arrays#equals + */ + public static boolean nullSafeEquals(@Nullable Object o1, @Nullable Object o2) { + if (o1 == o2) { + return true; + } + if (o1 == null || o2 == null) { + return false; + } + if (o1.equals(o2)) { + return true; + } + if (o1.getClass().isArray() && o2.getClass().isArray()) { + return arrayEquals(o1, o2); + } + return false; + } + + /** + * Compare the given arrays with {@code Arrays.equals}, performing an equality + * check based on the array elements rather than the array reference. + * + * @param o1 first array to compare + * @param o2 second array to compare + * @return whether the given objects are equal + * @see #nullSafeEquals(Object, Object) + * @see Arrays#equals + */ + private static boolean arrayEquals(Object o1, Object o2) { + if (o1 instanceof Object[] && o2 instanceof Object[]) { + return Arrays.equals((Object[]) o1, (Object[]) o2); + } + if (o1 instanceof boolean[] && o2 instanceof boolean[]) { + return Arrays.equals((boolean[]) o1, (boolean[]) o2); + } + if (o1 instanceof byte[] && o2 instanceof byte[]) { + return Arrays.equals((byte[]) o1, (byte[]) o2); + } + if (o1 instanceof char[] && o2 instanceof char[]) { + return Arrays.equals((char[]) o1, (char[]) o2); + } + if (o1 instanceof double[] && o2 instanceof double[]) { + return Arrays.equals((double[]) o1, (double[]) o2); + } + if (o1 instanceof float[] && o2 instanceof float[]) { + return Arrays.equals((float[]) o1, (float[]) o2); + } + if (o1 instanceof int[] && o2 instanceof int[]) { + return Arrays.equals((int[]) o1, (int[]) o2); + } + if (o1 instanceof long[] && o2 instanceof long[]) { + return Arrays.equals((long[]) o1, (long[]) o2); + } + if (o1 instanceof short[] && o2 instanceof short[]) { + return Arrays.equals((short[]) o1, (short[]) o2); + } + return false; + } + + /** + * Return as hash code for the given object; typically the value of + * {@code Object#hashCode()}}. If the object is an array, this method will + * delegate to any of the {@code nullSafeHashCode} methods for arrays in this + * class. If the object is {@code null}, this method returns 0. 
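+ * For example, passing an {@code int[]} delegates to {@code nullSafeHashCode(int[])}, so the result reflects the array contents rather than the array's identity.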
+ * + * @see Object#hashCode() + * @see #nullSafeHashCode(Object[]) + * @see #nullSafeHashCode(boolean[]) + * @see #nullSafeHashCode(byte[]) + * @see #nullSafeHashCode(char[]) + * @see #nullSafeHashCode(double[]) + * @see #nullSafeHashCode(float[]) + * @see #nullSafeHashCode(int[]) + * @see #nullSafeHashCode(long[]) + * @see #nullSafeHashCode(short[]) + */ + public static int nullSafeHashCode(@Nullable Object obj) { + if (obj == null) { + return 0; + } + if (obj.getClass().isArray()) { + if (obj instanceof Object[]) { + return nullSafeHashCode((Object[]) obj); + } + if (obj instanceof boolean[]) { + return nullSafeHashCode((boolean[]) obj); + } + if (obj instanceof byte[]) { + return nullSafeHashCode((byte[]) obj); + } + if (obj instanceof char[]) { + return nullSafeHashCode((char[]) obj); + } + if (obj instanceof double[]) { + return nullSafeHashCode((double[]) obj); + } + if (obj instanceof float[]) { + return nullSafeHashCode((float[]) obj); + } + if (obj instanceof int[]) { + return nullSafeHashCode((int[]) obj); + } + if (obj instanceof long[]) { + return nullSafeHashCode((long[]) obj); + } + if (obj instanceof short[]) { + return nullSafeHashCode((short[]) obj); + } + } + return obj.hashCode(); + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable Object[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (Object element : array) { + hash = MULTIPLIER * hash + nullSafeHashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable boolean[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (boolean element : array) { + hash = MULTIPLIER * hash + Boolean.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable byte[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (byte element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable char[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (char element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable double[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (double element : array) { + hash = MULTIPLIER * hash + Double.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. 
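+ * The hash is accumulated as {@code hash = MULTIPLIER * hash + Float.hashCode(element)}, starting from {@code INITIAL_HASH}.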
+ */ + public static int nullSafeHashCode(@Nullable float[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (float element : array) { + hash = MULTIPLIER * hash + Float.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable int[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (int element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable long[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (long element : array) { + hash = MULTIPLIER * hash + Long.hashCode(element); + } + return hash; + } + + /** + * Return a hash code based on the contents of the specified array. If + * {@code array} is {@code null}, this method returns 0. + */ + public static int nullSafeHashCode(@Nullable short[] array) { + if (array == null) { + return 0; + } + int hash = INITIAL_HASH; + for (short element : array) { + hash = MULTIPLIER * hash + element; + } + return hash; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomType.java b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java new file mode 100644 index 00000000000..140ac901b0b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import java.util.Objects; + +public final class CustomType { + private final String name; + + public CustomType(CharSequence name) { + this.name = name.toString(); + } + + public String getName() { + return name; + } + + @Override + public int hashCode() { + return Objects.hashCode(name); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof CustomType && name.equals(((CustomType) obj).name); + } + + @Override + public String toString() { + return "CustomType{name='" + name + "'}"; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java new file mode 100644 index 00000000000..de8fea02ca4 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +public class CustomTypeConverter extends Conversion { + private static final CustomTypeLogicalTypeFactory logicalTypeFactory = new CustomTypeLogicalTypeFactory(); + + @Override + public Class getConvertedType() { + return CustomType.class; + } + + @Override + public String getLogicalTypeName() { + return logicalTypeFactory.getTypeName(); + } + + @Override + public Schema getRecommendedSchema() { + return Schema.create(Schema.Type.STRING); + } + + @Override + public CustomType fromCharSequence(CharSequence value, Schema schema, LogicalType type) { + return new CustomType(value); + } + + @Override + public CharSequence toCharSequence(CustomType value, Schema schema, LogicalType type) { + return value.getName(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java similarity index 86% rename from lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java rename to lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java index 4957e376521..3e121e0242c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java @@ -17,14 +17,14 @@ */ package org.apache.avro; -public class DummyLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory { +public class CustomTypeLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory { @Override public LogicalType fromSchema(Schema schema) { - return LogicalTypes.date(); + return new LogicalType(getTypeName()); } @Override public String getTypeName() { - return "service-example"; + return "custom"; } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java index a9f78f16899..f35c62d7a2e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java @@ -18,19 +18,32 @@ package org.apache.avro; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; public class TestFixed { @Test - public void testFixedDefaultValueDrop() { + void fixedDefaultValueDrop() { Schema md5 = SchemaBuilder.builder().fixed("MD5").size(16); Schema frec = SchemaBuilder.builder().record("test").fields().name("hash").type(md5).withDefault(new byte[16]) .endRecord(); Schema.Field field = frec.getField("hash"); - Assert.assertNotNull(field.defaultVal()); - Assert.assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + assertNotNull(field.defaultVal()); + assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); + } + + @Test + void fixedLengthOutOfLimit() { + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> Schema.createFixed("oversize", 
"doc", "space", Integer.MAX_VALUE)); + assertEquals(TestSystemLimitException.ERROR_VM_LIMIT_BYTES, ex.getMessage()); } + @Test + void fixedNegativeLength() { + Exception ex = assertThrows(AvroRuntimeException.class, () -> Schema.createFixed("negative", "doc", "space", -1)); + assertEquals(TestSystemLimitException.ERROR_NEGATIVE, ex.getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java index deadb2efc58..2514a5ab0dd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java @@ -291,7 +291,7 @@ public void testRegisterLogicalTypeWithFactoryNameNotProvided() { @Test public void testRegisterLogicalTypeFactoryByServiceLoader() { MatcherAssert.assertThat(LogicalTypes.getCustomRegisteredTypes(), - IsMapContaining.hasEntry(equalTo("service-example"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); + IsMapContaining.hasEntry(equalTo("custom"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); } public static void assertEqualsTrue(String message, Object o1, Object o2) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java index 881b929eaa6..5af522235ea 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java @@ -17,9 +17,6 @@ */ package org.apache.avro; -import static java.util.Collections.emptyList; -import static java.util.Collections.emptyMap; -import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.junit.Assert.*; @@ -27,13 +24,31 @@ public class TestProtocol { + @Test + public void testNamespaceAndNameRules() { + Protocol p1 = new Protocol("P", null, "foo"); + Protocol p2 = new Protocol("foo.P", null, null); + Protocol p3 = new Protocol("foo.P", null, "bar"); + assertEquals(p1.getName(), p2.getName()); + assertEquals(p1.getNamespace(), p2.getNamespace()); + assertEquals(p1.getName(), p3.getName()); + assertEquals(p1.getNamespace(), p3.getNamespace()); + + // The following situation is allowed, even if confusing, because the + // specification describes this algorithm without specifying that the resulting + // namespace mst be non-empty. 
+ Protocol invalidName = new Protocol(".P", null, "ignored"); + assertNull(invalidName.getNamespace()); + assertEquals("P", invalidName.getName()); + } + @Test public void testPropEquals() { Protocol p1 = new Protocol("P", null, "foo"); p1.addProp("a", "1"); Protocol p2 = new Protocol("P", null, "foo"); p2.addProp("a", "2"); - assertFalse(p1.equals(p2)); + assertNotEquals(p1, p2); } @Test diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 47cafcec189..2918f8b9eed 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -89,13 +89,18 @@ public class TestReadingWritingDataInEvolvedSchemas { .fields() // .name(FIELD_A).type().unionOf().stringType().and().bytesType().endUnion().noDefault() // .endRecord(); + + private static final Schema ENUM_AB = SchemaBuilder.enumeration("Enum1").symbols("A", "B"); + private static final Schema ENUM_AB_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B").noDefault() // + .name(FIELD_A).type(ENUM_AB).noDefault() // .endRecord(); + + private static final Schema ENUM_ABC = SchemaBuilder.enumeration("Enum1").symbols("A", "B", "C"); private static final Schema ENUM_ABC_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B", "C").noDefault() // + .name(FIELD_A).type(ENUM_ABC).noDefault() // .endRecord(); private static final Schema UNION_INT_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // @@ -310,7 +315,7 @@ public void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Ex @Test public void enumRecordCanBeReadWithExtendedEnumSchema() throws Exception { Schema writer = ENUM_AB_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_AB, "A")); byte[] encoded = encodeGenericBlob(record); Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded); assertEquals("A", decoded.get(FIELD_A).toString()); @@ -319,7 +324,7 @@ public void enumRecordCanBeReadWithExtendedEnumSchema() throws Exception { @Test public void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues() throws Exception { Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "A")); byte[] encoded = encodeGenericBlob(record); Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); assertEquals("A", decoded.get(FIELD_A).toString()); @@ -330,7 +335,7 @@ public void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed() throws expectedException.expect(AvroTypeException.class); expectedException.expectMessage("No match for C"); Schema writer = ENUM_ABC_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "C")); + Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "C")); byte[] encoded = encodeGenericBlob(record); decodeGenericBlob(ENUM_AB_RECORD, writer, encoded); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java 
b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 95cb367460e..e79faad2691 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -21,10 +21,14 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -395,4 +399,36 @@ public void testQualifiedName() { assertEquals("Int", nameInt.getQualified("space")); } + @Test + public void testContentAfterAvsc() throws Exception { + Schema.Parser parser = new Schema.Parser(); + parser.setValidate(true); + parser.setValidateDefaults(true); + assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS")); + } + + @Test + public void testContentAfterAvscInInputStream() throws Exception { + Schema.Parser parser = new Schema.Parser(); + parser.setValidate(true); + parser.setValidateDefaults(true); + String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS"; + ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8)); + Schema schema = parser.parse(is); + assertNotNull(schema); + } + + @Test + public void testContentAfterAvscInFile() throws Exception { + File avscFile = Files.createTempFile("testContentAfterAvscInFile", null).toFile(); + try (FileWriter writer = new FileWriter(avscFile)) { + writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS"); + writer.flush(); + } + + Schema.Parser parser = new Schema.Parser(); + parser.setValidate(true); + parser.setValidateDefaults(true); + assertThrows(SchemaParseException.class, () -> parser.parse(avscFile)); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java new file mode 100644 index 00000000000..30f760b3053 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCommons.java @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Stream; + +import org.apache.avro.file.DataFileReader; +import org.apache.avro.file.DataFileWriter; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.DatumWriter; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestSchemaCommons { + private static final Logger LOG = LoggerFactory.getLogger(TestSchemaCommons.class); + + @ParameterizedTest + @MethodSource("sharedFolders") + void runFolder(final File folder) throws IOException { + final File schemaSource = new File(folder, "schema.json"); + final File data = new File(folder, "data.avro"); + + if (!schemaSource.exists()) { + LOG.warn("No 'schema.json' file on folder {}", folder.getPath()); + return; + } + final Schema schema = new Schema.Parser().parse(schemaSource); + Assertions.assertNotNull(schema); + + if (!data.exists()) { + LOG.warn("No 'data.avro' file on folder {}", folder.getPath()); + return; + } + + // output file + final String rootTest = Thread.currentThread().getContextClassLoader().getResource(".").getPath(); + final File copyData = new File(rootTest, "copy.avro"); + + // Deserialize from disk + DatumWriter datumWriter = new GenericDatumWriter<>(schema); + GenericDatumReader datumReader = new GenericDatumReader<>(schema); + try (DataFileReader dataFileReader = new DataFileReader<>(data, datumReader); + DataFileWriter dataFileWriter = new DataFileWriter<>(datumWriter)) { + dataFileWriter.create(schema, copyData); + GenericRecord record = null; + int counter = 0; + while (dataFileReader.hasNext()) { + record = dataFileReader.next(); + counter++; + Assertions.assertNotNull(record); + dataFileWriter.append(record); + } + Assertions.assertTrue(counter > 0, "no data in file"); + } + } + + public static Stream sharedFolders() { + File root = new File("../../../share/test/data/schemas"); + return Arrays.stream(root.listFiles(File::isDirectory)).map(Arguments::of); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java new file mode 100644 index 00000000000..0da39179506 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java @@ -0,0 +1,164 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro; + +import static org.apache.avro.SystemLimitException.*; +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import java.util.function.Function; + +public class TestSystemLimitException { + + /** Delegated here for package visibility. */ + public static final int MAX_ARRAY_VM_LIMIT = SystemLimitException.MAX_ARRAY_VM_LIMIT; + + public static final String ERROR_NEGATIVE = "Malformed data. Length is negative: -1"; + public static final String ERROR_VM_LIMIT_BYTES = "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + + " bytes in Java library"; + public static final String ERROR_VM_LIMIT_COLLECTION = "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + + " items in Java library"; + public static final String ERROR_VM_LIMIT_STRING = "Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"; + + /** Delegated here for package visibility. */ + public static void resetLimits() { + SystemLimitException.resetLimits(); + } + + @AfterEach + void reset() { + System.clearProperty(MAX_BYTES_LENGTH_PROPERTY); + System.clearProperty(MAX_COLLECTION_LENGTH_PROPERTY); + System.clearProperty(MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + + /** + * A helper method that tests the consistent limit handling from system + * properties. + * + * @param f The function to be tested. + * @param sysProperty The system property used to control the custom limit. + * @param errorVmLimit The error message used when the property would be + * over the VM limit. + * @param errorCustomLimit The error message used when the property would be + * over the custom limit of 1000. + */ + void helpCheckSystemLimits(Function f, String sysProperty, String errorVmLimit, + String errorCustomLimit) { + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(1024, f.apply(1024L)); + assertEquals(MAX_ARRAY_VM_LIMIT, f.apply((long) MAX_ARRAY_VM_LIMIT)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, () -> f.apply(Long.MAX_VALUE)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. 
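+ // resetLimits() below forces the new property value to be re-read, since the limits appear to be read once and then cached.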
+ System.setProperty(sysProperty, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(102, f.apply(102L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> f.apply(1024L)); + assertEquals(errorCustomLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } + + @Test + void testCheckMaxBytesLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxBytesLength, MAX_BYTES_LENGTH_PROPERTY, ERROR_VM_LIMIT_BYTES, + "Bytes length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromZero() { + helpCheckSystemLimits(l -> checkMaxCollectionLength(0L, l), MAX_COLLECTION_LENGTH_PROPERTY, + ERROR_VM_LIMIT_COLLECTION, "Collection length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxStringLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxStringLength, MAX_STRING_LENGTH_PROPERTY, ERROR_VM_LIMIT_STRING, + "String length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromNonZero() { + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 10L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT - 10L, 10L)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 9L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(SystemLimitException.MAX_ARRAY_VM_LIMIT - 9L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE - 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE - 10L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Overflow that adds to negative + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(10L, -1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(-1L, 10L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. 
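+ // As above: once the custom limit of 1000 is active, a combined length of 999 + 25 = 1024 items must be rejected even though it is far below the VM limit.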
+ System.setProperty(MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(102, checkMaxCollectionLength(10L, 92L)); + assertEquals(102, checkMaxCollectionLength(92L, 10L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT, 1)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(1, MAX_ARRAY_VM_LIMIT)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(999, 25)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(25, 999)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java index 21492d1b0ec..c764dcd7fe4 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java @@ -17,19 +17,23 @@ */ package org.apache.avro.generic; -import static org.apache.avro.TestCircularReferences.Reference; -import static org.apache.avro.TestCircularReferences.Referenceable; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; +import org.apache.avro.Schema.Type; +import org.apache.avro.SchemaBuilder; +import org.apache.avro.TestCircularReferences.ReferenceManager; +import org.apache.avro.generic.GenericData.Record; +import org.apache.avro.io.BinaryData; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.util.Utf8; +import org.junit.Test; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -47,18 +51,15 @@ import java.util.Map; import java.util.UUID; -import org.apache.avro.AvroRuntimeException; -import org.apache.avro.Schema; -import org.apache.avro.Schema.Field; -import org.apache.avro.Schema.Type; -import org.apache.avro.SchemaBuilder; -import org.apache.avro.TestCircularReferences.ReferenceManager; -import org.apache.avro.generic.GenericData.Record; -import org.apache.avro.io.BinaryData; -import org.apache.avro.io.BinaryEncoder; -import org.apache.avro.io.EncoderFactory; -import org.apache.avro.util.Utf8; -import org.junit.Test; +import static org.apache.avro.TestCircularReferences.Reference; +import static org.apache.avro.TestCircularReferences.Referenceable; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; 
+import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; public class TestGenericData { @@ -103,7 +104,7 @@ public void testGetEmptySchemaField() throws Exception { @Test(expected = AvroRuntimeException.class) public void testRecordPutInvalidField() throws Exception { Schema s = Schema.createRecord("schemaName", "schemaDoc", "namespace", false); - List fields = new ArrayList<>(); + List fields = new ArrayList<>(); fields.add(new Schema.Field("someFieldName", s, "docs", null)); s.setFields(fields); Record r = new GenericData.Record(s); @@ -145,6 +146,157 @@ public void testEquals() { assertEquals(r1, r2); } + @Test + public void testMapKeyEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put(new Utf8("keyOne"), "valueOne"); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testMapValuesEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r0.put("my_map", pair1); + + HashMap pair2 = new HashMap<>(); + pair2.put("keyOne", new Utf8("valueOne")); + r1.put("my_map", pair2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMaps() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + r1.put("my_map", new HashMap<>()); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMapAndNonEmptyMap() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + HashMap pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r1.put("my_map", pair1); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSubset() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + m1.put("keyTwo", 
"valueTwo"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueOne"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentKeys() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyTwo", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentValues() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap m2 = new HashMap<>(); + m2.put("keyOne", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testArrayValuesEqualsStringAndUtf8Compatibility() { + Field myArrayField = new Field("my_array", Schema.createArray(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myArrayField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + List array1 = Arrays.asList("valueOne"); + r0.put("my_array", array1); + + List array2 = Arrays.asList(new Utf8("valueOne")); + r1.put("my_array", array2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + private Schema recordSchema() { List fields = new ArrayList<>(); fields.add(new Field("anArray", Schema.createArray(Schema.create(Type.STRING)), null, null)); @@ -230,8 +382,9 @@ public void testArrayAddAtLocation() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(6, schema); array.clear(); - for (int i = 0; i < 5; ++i) + for (int i = 0; i < 5; ++i) { array.add(i); + } assertEquals(5, array.size()); array.add(0, 6); assertEquals(Integer.valueOf(6), array.get(0)); @@ -260,8 +413,9 @@ public void testArrayRemove() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); - for (int i = 0; i < 10; ++i) + for (int i = 0; i < 10; ++i) { array.add(i); + } assertEquals(10, array.size()); assertEquals(Integer.valueOf(0), array.get(0)); assertEquals(Integer.valueOf(9), array.get(9)); @@ -303,8 +457,9 @@ public void testArraySet() { Schema schema = Schema.createArray(Schema.create(Schema.Type.INT)); GenericArray array = new GenericData.Array<>(10, schema); array.clear(); - for (int i = 0; i < 10; ++i) + for (int i = 0; i < 10; ++i) { array.add(i); + } assertEquals(10, array.size()); assertEquals(Integer.valueOf(0), array.get(0)); assertEquals(Integer.valueOf(5), array.get(5)); @@ -551,7 +706,9 @@ public void 
testValidateNullableEnum() { private enum anEnum { ONE, TWO, THREE - }; + } + + ; @Test public void validateRequiresGenericSymbolForEnumSchema() { diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java index 593f759f854..272673cb3bc 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java @@ -18,9 +18,6 @@ package org.apache.avro.generic; -import static org.hamcrest.Matchers.is; -import static org.hamcrest.MatcherAssert.assertThat; - import java.io.File; import java.io.IOException; import java.math.BigDecimal; @@ -33,8 +30,10 @@ import java.util.Collections; import java.util.List; import java.util.UUID; + import org.apache.avro.Conversion; import org.apache.avro.Conversions; +import org.apache.avro.CustomType; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; @@ -50,6 +49,11 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; + public class TestGenericLogicalTypes { @TempDir @@ -404,4 +408,53 @@ public void testWriteLocalTimestampMicros() throws IOException { Assertions.assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), "Should read LocalDateTime as longs"); } + + @Test + public void testReadAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + List expected = Arrays.asList(ct1, ct2); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + + File test = write(stringSchema, ct1String, ct2String); + assertEquals(expected, read(GENERIC.createDatumReader(customTypeSchema), test), + "Should convert string to CustomType"); + } + + @Test + public void testWriteAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + + Conversion conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + List expected = Arrays.asList(ct1String, 
ct2String); + + File test = write(GENERIC, customTypeSchema, ct1, ct2); + + // Note that this test still cannot read strings using the logical type + // schema, as all GenericData instances have the logical type and the + // conversions loaded. That's why this final assert is slightly different. + + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read CustomType as strings"); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java index edbcd0bcbe3..167cd724630 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java @@ -18,8 +18,10 @@ package org.apache.avro.io; -import org.junit.Assert; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.junit.jupiter.api.Test; public class TestBinaryData { @@ -29,13 +31,33 @@ public class TestBinaryData { * normal 8). When skipping it, the next byte should be 10. */ @Test - public void testSkipLong() { + void skipLong() { byte[] b = new byte[10]; BinaryData.encodeLong(Long.MAX_VALUE, b, 0); final int nextIndex = BinaryData.skipLong(b, 0); - Assert.assertEquals(nextIndex, 10); + assertEquals(nextIndex, 10); } + @Test + void testIntLongVleEquality() { + byte[] intResult = new byte[9]; + byte[] longResult = new byte[9]; + BinaryData.encodeInt(0, intResult, 0); + BinaryData.encodeLong(0, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(42, intResult, 0); + BinaryData.encodeLong(42, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(-24, intResult, 0); + BinaryData.encodeLong(-24, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MAX_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MAX_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MIN_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java index fe405cfb9d2..6010fc9c69f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java @@ -17,56 +17,49 @@ */ package org.apache.avro.io; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; +import org.apache.avro.SystemLimitException; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.ByteBufferInputStream; import org.apache.avro.util.ByteBufferOutputStream; import org.apache.avro.util.RandomData; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - 
-@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.apache.avro.TestSystemLimitException.*; + public class TestBinaryDecoder { // prime number buffer size so that looping tests hit the buffer edge // at different points in the loop. DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(521); - private boolean useDirect = false; - static EncoderFactory e_factory = EncoderFactory.get(); - public TestBinaryDecoder(boolean useDirect) { - this.useDirect = useDirect; - } - - @Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { true }, { false }, }); - } + static EncoderFactory e_factory = EncoderFactory.get(); - private Decoder newDecoderWithNoData() { - return newDecoder(new byte[0]); + private Decoder newDecoderWithNoData(boolean useDirect) { + return newDecoder(new byte[0], useDirect); } - private BinaryDecoder newDecoder(byte[] bytes, int start, int len) { - return this.newDecoder(bytes, start, len, null); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, boolean useDirect) { + return this.newDecoder(bytes, start, len, null, useDirect); } - private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse) { + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { final ByteArrayInputStream input = new ByteArrayInputStream(bytes, start, len); return factory.directBinaryDecoder(input, reuse); @@ -75,11 +68,11 @@ private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder } } - private BinaryDecoder newDecoder(InputStream in) { - return this.newDecoder(in, null); + private BinaryDecoder newDecoder(InputStream in, boolean useDirect) { + return this.newDecoder(in, null, useDirect); } - private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse) { + private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { return factory.directBinaryDecoder(in, reuse); } else { @@ -87,67 +80,93 @@ private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse) { } } - private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse) { - if (this.useDirect) { + private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { return this.factory.directBinaryDecoder(new ByteArrayInputStream(bytes), reuse); } else { return factory.binaryDecoder(bytes, reuse); } } - private BinaryDecoder newDecoder(byte[] bytes) { - return this.newDecoder(bytes, null); + private BinaryDecoder newDecoder(byte[] bytes, boolean useDirect) { + return this.newDecoder(bytes, null, useDirect); + } + + /** + * Create a decoder for simulating reading corrupt, unexpected or out-of-bounds + * data. + * + * @return a {@link org.apache.avro.io.BinaryDecoder that has been initialized + * on a byte array containing the sequence of encoded longs in order. + */ + private BinaryDecoder newDecoder(boolean useDirect, long... 
values) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + for (long v : values) + encoder.writeLong(v); + encoder.flush(); + return newDecoder(baos.toByteArray(), useDirect); + } } /** Verify EOFException throw at EOF */ - @Test(expected = EOFException.class) - public void testEOFBoolean() throws IOException { - newDecoderWithNoData().readBoolean(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBoolean(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBoolean()); } - @Test(expected = EOFException.class) - public void testEOFInt() throws IOException { - newDecoderWithNoData().readInt(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofInt(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testEOFLong() throws IOException { - newDecoderWithNoData().readLong(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofLong(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testEOFFloat() throws IOException { - newDecoderWithNoData().readFloat(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFloat(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFloat()); } - @Test(expected = EOFException.class) - public void testEOFDouble() throws IOException { - newDecoderWithNoData().readDouble(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofDouble(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readDouble()); } - @Test(expected = EOFException.class) - public void testEOFBytes() throws IOException { - newDecoderWithNoData().readBytes(null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBytes(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBytes(null)); } - @Test(expected = EOFException.class) - public void testEOFString() throws IOException { - newDecoderWithNoData().readString(new Utf8("a")); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofString(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readString(new Utf8("a"))); } - @Test(expected = EOFException.class) - public void testEOFFixed() throws IOException { - newDecoderWithNoData().readFixed(new byte[1]); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFixed(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFixed(new byte[1])); } - @Test(expected = EOFException.class) - public void testEOFEnum() throws IOException { - newDecoderWithNoData().readEnum(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofEnum(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readEnum()); } @Test - public void testReuse() throws IOException { + void reuse() throws IOException { ByteBufferOutputStream bbo1 = new ByteBufferOutputStream(); ByteBufferOutputStream bbo2 = new ByteBufferOutputStream(); 
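For reference, a minimal sketch of the JUnit 5 idiom this migration adopts throughout the file: the `useDirect` flag that was previously injected through a `@Parameterized` constructor now arrives as a per-invocation method argument (the class and method names below are illustrative only; the `reuse()` test then continues below):

```java
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

class ValueSourceExample {
  @ParameterizedTest
  @ValueSource(booleans = { true, false })
  void runsOncePerValue(boolean useDirect) {
    // JUnit 5 runs this body once per value; no constructor field or
    // @Parameters factory method is required as in JUnit 4.
    Assertions.assertNotNull(Boolean.valueOf(useDirect));
  }
}
```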
byte[] b1 = new byte[] { 1, 2 }; @@ -162,11 +181,11 @@ public void testReuse() throws IOException { DirectBinaryDecoder d = new DirectBinaryDecoder(new ByteBufferInputStream(bbo1.getBufferList())); ByteBuffer bb1 = d.readBytes(null); - Assert.assertEquals(b1.length, bb1.limit() - bb1.position()); + Assertions.assertEquals(b1.length, bb1.limit() - bb1.position()); d.configure(new ByteBufferInputStream(bbo2.getBufferList())); ByteBuffer bb2 = d.readBytes(null); - Assert.assertEquals(b1.length, bb2.limit() - bb2.position()); + Assertions.assertEquals(b1.length, bb2.limit() - bb2.position()); } @@ -175,7 +194,7 @@ public void testReuse() throws IOException { private static final int count = 200; private static final ArrayList records = new ArrayList<>(count); - @BeforeClass + @BeforeAll public static void generateData() throws IOException { int seed = (int) System.currentTimeMillis(); // note some tests (testSkipping) rely on this explicitly @@ -199,8 +218,9 @@ public static void generateData() throws IOException { data = baos.toByteArray(); } - @Test - public void testDecodeFromSources() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void decodeFromSources(boolean useDirect) throws IOException { GenericDatumReader reader = new GenericDatumReader<>(); reader.setSchema(schema); @@ -208,81 +228,82 @@ public void testDecodeFromSources() throws IOException { ByteArrayInputStream is2 = new ByteArrayInputStream(data); ByteArrayInputStream is3 = new ByteArrayInputStream(data); - Decoder fromInputStream = newDecoder(is); - Decoder fromArray = newDecoder(data); + Decoder fromInputStream = newDecoder(is, useDirect); + Decoder fromArray = newDecoder(data, useDirect); byte[] data2 = new byte[data.length + 30]; Arrays.fill(data2, (byte) 0xff); System.arraycopy(data, 0, data2, 15, data.length); - Decoder fromOffsetArray = newDecoder(data2, 15, data.length); + Decoder fromOffsetArray = newDecoder(data2, 15, data.length, useDirect); - BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30); - initOnInputStream = newDecoder(is2, initOnInputStream); - BinaryDecoder initOnArray = this.newDecoder(is3, null); - initOnArray = this.newDecoder(data, initOnArray); + BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30, useDirect); + initOnInputStream = newDecoder(is2, initOnInputStream, useDirect); + BinaryDecoder initOnArray = this.newDecoder(is3, null, useDirect); + initOnArray = this.newDecoder(data, initOnArray, useDirect); for (Object datum : records) { - Assert.assertEquals("InputStream based BinaryDecoder result does not match", datum, - reader.read(null, fromInputStream)); - Assert.assertEquals("Array based BinaryDecoder result does not match", datum, reader.read(null, fromArray)); - Assert.assertEquals("offset Array based BinaryDecoder result does not match", datum, - reader.read(null, fromOffsetArray)); - Assert.assertEquals("InputStream initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnInputStream)); - Assert.assertEquals("Array initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnArray)); + Assertions.assertEquals(datum, reader.read(null, fromInputStream), + "InputStream based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromArray), "Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromOffsetArray), + "offset Array based BinaryDecoder result does not match"); + 
Assertions.assertEquals(datum, reader.read(null, initOnInputStream), + "InputStream initialized BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnArray), + "Array initialized BinaryDecoder result does not match"); } } - @Test - public void testInputStreamProxy() throws IOException { - BinaryDecoder d = newDecoder(data); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxy(boolean useDirect) throws IOException { + BinaryDecoder d = newDecoder(data, useDirect); if (d != null) { BinaryDecoder bd = d; InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = this.newDecoder(data, bd); + bd = this.newDecoder(data, bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); // with input stream sources - bd = newDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = newDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); } } - @Test - public void testInputStreamProxyDetached() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - InputStream test = bd.inputStream(); - InputStream check = new ByteArrayInputStream(data); - // detach input stream and decoder from old source - this.newDecoder(new byte[56]); - try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { - validateInputStreamReads(test, check); - Assert.assertNotEquals(bad.read(), check2.read()); - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxyDetached(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + + InputStream test = bd.inputStream(); + InputStream check = new ByteArrayInputStream(data); + // detach input stream and decoder from old source + this.newDecoder(new byte[56], useDirect); + try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { + validateInputStreamReads(test, check); + Assertions.assertNotEquals(bad.read(), check2.read()); } } - @Test - public void testInputStreamPartiallyUsed() throws IOException { - BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamPartiallyUsed(boolean useDirect) throws IOException { + BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data), useDirect); InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); // triggers buffer fill if unused and tests isEnd() try { - Assert.assertFalse(bd.isEnd()); + Assertions.assertFalse(bd.isEnd()); } catch (UnsupportedOperationException e) { // this is ok if its a DirectBinaryDecoder. 
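A short sketch of the `inputStream()` proxy behavior these tests exercise: the decoder exposes its unread bytes as an `InputStream` view, so bytes consumed through the view are consumed from the decoder too. This is a hedged illustration assuming the buffered, array-backed decoder from `DecoderFactory`; the surrounding test (which continues below) pins down the exact re-initialization semantics:

```java
import java.io.InputStream;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;

class InputStreamProxyExample {
  public static void main(String[] args) throws Exception {
    byte[] data = { 1, 2, 3, 4 };
    BinaryDecoder d = DecoderFactory.get().binaryDecoder(data, null);
    InputStream rest = d.inputStream(); // proxies the decoder's remaining bytes
    System.out.println(rest.read());    // 1 -- also advances the decoder
    // Re-initializing via DecoderFactory.get().binaryDecoder(newBytes, d)
    // repoints the decoder at a new source; the tests above verify what a
    // previously handed-out stream sees after that.
  }
}
```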
if (bd.getClass() != DirectBinaryDecoder.class) { @@ -300,25 +321,28 @@ private void validateInputStreamReads(InputStream test, InputStream check) throw while (true) { int t = test.read(); int c = check.read(); - Assert.assertEquals(c, t); - if (-1 == t) + Assertions.assertEquals(c, t); + if (-1 == t) { break; + } t = test.read(bt); c = check.read(bc); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } t = test.read(bt, 1, 4); c = check.read(bc, 1, 4); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } } - Assert.assertEquals(0, test.skip(5)); - Assert.assertEquals(0, test.available()); - Assert.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); + Assertions.assertEquals(0, test.skip(5)); + Assertions.assertEquals(0, test.available()); + Assertions.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); test.close(); } @@ -326,154 +350,300 @@ private void validateInputStreamSkips(InputStream test, InputStream check) throw while (true) { long t2 = test.skip(19); long c2 = check.skip(19); - Assert.assertEquals(c2, t2); - if (0 == t2) + Assertions.assertEquals(c2, t2); + if (0 == t2) { break; + } } - Assert.assertEquals(-1, test.read()); + Assertions.assertEquals(-1, test.read()); } - @Test - public void testBadIntEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badIntEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[5]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = this.newDecoder(badint); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readInt(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid int encoding", message); + Assertions.assertEquals("Invalid int encoding", message); } - @Test - public void testBadLongEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badLongEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[10]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = this.newDecoder(badint); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readLong(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid long encoding", message); + Assertions.assertEquals("Invalid long encoding", message); } - @Test - public void testNegativeStringLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, this.newDecoder(useDirect, -1L)::readString); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } - Assert.assertThrows("Malformed data. 
Length is negative: -1", AvroRuntimeException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1L)::readString); + Assertions.assertEquals(ERROR_VM_LIMIT_STRING, ex.getMessage()); } - @Test - public void testStringMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, newDecoder(useDirect, 129)::readString); + Assertions.assertEquals("String length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + } - Assert.assertThrows("Cannot read strings longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, + () -> this.newDecoder(useDirect, -1).readBytes(null)); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); } - @Test - public void testNegativeBytesLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readBytes(null)); + Assertions.assertEquals(ERROR_VM_LIMIT_BYTES, ex.getMessage()); + } - Assert.assertThrows("Malformed data. 
Length is negative: -1", AvroRuntimeException.class, () -> bd.readBytes(null)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, + () -> newDecoder(useDirect, 129).readBytes(null)); + Assertions.assertEquals("Bytes length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxArraySize() { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).arrayNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // An OK reads followed by an overflow + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, Long.MAX_VALUE); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. 
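The raw longs written by the `newDecoder(useDirect, long...)` helper stand in for Avro's block encoding, which the collection tests below simulate: in the binary format an array or map is a sequence of blocks where a positive count announces that many items, a negative count announces `abs(count)` items followed by the block's size in bytes, and zero terminates the collection. A minimal illustrative encoder, simulating framing only with no real items (the test then continues below):

```java
import java.io.ByteArrayOutputStream;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

class ArrayBlockSketch {
  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder enc = EncoderFactory.get().binaryEncoder(out, null);
    enc.writeLong(-2);  // size-prefixed block: two items follow...
    enc.writeLong(10);  // ...after this block-size-in-bytes hint
    // (the two encoded items themselves would be written here)
    enc.writeLong(0);   // end of the array
    enc.flush();
    System.out.println(out.size() + " bytes of block framing");
  }
}
```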
+ Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Cannot read arrays longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, () -> bd.readBytes(null)); + } finally { + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxLengthProperty() { - int maxLength = 128; - byte[] bad = new byte[10]; - BinaryData.encodeLong(maxLength + 1, bad, 0); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).mapNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapMaxCustom(boolean useDirect) throws IOException { try { - System.setProperty("org.apache.avro.limits.bytes.maxLength", Long.toString(maxLength)); - Decoder bd = this.newDecoder(bad); + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. 
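Note the set/reset discipline shared by every property-driven test in this file: the property is set and the cached limits re-read inside `try`, and both are unconditionally undone in `finally`, so a failing assertion cannot leak a custom limit into later tests. Sketched as a hypothetical reusable helper (not part of the patch; it borrows the test-only `resetLimits()` hook, which the Avro tests already call across packages; the map test then continues below):

```java
import org.apache.avro.SystemLimitException;
import org.apache.avro.TestSystemLimitException;

class PropertyTestDiscipline {
  static void withCustomCollectionLimit(String limit, Runnable body) {
    try {
      System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, limit);
      TestSystemLimitException.resetLimits(); // limits are cached; force a re-read
      body.run();
    } finally {
      System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY);
      TestSystemLimitException.resetLimits();
    }
  }
}
```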
+ Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Bytes length " + (maxLength + 1) + " exceeds maximum allowed", AvroRuntimeException.class, - () -> bd.readBytes(null)); } finally { - System.clearProperty("org.apache.avro.limits.bytes.maxLength"); + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); } } - @Test(expected = UnsupportedOperationException.class) - public void testLongLengthEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longLengthEncoding(boolean useDirect) { // Size equivalent to Integer.MAX_VALUE + 1 byte[] bad = new byte[] { (byte) -128, (byte) -128, (byte) -128, (byte) -128, (byte) 16 }; - Decoder bd = this.newDecoder(bad); - bd.readString(); + Decoder bd = this.newDecoder(bad, useDirect); + Assertions.assertThrows(UnsupportedOperationException.class, bd::readString); } - @Test(expected = EOFException.class) - public void testIntTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void intTooShort(boolean useDirect) { byte[] badint = new byte[4]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testLongTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longTooShort(boolean useDirect) { byte[] badint = new byte[9]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testFloatTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void floatTooShort(boolean useDirect) { byte[] badint = new byte[3]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testDoubleTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void doubleTooShort(boolean useDirect) { byte[] badint = new byte[7]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test - public void testSkipping() throws IOException { - Decoder d = newDecoder(data); - skipGenerated(d); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a 
DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void skipping(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + skipGenerated(bd); + + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } - bd = this.newDecoder(new ByteArrayInputStream(data), bd); - skipGenerated(bd); - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + } + bd = this.newDecoder(new ByteArrayInputStream(data), bd, useDirect); + skipGenerated(bd); + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if its a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } } + } private void skipGenerated(Decoder bd) throws IOException { @@ -496,19 +666,20 @@ private void skipGenerated(Decoder bd) throws IOException { } catch (EOFException e) { eof = e; } - Assert.assertNotNull(eof); + Assertions.assertNotNull(eof); } - @Test(expected = EOFException.class) - public void testEOF() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eof(boolean useDirect) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = EncoderFactory.get().binaryEncoder(baos, null); e.writeLong(0x10000000000000L); e.flush(); - Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray())); - Assert.assertEquals(0x10000000000000L, d.readLong()); - d.readInt(); + Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray()), useDirect); + Assertions.assertEquals(0x10000000000000L, d.readLong()); + Assertions.assertThrows(EOFException.class, () -> d.readInt()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java index 8e3dc8e53d7..abde027e23f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java @@ -101,7 +101,7 @@ private static Object[][] dataForResolvingTests() { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g1\", " + "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" + "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," + "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - + "{\"name\": \"g2\", \"type\": \"long\"}]}}", + + "{\"name\": \"g2\", \"type\": \"long\"}]}", "RRIIL", new Object[] { 10, 101, 11L } }, // Default value for a record. 
{ "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g2\", \"type\": \"long\"}]}", "L", diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java index 4eac760cec7..212bc3ad374 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java @@ -103,9 +103,9 @@ public void testDifferingEnumNamespaces() throws Exception { public static Collection data() { Collection ret = Arrays.asList(new Object[][] { { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { \"name\" : \"f1\", \"type\": \"int\" }, " - + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{ \"f2\": 10.4, \"f1\": 10 } " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s1\" " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s2\" " }, + + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ \"f2\": 10.4, \"f1\": 10 } " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s1\" " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s2\" " }, { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"" }, { "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" }, { "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\": 20, \"k3\": 30 }" }, diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java index c23a2f7369b..591c68fc07e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java @@ -764,6 +764,6 @@ public boolean equals(Object obj) { return false; } RecordWithTimestamps that = (RecordWithTimestamps) obj; - return Objects.equals(that.localDateTime, that.localDateTime); + return Objects.equals(localDateTime, that.localDateTime); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java index 12d8ddbcc83..faa60bdc6e8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificData.java @@ -174,4 +174,11 @@ public void testNonStringable() throws Exception { // Expected error } } + + @Test + public void classNameContainingReservedWords() { + final Schema schema = Schema.createRecord("AnyName", null, "db.public.table", false); + + assertEquals("db.public$.table.AnyName", SpecificData.getClassName(schema)); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java index 918465a725c..9c5ca545053 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java @@ -19,9 +19,7 @@ import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertEquals; 
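The new `classNameContainingReservedWords` test above pins down an easy-to-miss mangling rule: `SpecificData` escapes Java keywords in namespace segments so the generated class name stays legal. A standalone sketch of the same behavior (the schema values mirror the test):

```java
import org.apache.avro.Schema;
import org.apache.avro.specific.SpecificData;

class ReservedWordMangling {
  public static void main(String[] args) {
    // "public" is a Java keyword and cannot appear verbatim in a package
    // name, so SpecificData escapes it with a trailing '$'.
    Schema schema = Schema.createRecord("AnyName", null, "db.public.table", false);
    System.out.println(SpecificData.getClassName(schema)); // db.public$.table.AnyName
  }
}
```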
+import static org.junit.jupiter.api.Assertions.*; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -30,7 +28,9 @@ import java.io.ObjectOutputStream; import java.nio.charset.StandardCharsets; -import org.junit.Test; +import org.apache.avro.SystemLimitException; +import org.apache.avro.TestSystemLimitException; +import org.junit.jupiter.api.Test; public class TestUtf8 { @Test @@ -98,6 +98,26 @@ public void testHashCodeReused() { assertEquals(3198781, u.hashCode()); } + @Test + void oversizeUtf8() { + Utf8 u = new Utf8(); + u.setByteLength(1024); + assertEquals(1024, u.getByteLength()); + assertThrows(UnsupportedOperationException.class, + () -> u.setByteLength(TestSystemLimitException.MAX_ARRAY_VM_LIMIT + 1)); + + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(1000L)); + TestSystemLimitException.resetLimits(); + + Exception ex = assertThrows(SystemLimitException.class, () -> u.setByteLength(1024)); + assertEquals("String length 1024 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + TestSystemLimitException.resetLimits(); + } + } + @Test public void testSerialization() throws IOException, ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java new file mode 100644 index 00000000000..54c887cc167 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/ComparableComparator.java @@ -0,0 +1,44 @@ +/* + * Copyright 2002-2018 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import java.util.Comparator; + +/** + * Comparator that adapts Comparables to the Comparator interface. Mainly for + * internal use in other Comparators, when supposed to work on Comparables. + * + * @author Keith Donald + * @since 1.2.2 + * @param the type of comparable objects that may be compared by this + * comparator + * @see Comparable + */ +class ComparableComparator> implements Comparator { + + /** + * A shared instance of this default comparator. see Comparators#comparable() + */ + @SuppressWarnings("rawtypes") + public static final ComparableComparator INSTANCE = new ComparableComparator(); + + @Override + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java new file mode 100644 index 00000000000..f621abfe42e --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/NullSafeComparator.java @@ -0,0 +1,132 @@ +/* + * Copyright 2002-2018 the original author or authors. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.util.Comparator; + +/** + * A Comparator that will safely compare nulls to be lower or higher than other + * objects. Can decorate a given Comparator or work on Comparables. + * + * @author Keith Donald + * @author Juergen Hoeller + * @since 1.2.2 + * @param the type of objects that may be compared by this comparator + * @see Comparable + */ +class NullSafeComparator implements Comparator { + + /** + * A shared default instance of this comparator, treating nulls lower than + * non-null objects. see Comparators#nullsLow() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_LOW = new NullSafeComparator<>(true); + + /** + * A shared default instance of this comparator, treating nulls higher than + * non-null objects. see Comparators#nullsHigh() + */ + @SuppressWarnings("rawtypes") + public static final NullSafeComparator NULLS_HIGH = new NullSafeComparator<>(false); + + private final Comparator nonNullComparator; + + private final boolean nullsLow; + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, working on Comparables. + *
+ * When comparing two non-null objects, their Comparable implementation will be + * used: this means that non-null elements (that this Comparator will be applied + * to) need to implement Comparable. + *
+ * As a convenience, you can use the default shared instances: + * {@code NullSafeComparator.NULLS_LOW} and + * {@code NullSafeComparator.NULLS_HIGH}. + * + * @param nullsLow whether to treat nulls lower or higher than non-null objects + * @see Comparable + * @see #NULLS_LOW + * @see #NULLS_HIGH + */ + @SuppressWarnings("unchecked") + private NullSafeComparator(boolean nullsLow) { + this.nonNullComparator = ComparableComparator.INSTANCE; + this.nullsLow = nullsLow; + } + + /** + * Create a NullSafeComparator that sorts {@code null} based on the provided + * flag, decorating the given Comparator. + *
+ * When comparing two non-null objects, the specified Comparator will be used. + * The given underlying Comparator must be able to handle the elements that this + * Comparator will be applied to. + * + * @param comparator the comparator to use when comparing two non-null objects + * @param nullsLow whether to treat nulls lower or higher than non-null + * objects + */ + public NullSafeComparator(Comparator comparator, boolean nullsLow) { + // Assert.notNull(comparator, "Non-null Comparator is required"); + this.nonNullComparator = comparator; + this.nullsLow = nullsLow; + } + + @Override + public int compare(@Nullable T o1, @Nullable T o2) { + if (o1 == o2) { + return 0; + } + if (o1 == null) { + return (this.nullsLow ? -1 : 1); + } + if (o2 == null) { + return (this.nullsLow ? 1 : -1); + } + return this.nonNullComparator.compare(o1, o2); + } + + @Override + @SuppressWarnings("unchecked") + public boolean equals(@Nullable Object other) { + if (this == other) { + return true; + } + if (!(other instanceof NullSafeComparator)) { + return false; + } + NullSafeComparator otherComp = (NullSafeComparator) other; + return (this.nonNullComparator.equals(otherComp.nonNullComparator) && this.nullsLow == otherComp.nullsLow); + } + + @Override + public int hashCode() { + return this.nonNullComparator.hashCode() * (this.nullsLow ? -1 : 1); + } + + @Override + public String toString() { + return "NullSafeComparator: non-null comparator [" + this.nonNullComparator + "]; " + + (this.nullsLow ? "nulls low" : "nulls high"); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java new file mode 100644 index 00000000000..10131fa309a --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/StopWatch.java @@ -0,0 +1,415 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; + +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * Simple stop watch, allowing for timing of a number of tasks, exposing total + * running time and running time for each named task. + * + *
<p>
+ * Conceals use of {@link System#nanoTime()}, improving the readability of + * application code and reducing the likelihood of calculation errors. + * + *
<p>
+ * Note that this object is not designed to be thread-safe and does not use + * synchronization. + * + *
<p>
+ * This class is normally used to verify performance during proof-of-concept + * work and in development, rather than as part of production applications. + * + *
<p>
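+ * Typical usage (editor's sketch, not part of the original class comment):
+ *
+ * <pre>
+ * StopWatch watch = new StopWatch("demo");
+ * watch.start("first task");
+ * // ... code being timed ...
+ * watch.stop();
+ * System.out.println(watch.prettyPrint());
+ * </pre>
+ * <p>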
+ * As of Spring Framework 5.2, running time is tracked and reported in + * nanoseconds. + * + * @author Rod Johnson + * @author Juergen Hoeller + * @author Sam Brannen + * @since May 2, 2001 + */ +class StopWatch { + + /** + * Identifier of this {@code StopWatch}. + *
<p>
+ * Handy when we have output from multiple stop watches and need to distinguish + * between them in log or console output. + */ + private final String id; + + private boolean keepTaskList = true; + + private final List taskList = new ArrayList<>(1); + + /** Start time of the current task. */ + private long startTimeNanos; + + /** Name of the current task. */ + @Nullable + private String currentTaskName; + + @Nullable + private TaskInfo lastTaskInfo; + + private int taskCount; + + /** Total running time. */ + private long totalTimeNanos; + + /** + * Construct a new {@code StopWatch}. + *
<p>
+ * Does not start any task. + */ + public StopWatch() { + this(""); + } + + /** + * Construct a new {@code StopWatch} with the given ID. + *
<p>
+ * The ID is handy when we have output from multiple stop watches and need to + * distinguish between them. + *
<p>
+ * Does not start any task. + * + * @param id identifier for this stop watch + */ + public StopWatch(String id) { + this.id = id; + } + + /** + * Get the ID of this {@code StopWatch}, as specified on construction. + * + * @return the ID (empty String by default) + * @since 4.2.2 + * @see #StopWatch(String) + */ + public String getId() { + return this.id; + } + + /** + * Configure whether the {@link TaskInfo} array is built over time. + *
<p>
+ * Set this to {@code false} when using a {@code StopWatch} for millions of + * intervals; otherwise, the {@code TaskInfo} structure will consume excessive + * memory. + *
<p>
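+ * Illustrative (editor's sketch): disable task tracking before a very long
+ * measurement loop:
+ *
+ * <pre>
+ * StopWatch watch = new StopWatch();
+ * watch.setKeepTaskList(false);
+ * </pre>
+ * <p>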
+ * Default is {@code true}. + */ + public void setKeepTaskList(boolean keepTaskList) { + this.keepTaskList = keepTaskList; + } + + /** + * Start an unnamed task. + *
<p>
+ * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @see #start(String) + * @see #stop() + */ + public void start() throws IllegalStateException { + start(""); + } + + /** + * Start a named task. + *
<p>
+ * The results are undefined if {@link #stop()} or timing methods are called + * without invoking this method first. + * + * @param taskName the name of the task to start + * @see #start() + * @see #stop() + */ + public void start(String taskName) throws IllegalStateException { + if (this.currentTaskName != null) { + throw new IllegalStateException("Can't start StopWatch: it's already running"); + } + this.currentTaskName = taskName; + this.startTimeNanos = System.nanoTime(); + } + + /** + * Stop the current task. + *
<p>
+ * The results are undefined if timing methods are called without invoking at + * least one pair of {@code start()} / {@code stop()} methods. + * + * @see #start() + * @see #start(String) + */ + public void stop() throws IllegalStateException { + if (this.currentTaskName == null) { + throw new IllegalStateException("Can't stop StopWatch: it's not running"); + } + long lastTime = System.nanoTime() - this.startTimeNanos; + this.totalTimeNanos += lastTime; + this.lastTaskInfo = new TaskInfo(this.currentTaskName, lastTime); + if (this.keepTaskList) { + this.taskList.add(this.lastTaskInfo); + } + ++this.taskCount; + this.currentTaskName = null; + } + + /** + * Determine whether this {@code StopWatch} is currently running. + * + * @see #currentTaskName() + */ + public boolean isRunning() { + return (this.currentTaskName != null); + } + + /** + * Get the name of the currently running task, if any. + * + * @since 4.2.2 + * @see #isRunning() + */ + @Nullable + public String currentTaskName() { + return this.currentTaskName; + } + + /** + * Get the time taken by the last task in nanoseconds. + * + * @since 5.2 + * @see #getLastTaskTimeMillis() + */ + public long getLastTaskTimeNanos() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeNanos(); + } + + /** + * Get the time taken by the last task in milliseconds. + * + * @see #getLastTaskTimeNanos() + */ + public long getLastTaskTimeMillis() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task interval"); + } + return this.lastTaskInfo.getTimeMillis(); + } + + /** + * Get the name of the last task. + */ + public String getLastTaskName() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task name"); + } + return this.lastTaskInfo.getTaskName(); + } + + /** + * Get the last task as a {@link TaskInfo} object. + */ + public TaskInfo getLastTaskInfo() throws IllegalStateException { + if (this.lastTaskInfo == null) { + throw new IllegalStateException("No tasks run: can't get last task info"); + } + return this.lastTaskInfo; + } + + /** + * Get the total time in nanoseconds for all tasks. + * + * @since 5.2 + * @see #getTotalTimeMillis() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeNanos() { + return this.totalTimeNanos; + } + + /** + * Get the total time in milliseconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeSeconds() + */ + public long getTotalTimeMillis() { + return nanosToMillis(this.totalTimeNanos); + } + + /** + * Get the total time in seconds for all tasks. + * + * @see #getTotalTimeNanos() + * @see #getTotalTimeMillis() + */ + public double getTotalTimeSeconds() { + return nanosToSeconds(this.totalTimeNanos); + } + + /** + * Get the number of tasks timed. + */ + public int getTaskCount() { + return this.taskCount; + } + + /** + * Get an array of the data for tasks performed. + */ + public TaskInfo[] getTaskInfo() { + if (!this.keepTaskList) { + throw new UnsupportedOperationException("Task info is not being kept!"); + } + return this.taskList.toArray(new TaskInfo[0]); + } + + /** + * Get a short description of the total running time. 
+ */ + public String shortSummary() { + return "StopWatch '" + getId() + "': running time = " + getTotalTimeNanos() + " ns"; + } + + /** + * Generate a string with a table describing all tasks performed. + *
<p>
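+ * For example (editor's sketch of the custom-reporting route described
+ * below; {@code watch} stands for a StopWatch with recorded tasks):
+ *
+ * <pre>
+ * for (StopWatch.TaskInfo task : watch.getTaskInfo()) {
+ *   System.out.printf("%s took %d ms%n", task.getTaskName(), task.getTimeMillis());
+ * }
+ * </pre>
+ * <p>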
+ * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + public String prettyPrint() { + StringBuilder sb = new StringBuilder(shortSummary()); + sb.append('\n'); + if (!this.keepTaskList) { + sb.append("No task info kept"); + } else { + sb.append("---------------------------------------------\n"); + sb.append("ns % Task name\n"); + sb.append("---------------------------------------------\n"); + NumberFormat nf = NumberFormat.getNumberInstance(); + nf.setMinimumIntegerDigits(9); + nf.setGroupingUsed(false); + NumberFormat pf = NumberFormat.getPercentInstance(); + pf.setMinimumIntegerDigits(3); + pf.setGroupingUsed(false); + for (TaskInfo task : getTaskInfo()) { + sb.append(nf.format(task.getTimeNanos())).append(" "); + sb.append(pf.format((double) task.getTimeNanos() / getTotalTimeNanos())).append(" "); + sb.append(task.getTaskName()).append('\n'); + } + } + return sb.toString(); + } + + /** + * Generate an informative string describing all tasks performed + *
<p>
+ * For custom reporting, call {@link #getTaskInfo()} and use the task info + * directly. + */ + @Override + public String toString() { + StringBuilder sb = new StringBuilder(shortSummary()); + if (this.keepTaskList) { + for (TaskInfo task : getTaskInfo()) { + sb.append("; [").append(task.getTaskName()).append("] took ").append(task.getTimeNanos()).append(" ns"); + long percent = Math.round(100.0 * task.getTimeNanos() / getTotalTimeNanos()); + sb.append(" = ").append(percent).append('%'); + } + } else { + sb.append("; no task info kept"); + } + return sb.toString(); + } + + private static long nanosToMillis(long duration) { + return TimeUnit.NANOSECONDS.toMillis(duration); + } + + private static double nanosToSeconds(long duration) { + return duration / 1_000_000_000.0; + } + + /** + * Nested class to hold data about one task executed within the + * {@code StopWatch}. + */ + public static final class TaskInfo { + + private final String taskName; + + private final long timeNanos; + + TaskInfo(String taskName, long timeNanos) { + this.taskName = taskName; + this.timeNanos = timeNanos; + } + + /** + * Get the name of this task. + */ + public String getTaskName() { + return this.taskName; + } + + /** + * Get the time in nanoseconds this task took. + * + * @since 5.2 + * @see #getTimeMillis() + * @see #getTimeSeconds() + */ + public long getTimeNanos() { + return this.timeNanos; + } + + /** + * Get the time in milliseconds this task took. + * + * @see #getTimeNanos() + * @see #getTimeSeconds() + */ + public long getTimeMillis() { + return nanosToMillis(this.timeNanos); + } + + /** + * Get the time in seconds this task took. + * + * @see #getTimeMillis() + * @see #getTimeNanos() + */ + public double getTimeSeconds() { + return nanosToSeconds(this.timeNanos); + } + + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java new file mode 100644 index 00000000000..c351768864b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/springframework/TestConcurrentReferenceHashMap.java @@ -0,0 +1,688 @@ +/* + * Copyright 2002-2021 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro.util.springframework; + +import org.apache.avro.reflect.Nullable; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Entry; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Reference; +import org.apache.avro.util.springframework.ConcurrentReferenceHashMap.Restructure; + +import java.lang.ref.WeakReference; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.WeakHashMap; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Tests for {@link ConcurrentReferenceHashMap}. + * + * @author Phillip Webb + * @author Juergen Hoeller + */ +class TestConcurrentReferenceHashMap { + + private static final Comparator NULL_SAFE_STRING_SORT = new NullSafeComparator<>( + new ComparableComparator(), true); + + private TestWeakConcurrentCache map = new TestWeakConcurrentCache<>(); + + @Test + void shouldCreateWithDefaults() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(1)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacity() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateWithInitialCapacityAndLoadFactor() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(32, 0.5f); + assertThat(map.getSegmentsSize(), equalTo(16)); + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldCreateWithInitialCapacityAndConcurrentLevel() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(16, 2); + assertThat(map.getSegmentsSize(), equalTo(2)); + assertThat(map.getSegment(0).getSize(), equalTo(8)); + assertThat(map.getLoadFactor(), equalTo(0.75f)); + } + + @Test + void shouldCreateFullyCustom() { + ConcurrentReferenceHashMap map = new ConcurrentReferenceHashMap<>(5, 0.5f, 3); + // concurrencyLevel of 3 ends up as 4 (nearest power of 2) + assertThat(map.getSegmentsSize(), equalTo(4)); + // initialCapacity is 5/4 (rounded up, to nearest power of 2) + assertThat(map.getSegment(0).getSize(), equalTo(2)); + assertThat(map.getLoadFactor(), equalTo(0.5f)); + } + + @Test + void shouldNeedNonNegativeInitialCapacity() { + new ConcurrentReferenceHashMap(0, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(-1, 1)); + assertTrue(e.getMessage().contains("Initial capacity must not be negative")); + } + + @Test + void shouldNeedPositiveLoadFactor() { + new 
ConcurrentReferenceHashMap(0, 0.1f, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(0, 0.0f, 1)); + assertTrue(e.getMessage().contains("Load factor must be positive")); + } + + @Test + void shouldNeedPositiveConcurrencyLevel() { + new ConcurrentReferenceHashMap(1, 1); + IllegalArgumentException e = assertThrows(IllegalArgumentException.class, + () -> new TestWeakConcurrentCache(1, 0)); + assertTrue(e.getMessage().contains("Concurrency level must be positive")); + } + + @Test + void shouldPutAndGet() { + // NOTE we are using mock references so we don't need to worry about GC + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + assertEquals(1, this.map.size()); + this.map.put(123, "123b"); + assertEquals(1, this.map.size()); + this.map.put(123, null); + assertEquals(1, this.map.size()); + } + + @Test + void shouldReplaceOnDoublePut() { + this.map.put(123, "321"); + this.map.put(123, "123"); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutNullKey() { + assertNull(this.map.get(null)); + assertThat(this.map.getOrDefault(null, "456"), equalTo("456")); + this.map.put(null, "123"); + assertThat(this.map.get(null), equalTo("123")); + assertThat(this.map.getOrDefault(null, "456"), equalTo("123")); + } + + @Test + void shouldPutNullValue() { + assertNull(this.map.get(123)); + assertThat(this.map.getOrDefault(123, "456"), equalTo("456")); + this.map.put(123, "321"); + assertThat(this.map.get(123), equalTo("321")); + assertThat(this.map.getOrDefault(123, "456"), equalTo("321")); + this.map.put(123, null); + assertNull(this.map.get(123)); + assertNull(this.map.getOrDefault(123, "456")); + } + + @Test + void shouldGetWithNoItems() { + assertNull(this.map.get(123)); + } + + @Test + void shouldApplySupplementalHash() { + Integer key = 123; + this.map.put(key, "123"); + assertNotEquals(this.map.getSupplementalHash(), key.hashCode()); + assertNotEquals(this.map.getSupplementalHash() >> 30 & 0xFF, 0); + } + + @Test + void shouldGetFollowingNexts() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + assertNull(this.map.get(4)); + } + + @Test + void shouldResize() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + this.map.put(1, "1"); + assertThat(this.map.getSegment(0).getSize(), equalTo(1)); + assertThat(this.map.get(1), equalTo("1")); + + this.map.put(2, "2"); + assertThat(this.map.getSegment(0).getSize(), equalTo(2)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + + this.map.put(3, "3"); + assertThat(this.map.getSegment(0).getSize(), equalTo(4)); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertThat(this.map.get(3), equalTo("3")); + + this.map.put(4, "4"); + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(4), equalTo("4")); + + // Putting again should not increase the count + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + assertThat(this.map.getSegment(0).getSize(), equalTo(8)); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnGet() { + this.map 
= new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + assertNull(this.map.getReference(1, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPurgeOnPut() { + this.map = new TestWeakConcurrentCache<>(1, 0.75f, 1); + for (int i = 1; i <= 5; i++) { + this.map.put(i, String.valueOf(i)); + } + this.map.getMockReference(1, Restructure.NEVER).queueForPurge(); + this.map.getMockReference(3, Restructure.NEVER).queueForPurge(); + this.map.put(1, "1"); + assertThat(this.map.get(1), equalTo("1")); + assertThat(this.map.get(2), equalTo("2")); + assertNull(this.map.getReference(3, Restructure.WHEN_NECESSARY)); + assertThat(this.map.get(4), equalTo("4")); + assertThat(this.map.get(5), equalTo("5")); + } + + @Test + void shouldPutIfAbsent() { + assertNull(this.map.putIfAbsent(123, "123")); + assertThat(this.map.putIfAbsent(123, "123b"), equalTo("123")); + assertThat(this.map.get(123), equalTo("123")); + } + + @Test + void shouldPutIfAbsentWithNullValue() { + assertNull(this.map.putIfAbsent(123, null)); + assertNull(this.map.putIfAbsent(123, "123")); + assertNull(this.map.get(123)); + } + + @Test + void shouldPutIfAbsentWithNullKey() { + assertNull(this.map.putIfAbsent(null, "123")); + assertThat(this.map.putIfAbsent(null, "123b"), equalTo("123")); + assertThat(this.map.get(null), equalTo("123")); + } + + @Test + void shouldRemoveKeyAndValue() { + this.map.put(123, "123"); + assertFalse(this.map.remove(123, "456")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.remove(123, "123")); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveKeyAndValueWithExistingNull() { + this.map.put(123, null); + assertFalse(this.map.remove(123, "456")); + assertNull(this.map.get(123)); + assertTrue(this.map.remove(123, null)); + assertFalse(this.map.containsKey(123)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldReplaceOldValueWithNewValue() { + this.map.put(123, "123"); + assertFalse(this.map.replace(123, "456", "789")); + assertThat(this.map.get(123), equalTo("123")); + assertTrue(this.map.replace(123, "123", "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceOldNullValueWithNewValue() { + this.map.put(123, null); + assertFalse(this.map.replace(123, "456", "789")); + assertNull(this.map.get(123)); + assertTrue(this.map.replace(123, null, "789")); + assertThat(this.map.get(123), equalTo("789")); + } + + @Test + void shouldReplaceValue() { + this.map.put(123, "123"); + assertThat(this.map.replace(123, "456"), equalTo("123")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldReplaceNullValue() { + this.map.put(123, null); + assertNull(this.map.replace(123, "456")); + assertThat(this.map.get(123), equalTo("456")); + } + + @Test + void shouldGetSize() { + assertEquals(0, this.map.size()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, "456"); + assertEquals(2, this.map.size()); + } + + @Test + void shouldSupportIsEmpty() { + assertTrue(this.map.isEmpty()); + this.map.put(123, "123"); + this.map.put(123, null); + this.map.put(456, 
"456"); + assertFalse(this.map.isEmpty()); + } + + @Test + void shouldContainKey() { + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsKey(123)); + assertTrue(this.map.containsKey(456)); + } + + @Test + void shouldContainValue() { + assertFalse(this.map.containsValue("123")); + assertFalse(this.map.containsValue(null)); + this.map.put(123, "123"); + this.map.put(456, null); + assertTrue(this.map.containsValue("123")); + assertTrue(this.map.containsValue(null)); + } + + @Test + void shouldRemoveWhenKeyIsInMap() { + this.map.put(123, null); + this.map.put(456, "456"); + this.map.put(null, "789"); + assertNull(this.map.remove(123)); + assertThat(this.map.remove(456), equalTo("456")); + assertThat(this.map.remove(null), equalTo("789")); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldRemoveWhenKeyIsNotInMap() { + assertNull(this.map.remove(123)); + assertNull(this.map.remove(null)); + assertTrue(this.map.isEmpty()); + } + + @Test + void shouldPutAll() { + Map m = new HashMap<>(); + m.put(123, "123"); + m.put(456, null); + m.put(null, "789"); + this.map.putAll(m); + assertEquals(3, this.map.size()); + assertThat(this.map.get(123), equalTo("123")); + assertNull(this.map.get(456)); + assertThat(this.map.get(null), equalTo("789")); + } + + @Test + void shouldClear() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + this.map.clear(); + assertEquals(0, this.map.size()); + assertFalse(this.map.containsKey(123)); + assertFalse(this.map.containsKey(456)); + assertFalse(this.map.containsKey(null)); + } + + @Test + void shouldGetKeySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + Set expected = new HashSet<>(); + expected.add(123); + expected.add(456); + expected.add(null); + assertThat(this.map.keySet(), equalTo(expected)); + } + + @Test + void shouldGetValues() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + List actual = new ArrayList<>(this.map.values()); + List expected = new ArrayList<>(); + expected.add("123"); + expected.add(null); + expected.add("789"); + actual.sort(NULL_SAFE_STRING_SORT); + expected.sort(NULL_SAFE_STRING_SORT); + assertThat(actual, equalTo(expected)); + } + + @Test + void shouldGetEntrySet() { + this.map.put(123, "123"); + this.map.put(456, null); + this.map.put(null, "789"); + HashMap expected = new HashMap<>(); + expected.put(123, "123"); + expected.put(456, null); + expected.put(null, "789"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldGetEntrySetFollowingNext() { + // Use loadFactor to disable resize + this.map = new TestWeakConcurrentCache<>(1, 10.0f, 1); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + HashMap expected = new HashMap<>(); + expected.put(1, "1"); + expected.put(2, "2"); + expected.put(3, "3"); + assertThat(this.map.entrySet(), equalTo(expected.entrySet())); + } + + @Test + void shouldRemoveViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next(); + iterator.remove(); + assertThrows(IllegalStateException.class, iterator::remove); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(2, this.map.size()); + assertFalse(this.map.containsKey(2)); + } + + @Test + void shouldSetViaEntrySet() { + 
this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Iterator> iterator = this.map.entrySet().iterator(); + iterator.next(); + iterator.next().setValue("2b"); + iterator.next(); + assertFalse(iterator.hasNext()); + assertEquals(3, this.map.size()); + assertThat(this.map.get(2), equalTo("2b")); + } + + @Test + void containsViaEntrySet() { + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + Set> entrySet = this.map.entrySet(); + Set> copy = new HashMap<>(this.map).entrySet(); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + this.map.put(1, "A"); + this.map.put(2, "B"); + this.map.put(3, "C"); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + this.map.put(1, "1"); + this.map.put(2, "2"); + this.map.put(3, "3"); + copy.forEach(entry -> assertTrue(entrySet.contains(entry))); + entrySet.clear(); + copy.forEach(entry -> assertFalse(entrySet.contains(entry))); + } + + @Test + @Disabled("Intended for use during development only") + void shouldBeFasterThanSynchronizedMap() throws InterruptedException { + Map> synchronizedMap = Collections + .synchronizedMap(new WeakHashMap>()); + StopWatch mapTime = timeMultiThreaded("SynchronizedMap", synchronizedMap, + v -> new WeakReference<>(String.valueOf(v))); + System.out.println(mapTime.prettyPrint()); + + this.map.setDisableTestHooks(true); + StopWatch cacheTime = timeMultiThreaded("WeakConcurrentCache", this.map, String::valueOf); + System.out.println(cacheTime.prettyPrint()); + + // We should be at least 4 time faster + assertTrue(cacheTime.getTotalTimeSeconds() < (mapTime.getTotalTimeSeconds() / 4.0)); + } + + @Test + void shouldSupportNullReference() { + // GC could happen during restructure so we must be able to create a reference + // for a null entry + map.createReferenceManager().createReference(null, 1234, null); + } + + /** + * Time a multi-threaded access to a cache. 
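+ * <p>
+ * Editor's note (derived from the code below): the map is pre-populated with
+ * 500 entries, then 30 threads each perform 1,000 passes of 1,000 lookups
+ * while the named task is running.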
+ * + * @return the timing stopwatch + */ + private StopWatch timeMultiThreaded(String id, final Map map, ValueFactory factory) + throws InterruptedException { + + StopWatch stopWatch = new StopWatch(id); + for (int i = 0; i < 500; i++) { + map.put(i, factory.newValue(i)); + } + Thread[] threads = new Thread[30]; + stopWatch.start("Running threads"); + for (int threadIndex = 0; threadIndex < threads.length; threadIndex++) { + threads[threadIndex] = new Thread("Cache access thread " + threadIndex) { + @Override + public void run() { + for (int j = 0; j < 1000; j++) { + for (int i = 0; i < 1000; i++) { + map.get(i); + } + } + } + }; + } + for (Thread thread : threads) { + thread.start(); + } + + for (Thread thread : threads) { + if (thread.isAlive()) { + thread.join(2000); + } + } + stopWatch.stop(); + return stopWatch; + } + + private interface ValueFactory { + + V newValue(int k); + } + + private static class TestWeakConcurrentCache extends ConcurrentReferenceHashMap { + + private int supplementalHash; + + private final LinkedList> queue = new LinkedList<>(); + + private boolean disableTestHooks; + + public TestWeakConcurrentCache() { + super(); + } + + public void setDisableTestHooks(boolean disableTestHooks) { + this.disableTestHooks = disableTestHooks; + } + + public TestWeakConcurrentCache(int initialCapacity, float loadFactor, int concurrencyLevel) { + super(initialCapacity, loadFactor, concurrencyLevel); + } + + public TestWeakConcurrentCache(int initialCapacity, int concurrencyLevel) { + super(initialCapacity, concurrencyLevel); + } + + @Override + protected int getHash(@Nullable Object o) { + if (this.disableTestHooks) { + return super.getHash(o); + } + // For testing we want more control of the hash + this.supplementalHash = super.getHash(o); + return (o != null ? o.hashCode() : 0); + } + + public int getSupplementalHash() { + return this.supplementalHash; + } + + @Override + protected ReferenceManager createReferenceManager() { + return new ReferenceManager() { + @Override + public Reference createReference(Entry entry, int hash, @Nullable Reference next) { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.createReference(entry, hash, next); + } + return new MockReference<>(entry, hash, next, TestWeakConcurrentCache.this.queue); + } + + @Override + public Reference pollForPurge() { + if (TestWeakConcurrentCache.this.disableTestHooks) { + return super.pollForPurge(); + } + return TestWeakConcurrentCache.this.queue.isEmpty() ? 
null : TestWeakConcurrentCache.this.queue.removeFirst(); + } + }; + } + + public MockReference getMockReference(K key, Restructure restructure) { + return (MockReference) super.getReference(key, restructure); + } + } + + private static class MockReference implements Reference { + + private final int hash; + + private Entry entry; + + private final Reference next; + + private final LinkedList> queue; + + public MockReference(Entry entry, int hash, Reference next, LinkedList> queue) { + this.hash = hash; + this.entry = entry; + this.next = next; + this.queue = queue; + } + + @Override + public Entry get() { + return this.entry; + } + + @Override + public int getHash() { + return this.hash; + } + + @Override + public Reference getNext() { + return this.next; + } + + @Override + public void release() { + this.queue.add(this); + this.entry = null; + } + + public void queueForPurge() { + this.queue.add(this); + } + } + +} diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion new file mode 100644 index 00000000000..890ba764260 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.avro.CustomTypeConverter diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory index e111a25c43f..b55c233ae46 100644 --- a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-org.apache.avro.DummyLogicalTypeFactory +org.apache.avro.CustomTypeLogicalTypeFactory diff --git a/lang/java/compiler/pom.xml b/lang/java/compiler/pom.xml index bca3e1a5ed9..247bb35a752 100644 --- a/lang/java/compiler/pom.xml +++ b/lang/java/compiler/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../pom.xml @@ -137,6 +137,7 @@ org.apache.avro.compiler.specific.SchemaTask ${project.basedir}/src/test/resources/full_record_v1.avsc ${project.basedir}/src/test/resources/full_record_v2.avsc + ${project.basedir}/src/test/resources/regression_error_field_in_record.avsc ${project.basedir}/target/generated-test-sources/javacc diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index 6f46845bbdf..03e8761d8ce 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -61,6 +61,7 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.apache.avro.specific.SpecificData.RESERVED_WORDS; +import static org.apache.avro.specific.SpecificData.RESERVED_WORD_ESCAPE_CHAR; /** * Generate specific Java interfaces and classes for protocols and schemas. @@ -943,19 +944,21 @@ public int getNonNullIndex(Schema s) { * record.vm can handle the schema being presented. */ public boolean isCustomCodable(Schema schema) { - if (schema.isError()) - return false; return isCustomCodable(schema, new HashSet<>()); } private boolean isCustomCodable(Schema schema, Set seen) { if (!seen.add(schema)) + // Recursive call: assume custom codable until a caller on the call stack proves + // otherwise. return true; if (schema.getLogicalType() != null) return false; boolean result = true; switch (schema.getType()) { case RECORD: + if (schema.isError()) + return false; for (Schema.Field f : schema.getFields()) result &= isCustomCodable(f.schema(), seen); break; @@ -1126,7 +1129,7 @@ public static String mangle(String word, Set reservedWords, boolean isMe } if (reservedWords.contains(word) || (isMethod && reservedWords .contains(Character.toLowerCase(word.charAt(0)) + ((word.length() > 1) ? word.substring(1) : "")))) { - return word + "$"; + return word + RESERVED_WORD_ESCAPE_CHAR; } return word; } diff --git a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj index 1f931a640cd..4deaa68a5a7 100644 --- a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj +++ b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj @@ -174,7 +174,7 @@ public class Idl implements Closeable { File file = "file".equals(uri.getScheme()) ? 
new File(uri.getPath()) : null; URL result = null; if (file != null && file.exists()) - result = file.toURI().toURL(); + result = new URL("file:" + file.getPath()); else if (this.resourceLoader != null) if ("classpath".equals(uri.getScheme())) result = this.resourceLoader.getResource(uri.getPath().substring(1)); diff --git a/lang/java/compiler/src/test/idl/input/bar.avpr b/lang/java/compiler/src/test/idl/input/bar.avpr index 5e9b194a060..ea8b0d4befb 100644 --- a/lang/java/compiler/src/test/idl/input/bar.avpr +++ b/lang/java/compiler/src/test/idl/input/bar.avpr @@ -1,2 +1,3 @@ {"protocol": "org.foo.Bar", - "messages": { "bar": {"request": [], "response": "null"}}} + "types": [{"name": "AorB", "type": "enum", "symbols": ["A", "B"], "default": "A"}], + "messages": { "bar": {"request": [{"name": "choice", "type": "AorB"}],"response": "null"}}} diff --git a/lang/java/compiler/src/test/idl/output/import.avpr b/lang/java/compiler/src/test/idl/output/import.avpr index e6701ad94e1..b0093492d95 100644 --- a/lang/java/compiler/src/test/idl/output/import.avpr +++ b/lang/java/compiler/src/test/idl/output/import.avpr @@ -3,6 +3,11 @@ "namespace" : "org.foo", "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "types" : [ { + "type" : "enum", + "name" : "AorB", + "symbols" : ["A", "B"], + "default" : "A" + }, { "type" : "enum", "name" : "Position", "namespace" : "avro.examples.baseball", @@ -111,7 +116,10 @@ "response" : "null" }, "bar" : { - "request" : [ ], + "request" : [ { + "name" : "choice", + "type" : "AorB" + } ], "response" : "null" }, "bazm" : { diff --git a/lang/java/compiler/src/test/idl/output/nestedimport.avpr b/lang/java/compiler/src/test/idl/output/nestedimport.avpr index 80273627109..f1060b0d743 100644 --- a/lang/java/compiler/src/test/idl/output/nestedimport.avpr +++ b/lang/java/compiler/src/test/idl/output/nestedimport.avpr @@ -4,6 +4,12 @@ "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "version" : "1.0.5", "types" : [ { + "type" : "enum", + "name" : "AorB", + "namespace" : "org.foo", + "symbols" : ["A", "B"], + "default" : "A" + }, { "type" : "enum", "name" : "Position", "namespace" : "avro.examples.baseball", @@ -66,7 +72,10 @@ "response" : "null" }, "bar" : { - "request" : [ ], + "request" : [ { + "name" : "choice", + "type" : "org.foo.AorB" + } ], "response" : "null" } } diff --git a/lang/java/compiler/src/test/idl/work space/level1.avdl b/lang/java/compiler/src/test/idl/work space/level1.avdl new file mode 100644 index 00000000000..a2eaa862f7d --- /dev/null +++ b/lang/java/compiler/src/test/idl/work space/level1.avdl @@ -0,0 +1,3 @@ +protocol Level1 { + import idl "level2.avdl"; +} diff --git a/lang/java/compiler/src/test/idl/work space/level2.avdl b/lang/java/compiler/src/test/idl/work space/level2.avdl new file mode 100644 index 00000000000..4adb17700e2 --- /dev/null +++ b/lang/java/compiler/src/test/idl/work space/level2.avdl @@ -0,0 +1,2 @@ +protocol Level2 { +} diff --git a/lang/java/compiler/src/test/idl/work space/root.avdl b/lang/java/compiler/src/test/idl/work space/root.avdl new file mode 100644 index 00000000000..9a1b5e907b0 --- /dev/null +++ b/lang/java/compiler/src/test/idl/work space/root.avdl @@ -0,0 +1,3 @@ +protocol Root { + import idl "level1.avdl"; +} diff --git a/lang/java/compiler/src/test/idl/work space/root.avpr b/lang/java/compiler/src/test/idl/work space/root.avpr new file mode 100644 index 00000000000..6d4337022d2 --- /dev/null +++ b/lang/java/compiler/src/test/idl/work space/root.avpr @@ -0,0 +1 @@ +{"protocol":"Root","types":[],"messages":{}} diff --git a/lang/java/compiler/src/test/java/org/apache/avro/compiler/idl/TestIdl.java b/lang/java/compiler/src/test/java/org/apache/avro/compiler/idl/TestIdl.java index 37e6b2bb2c0..2ce714fb9b0 100644 --- a/lang/java/compiler/src/test/java/org/apache/avro/compiler/idl/TestIdl.java +++ b/lang/java/compiler/src/test/java/org/apache/avro/compiler/idl/TestIdl.java @@ -82,6 +82,13 @@ public void loadTests() { File outF = new File(TEST_OUTPUT_DIR, inF.getName().replaceFirst("\\.avdl$", ".avpr")); tests.add(new GenTest(inF, outF)); } + + // AVRO-3706 : test folder with space in name. 
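+ // Editor's note: root.avdl imports level1.avdl, which in turn imports
+ // level2.avdl, so IDL import resolution must handle the space in the
+ // "work space" directory (see the idl.jj change earlier in this diff).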
+ File inputWorkSpace = new File(TEST_DIR, "work space"); + File root = new File(inputWorkSpace, "root.avdl"); + File rootResult = new File(inputWorkSpace, "root.avpr"); + tests.add(new GenTest(root, rootResult)); + } @Test diff --git a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java index fda8579b374..7b32e652a46 100644 --- a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java +++ b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java @@ -28,6 +28,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.test.RecordWithErrorField; +import org.apache.avro.specific.test.TestError; import org.apache.avro.util.Utf8; import org.junit.Assert; @@ -87,4 +89,28 @@ public void withSchemaMigration() throws IOException { FullRecordV1 expected = new FullRecordV1(true, 87231, 731L, 54.2832F, 38.0, null, "Hello, world!"); Assert.assertEquals(expected, dst); } + + @Test + public void withErrorField() throws IOException { + TestError srcError = TestError.newBuilder().setMessage$("Oops").build(); + RecordWithErrorField src = new RecordWithErrorField("Hi there", srcError); + Assert.assertFalse("Test schema with error field cannot allow for custom coders.", + ((SpecificRecordBase) src).hasCustomCoders()); + Schema schema = RecordWithErrorField.getClassSchema(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(1024); + Encoder e = EncoderFactory.get().directBinaryEncoder(out, null); + DatumWriter w = (DatumWriter) MODEL.createDatumWriter(schema); + w.write(src, e); + e.flush(); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + Decoder d = DecoderFactory.get().directBinaryDecoder(in, null); + DatumReader r = (DatumReader) MODEL.createDatumReader(schema); + RecordWithErrorField dst = r.read(null, d); + + TestError expectedError = TestError.newBuilder().setMessage$("Oops").build(); + RecordWithErrorField expected = new RecordWithErrorField("Hi there", expectedError); + Assert.assertEquals(expected, dst); + } } diff --git a/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc b/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc new file mode 100644 index 00000000000..e2fdcb9ad93 --- /dev/null +++ b/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc @@ -0,0 +1,22 @@ +{ + "type" : "record", + "name" : "RecordWithErrorField", + "doc" : "With custom coders in Avro 1.9, previously successful records with error fields now fail to compile.", + "namespace" : "org.apache.avro.specific.test", + "fields" : [ { + "name" : "s", + "type" : [ "null", "string" ], + "default" : null + }, { + "name": "e", + "type": [ "null", { + "type" : "error", + "name" : "TestError", + "fields" : [ { + "name" : "message", + "type" : "string" + } ] + } ], + "default": null + } ] +} diff --git a/lang/java/grpc/pom.xml b/lang/java/grpc/pom.xml index 1c050314198..7d99302e85a 100644 --- a/lang/java/grpc/pom.xml +++ b/lang/java/grpc/pom.xml @@ -23,7 +23,7 @@ org.apache.avro avro-parent - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/integration-test/codegen-test/pom.xml b/lang/java/integration-test/codegen-test/pom.xml index ca57aba49f5..938195a632e 100644 --- a/lang/java/integration-test/codegen-test/pom.xml +++ 
b/lang/java/integration-test/codegen-test/pom.xml @@ -23,7 +23,7 @@ avro-integration-test org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/integration-test/pom.xml b/lang/java/integration-test/pom.xml index a17a958f5dc..8cc41050098 100644 --- a/lang/java/integration-test/pom.xml +++ b/lang/java/integration-test/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/integration-test/test-custom-conversions/pom.xml b/lang/java/integration-test/test-custom-conversions/pom.xml index 8fdecb0f796..3615d434336 100644 --- a/lang/java/integration-test/test-custom-conversions/pom.xml +++ b/lang/java/integration-test/test-custom-conversions/pom.xml @@ -23,7 +23,7 @@ avro-integration-test org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/ipc-jetty/pom.xml b/lang/java/ipc-jetty/pom.xml index ac9a182fc8f..8edf6e381e4 100644 --- a/lang/java/ipc-jetty/pom.xml +++ b/lang/java/ipc-jetty/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/ipc-netty/pom.xml b/lang/java/ipc-netty/pom.xml index 153d3c1083d..14a8ad74abb 100644 --- a/lang/java/ipc-netty/pom.xml +++ b/lang/java/ipc-netty/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/ipc/pom.xml b/lang/java/ipc/pom.xml index 4cdc8c364ae..0d18c39f488 100644 --- a/lang/java/ipc/pom.xml +++ b/lang/java/ipc/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/mapred/pom.xml b/lang/java/mapred/pom.xml index 84255c8684d..84e3b039967 100644 --- a/lang/java/mapred/pom.xml +++ b/lang/java/mapred/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/maven-plugin/pom.xml b/lang/java/maven-plugin/pom.xml index 4845222f7d2..8e0cb380e98 100644 --- a/lang/java/maven-plugin/pom.xml +++ b/lang/java/maven-plugin/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../pom.xml @@ -73,7 +73,7 @@ org.codehaus.plexus plexus-utils - 3.5.0 + 3.5.1 provided diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java index 968d8d0bdf0..5120aa62170 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java @@ -211,6 +211,7 @@ public void execute() throws MojoExecutionException { } if (hasImports) { + checkImportPaths(); for (String importedFile : imports) { File file = new File(importedFile); if (file.isDirectory()) { @@ -241,6 +242,15 @@ public void execute() throws MojoExecutionException { } } + private void checkImportPaths() throws MojoExecutionException { + for (String importedFile : imports) { + File file = new File(importedFile); + if (!file.exists()) { + throw new MojoExecutionException("Path " + file.getAbsolutePath() + " does not exist"); + } + } + } + private String[] getIncludedFiles(String absPath, String[] excludes, String[] includes) { final FileSetManager fileSetManager = new FileSetManager(); final FileSet fs = new FileSet(); diff --git a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java index f6bdc7fd06f..682bf6d8f9e 100644 
--- a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java +++ b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java @@ -17,6 +17,7 @@ */ package org.apache.avro.mojo; +import org.apache.maven.plugin.MojoExecutionException; import org.codehaus.plexus.util.FileUtils; import org.junit.Test; @@ -33,6 +34,10 @@ public class TestSchemaMojo extends AbstractAvroMojoTest { private File testPom = new File(getBasedir(), "src/test/resources/unit/schema/pom.xml"); private File injectingVelocityToolsTestPom = new File(getBasedir(), "src/test/resources/unit/schema/pom-injecting-velocity-tools.xml"); + private File testNonexistentFilePom = new File(getBasedir(), + "src/test/resources/unit/schema/pom-nonexistent-file.xml"); + private File testNonexistentSecondFilePom = new File(getBasedir(), + "src/test/resources/unit/schema/pom-nonexistent-second-file.xml"); @Test public void testSchemaMojo() throws Exception { @@ -67,4 +72,24 @@ public void testSetCompilerVelocityAdditionalTools() throws Exception { final String schemaUserContent = FileUtils.fileRead(new File(outputDir, "SchemaUser.java")); assertTrue("Got " + schemaUserContent + " instead", schemaUserContent.contains("It works!")); } + + @Test + public void testThrowsErrorForNonexistentFile() throws Exception { + try { + final SchemaMojo mojo = (SchemaMojo) lookupMojo("schema", testNonexistentFilePom); + mojo.execute(); + fail("MojoExecutionException not thrown!"); + } catch (MojoExecutionException ignored) { + } + } + + @Test + public void testThrowsErrorForNonexistentSecondFile() throws Exception { + try { + final SchemaMojo mojo = (SchemaMojo) lookupMojo("schema", testNonexistentSecondFilePom); + mojo.execute(); + fail("MojoExecutionException not thrown!"); + } catch (MojoExecutionException ignored) { + } + } } diff --git a/lang/java/maven-plugin/src/test/resources/unit/idl/pom-injecting-velocity-tools.xml b/lang/java/maven-plugin/src/test/resources/unit/idl/pom-injecting-velocity-tools.xml index 68823146b7d..759255b4704 100644 --- a/lang/java/maven-plugin/src/test/resources/unit/idl/pom-injecting-velocity-tools.xml +++ b/lang/java/maven-plugin/src/test/resources/unit/idl/pom-injecting-velocity-tools.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../../../../../../../../ diff --git a/lang/java/maven-plugin/src/test/resources/unit/idl/pom.xml b/lang/java/maven-plugin/src/test/resources/unit/idl/pom.xml index 02000ca6795..830cc8f837b 100644 --- a/lang/java/maven-plugin/src/test/resources/unit/idl/pom.xml +++ b/lang/java/maven-plugin/src/test/resources/unit/idl/pom.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../../../../../../../../pom.xml diff --git a/lang/java/maven-plugin/src/test/resources/unit/protocol/pom-injecting-velocity-tools.xml b/lang/java/maven-plugin/src/test/resources/unit/protocol/pom-injecting-velocity-tools.xml index e2a26ff129a..8896f6ba973 100644 --- a/lang/java/maven-plugin/src/test/resources/unit/protocol/pom-injecting-velocity-tools.xml +++ b/lang/java/maven-plugin/src/test/resources/unit/protocol/pom-injecting-velocity-tools.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../../../../../../../../ diff --git a/lang/java/maven-plugin/src/test/resources/unit/protocol/pom.xml b/lang/java/maven-plugin/src/test/resources/unit/protocol/pom.xml index e77f236888b..5fabea5c2ac 100644 --- a/lang/java/maven-plugin/src/test/resources/unit/protocol/pom.xml +++ 
b/lang/java/maven-plugin/src/test/resources/unit/protocol/pom.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../../../../../../../../pom.xml diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml new file mode 100644 index 00000000000..52af2a24624 --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml @@ -0,0 +1,69 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.11.4-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro + ${basedir}/target/test-harness/schema + + ${basedir}/src/test/avro/nonexistent-dir + + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml new file mode 100644 index 00000000000..4eccb8ef6fc --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml @@ -0,0 +1,70 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.11.4-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro + ${basedir}/target/test-harness/schema + + ${basedir}/src/test/avro/imports + ${basedir}/src/test/avro/nonexistent-dir + + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom.xml index b22c00fd86e..a2c82570fd0 100644 --- a/lang/java/maven-plugin/src/test/resources/unit/schema/pom.xml +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom.xml @@ -22,7 +22,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../../../../../../../../pom.xml diff --git a/lang/java/perf/pom.xml b/lang/java/perf/pom.xml index eb6ff07f7b8..69bfb3e2f14 100644 --- a/lang/java/perf/pom.xml +++ b/lang/java/perf/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ @@ -34,7 +34,7 @@ jar - 1.36 + 1.37 ${project.parent.parent.basedir} diff --git a/lang/java/pom.xml b/lang/java/pom.xml index c38d957a04c..bbea6c78bf8 100644 --- a/lang/java/pom.xml +++ b/lang/java/pom.xml @@ -22,7 +22,7 @@ org.apache.avro avro-toplevel - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../../pom.xml @@ -37,29 +37,29 @@ ${project.parent.basedir} - 3.3.4 - 2.12.7.20221012 + 3.3.5 + 2.14.2 4.0.1 - 9.4.50.v20221201 + 9.4.52.v20230823 5.0.4 - 5.9.1 - 4.1.86.Final - 3.21.12 + 5.10.0 + 4.1.98.Final + 3.24.3 0.16.0 1.7.36 - 1.2.24 - 1.1.8.4 + 1.2.25 + 1.1.10.4 2.3 3.3.9 - 1.10.12 - 1.4 - 1.21 + 1.10.14 + 1.5.0 + 1.22 1.10.0 1.9 - 4.10.0 + 4.11.0 2.2 - 1.51.1 - 1.5.2-5 + 1.58.0 + 1.5.5-5 3.2.1 5.1.8 @@ -67,6 +67,7 @@ 3.1.0 3.0.3 7.0.12 + 2.7.9 @@ -97,7 +98,7 @@ org.codehaus.mojo build-helper-maven-plugin - 3.2.0 + 3.4.0 org.apache.maven.plugins @@ -156,7 +157,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M7 + ${maven-surefire-plugin.version} @@ -233,6 +234,11 @@ spotless-maven-plugin 
${spotless-maven-plugin.version} + + org.cyclonedx + cyclonedx-maven-plugin + ${cyclonedx-maven-plugin.version} + @@ -320,6 +326,18 @@ + + org.cyclonedx + cyclonedx-maven-plugin + + + package + + makeBom + + + + diff --git a/lang/java/protobuf/pom.xml b/lang/java/protobuf/pom.xml index 4688eff14a0..dc62c5e788f 100644 --- a/lang/java/protobuf/pom.xml +++ b/lang/java/protobuf/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/thrift/pom.xml b/lang/java/thrift/pom.xml index 0543e79e63d..3d21bb7572b 100644 --- a/lang/java/thrift/pom.xml +++ b/lang/java/thrift/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/tools/pom.xml b/lang/java/tools/pom.xml index 507c724f3cb..d07ee2ec69e 100644 --- a/lang/java/tools/pom.xml +++ b/lang/java/tools/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/trevni/avro/pom.xml b/lang/java/trevni/avro/pom.xml index 5af6416f92e..62b5d2f6a04 100644 --- a/lang/java/trevni/avro/pom.xml +++ b/lang/java/trevni/avro/pom.xml @@ -22,7 +22,7 @@ trevni-java org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/trevni/core/pom.xml b/lang/java/trevni/core/pom.xml index 2af4c6dd4fb..3365970713f 100644 --- a/lang/java/trevni/core/pom.xml +++ b/lang/java/trevni/core/pom.xml @@ -22,7 +22,7 @@ trevni-java org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/java/trevni/doc/pom.xml b/lang/java/trevni/doc/pom.xml index 72d1ef6b5b2..729b7a45389 100644 --- a/lang/java/trevni/doc/pom.xml +++ b/lang/java/trevni/doc/pom.xml @@ -22,7 +22,7 @@ trevni-java org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT .. diff --git a/lang/java/trevni/pom.xml b/lang/java/trevni/pom.xml index 78c1a0ec43d..c01e8baaaa9 100644 --- a/lang/java/trevni/pom.xml +++ b/lang/java/trevni/pom.xml @@ -23,7 +23,7 @@ avro-parent org.apache.avro - 1.11.2-SNAPSHOT + 1.11.4-SNAPSHOT ../ diff --git a/lang/js/.gitignore b/lang/js/.gitignore index b509c88b364..78ed32c7512 100644 --- a/lang/js/.gitignore +++ b/lang/js/.gitignore @@ -1,2 +1,3 @@ coverage/ node_modules/ +.nyc_output \ No newline at end of file diff --git a/lang/js/package-lock.json b/lang/js/package-lock.json index f83a9eabe1b..c15ca7dd5d6 100644 --- a/lang/js/package-lock.json +++ b/lang/js/package-lock.json @@ -1,6 +1,6 @@ { "name": "avro-js", - "version": "1.11.2-SNAPSHOT", + "version": "1.11.4-SNAPSHOT", "lockfileVersion": 1, "requires": true, "dependencies": { @@ -1419,9 +1419,9 @@ "dev": true }, "json5": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.1.tgz", - "integrity": "sha512-1hqLFMSrGHRHxav9q9gNjJ5EXznIxGVO09xQRrwplcS8qs28pZ8s8hupZAmqDwZUmVZ2Qb2jnyPOWcDH8m8dlA==", + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.2.tgz", + "integrity": "sha512-46Tk9JiOL2z7ytNQWFLpj99RZkVgeHf87yGQKsIkaPz1qSH9UczKH1rO7K3wgRselo0tYMUNfecYpm/p1vC7tQ==", "dev": true }, "jsprim": { diff --git a/lang/js/package.json b/lang/js/package.json index e526fb34bf1..8199b856149 100644 --- a/lang/js/package.json +++ b/lang/js/package.json @@ -1,6 +1,6 @@ { "name": "avro-js", - "version": "1.11.2-SNAPSHOT", + "version": "1.11.4-SNAPSHOT", "author": "Avro Developers ", "description": "JavaScript Avro implementation", "contributors": [ diff --git a/lang/perl/MANIFEST b/lang/perl/MANIFEST index bc8f9341af0..d308e85ea01 100644 --- a/lang/perl/MANIFEST +++ b/lang/perl/MANIFEST @@ 
-41,7 +41,6 @@ META.yml LICENSE NOTICE README -VERSION.txt t/00_compile.t t/01_names.t t/01_schema.t diff --git a/lang/perl/Makefile.PL b/lang/perl/Makefile.PL index 69447e5651f..ffac5e9d11b 100644 --- a/lang/perl/Makefile.PL +++ b/lang/perl/Makefile.PL @@ -19,12 +19,7 @@ use Config; use inc::Module::Install; my $version; -for ('VERSION.txt', '../../share/VERSION.txt') { - if (-f) { - $version = `cat $_`; - last; - } -} +$version = `cat ../../share/VERSION.txt`; chomp $version; license 'apache'; diff --git a/lang/perl/build.sh b/lang/perl/build.sh index 2b689276b82..e7634dbabab 100755 --- a/lang/perl/build.sh +++ b/lang/perl/build.sh @@ -58,7 +58,6 @@ case "$target" in ;; dist) - cp ../../share/VERSION.txt . perl ./Makefile.PL && make dist ;; diff --git a/lang/perl/xt/interop.t b/lang/perl/xt/interop.t index 8bc2a84861f..81cd48666e9 100644 --- a/lang/perl/xt/interop.t +++ b/lang/perl/xt/interop.t @@ -21,8 +21,11 @@ use warnings; use Test::More; use File::Basename qw(basename); use IO::File; -use_ok 'Avro::DataFile'; -use_ok 'Avro::DataFileReader'; + +BEGIN { + use_ok 'Avro::DataFile'; + use_ok 'Avro::DataFileReader'; +} for my $path (glob '../../build/interop/data/*.avro') { my $fn = basename($path); diff --git a/lang/py/avro/errors.py b/lang/py/avro/errors.py index 2c7675131ef..b961a04ae43 100644 --- a/lang/py/avro/errors.py +++ b/lang/py/avro/errors.py @@ -120,3 +120,7 @@ class UsageError(RuntimeError, AvroException): class AvroRuntimeException(RuntimeError, AvroException): """Raised when compatibility parsing encounters an unknown type""" + + +class UnknownFingerprintAlgorithmException(AvroException): + """Raised when attempting to generate a fingerprint with an unknown algorithm""" diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py index f419b0c2e91..386e0bb9e61 100644 --- a/lang/py/avro/io.py +++ b/lang/py/avro/io.py @@ -472,7 +472,7 @@ def write_decimal_bytes(self, datum: decimal.Decimal, scale: int) -> None: signed long is 8, 8 bytes are written. """ sign, digits, exp = datum.as_tuple() - if (-1 * exp) > scale: + if (-1 * int(exp)) > scale: raise avro.errors.AvroOutOfScaleException(scale, datum, exp) unscaled_datum = 0 @@ -498,7 +498,7 @@ def write_decimal_fixed(self, datum: decimal.Decimal, scale: int, size: int) -> Decimal in fixed are encoded as size of fixed bytes. 
""" sign, digits, exp = datum.as_tuple() - if (-1 * exp) > scale: + if (-1 * int(exp)) > scale: raise avro.errors.AvroOutOfScaleException(scale, datum, exp) unscaled_datum = 0 diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py index 4ba4409d212..9b48002a5cf 100644 --- a/lang/py/avro/schema.py +++ b/lang/py/avro/schema.py @@ -42,12 +42,23 @@ import collections import datetime import decimal +import hashlib import json import math import uuid import warnings +from functools import reduce from pathlib import Path -from typing import List, Mapping, MutableMapping, Optional, Sequence, Union, cast +from typing import ( + FrozenSet, + List, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + cast, +) import avro.constants import avro.errors @@ -104,6 +115,50 @@ def _is_timezone_aware_datetime(dt: datetime.datetime) -> bool: return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None +# Fingerprint Constants +_EMPTY64_FINGERPRINT: int = 0xC15D213AA4D7A795 +_FINGERPRINT_TABLE: tuple = tuple(reduce(lambda fp, _: (fp >> 1) ^ (_EMPTY64_FINGERPRINT & -(fp & 1)), range(8), i) for i in range(256)) + + +# All algorithms guaranteed by hashlib are supported: +# - 'blake2b', +# - 'blake2s', +# - 'md5', +# - 'sha1', +# - 'sha224', +# - 'sha256', +# - 'sha384', +# - 'sha3_224', +# - 'sha3_256', +# - 'sha3_384', +# - 'sha3_512', +# - 'sha512', +# - 'shake_128', +# - 'shake_256' +SUPPORTED_ALGORITHMS: FrozenSet[str] = frozenset({"CRC-64-AVRO"} | hashlib.algorithms_guaranteed) + + +def _crc_64_fingerprint(data: bytes) -> bytes: + """The 64-bit Rabin Fingerprint. + + As described in the Avro specification. + + Args: + data: A bytes object containing the UTF-8 encoded parsing canonical + form of an Avro schema. + Returns: + A bytes object with a length of eight in little-endian format. + """ + result = _EMPTY64_FINGERPRINT + + for b in data: + result = (result >> 8) ^ _FINGERPRINT_TABLE[(result ^ b) & 0xFF] + + # Although not mentioned in the Avro specification, the Java + # implementation gives fingerprint bytes in little-endian order + return result.to_bytes(length=8, byteorder="little", signed=False) + + # # Base Classes # @@ -240,6 +295,30 @@ def __eq__(self, that: object) -> bool: Consider the mixins EqualByPropsMixin and EqualByJsonMixin """ + def fingerprint(self, algorithm="CRC-64-AVRO") -> bytes: + """ + Generate fingerprint for supplied algorithm. + + 'CRC-64-AVRO' will be used as the algorithm by default, but any + algorithm supported by hashlib (as can be referenced with + `hashlib.algorithms_guaranteed`) can be specified. + + `algorithm` param is used as an algorithm name, and NoSuchAlgorithmException + will be thrown if the algorithm is not among supported. + """ + schema = self.canonical_form.encode("utf-8") + + if algorithm == "CRC-64-AVRO": + return _crc_64_fingerprint(schema) + + if algorithm not in SUPPORTED_ALGORITHMS: + raise avro.errors.UnknownFingerprintAlgorithmException(f"Unknown Fingerprint Algorithm: {algorithm}") + + # Generate digests with hashlib for all other algorithms + # Lowercase algorithm to support algorithm strings sent by other languages like Java + h = hashlib.new(algorithm.lower(), schema) + return h.digest() + class NamedSchema(Schema): """Named Schemas specified in NAMED_TYPES.""" @@ -555,7 +634,7 @@ def __init__( validate_names: bool = True, ) -> None: """ - @arg validate_enum_symbols: If False, will allow enum symbols that are not valid Avro names. 
+ @arg validate_enum_symbols: If False, will allow enum symbols that are not valid Avro names, as well as an enum default that is not one of the enumerated symbols. """ if validate_enum_symbols: for symbol in symbols: diff --git a/lang/py/avro/test/test_compatibility.py b/lang/py/avro/test/test_compatibility.py index 3c36b6f846d..e9af424e919 100644 --- a/lang/py/avro/test/test_compatibility.py +++ b/lang/py/avro/test/test_compatibility.py @@ -691,7 +691,7 @@ def test_schema_compatibility(self): (WITHOUT_NAMESPACE_RECORD, WITH_NAMESPACE_RECORD), ] - for (reader, writer) in compatible_reader_writer_test_cases: + for reader, writer in compatible_reader_writer_test_cases: self.assertTrue(self.are_compatible(reader, writer)) def test_schema_compatibility_fixed_size_mismatch(self): @@ -711,7 +711,7 @@ def test_schema_compatibility_fixed_size_mismatch(self): "/fields/1/type/size", ), ] - for (reader, writer, message, location) in incompatible_fixed_pairs: + for reader, writer, message, location in incompatible_fixed_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertIn( @@ -737,7 +737,7 @@ def test_schema_compatibility_missing_enum_symbols(self): "/fields/0/type/symbols", ), ] - for (reader, writer, message, location) in incompatible_pairs: + for reader, writer, message, location in incompatible_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertIn(message, result.messages) @@ -853,7 +853,7 @@ def test_schema_compatibility_missing_union_branch(self): ), ] - for (reader, writer, message, location) in incompatible_pairs: + for reader, writer, message, location in incompatible_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertEqual(result.messages, message) @@ -872,7 +872,7 @@ def test_schema_compatibility_name_mismatch(self): ), ] - for (reader, writer, message, location) in incompatible_pairs: + for reader, writer, message, location in incompatible_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertIn(message, result.messages) @@ -883,7 +883,7 @@ def test_schema_compatibility_reader_field_missing_default_value(self): (A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0"), (A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0"), ] - for (reader, writer, message, location) in incompatible_pairs: + for reader, writer, message, location in incompatible_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertEqual(len(result.messages), 1) @@ -1063,7 +1063,7 @@ def test_schema_compatibility_type_mismatch(self): "/", ), ] - for (reader, writer, message, location) in incompatible_pairs: + for reader, writer, message, location in incompatible_pairs: result = ReaderWriterCompatibilityChecker().get_compatibility(reader, writer) self.assertIs(result.compatibility, SchemaCompatibilityType.incompatible) self.assertIn(message, result.messages) diff --git a/lang/py/avro/test/test_datafile_interop.py b/lang/py/avro/test/test_datafile_interop.py index d9e4c690daf..7dec16e7810 100644 --- a/lang/py/avro/test/test_datafile_interop.py +++
b/lang/py/avro/test/test_datafile_interop.py @@ -42,7 +42,6 @@ def test_interop(self) -> None: continue i = None with self.subTest(filename=filename), avro.datafile.DataFileReader(filename.open("rb"), avro.io.DatumReader()) as dfr: - user_metadata = dfr.get_meta("user_metadata") if user_metadata is not None: self.assertEqual(user_metadata, b"someByteArray") diff --git a/lang/py/avro/test/test_io.py b/lang/py/avro/test/test_io.py index 69c29a4acb7..82ad22e6e33 100644 --- a/lang/py/avro/test/test_io.py +++ b/lang/py/avro/test/test_io.py @@ -197,7 +197,7 @@ class DefaultValueTestCaseType(TypedDict): ), ( [{"type": "long", "logicalType": "timestamp-millis"}, "null"], - datetime.datetime(1000, 1, 1, 0, 0, 0, 000000, tzinfo=avro.timezones.utc), + datetime.datetime(1000, 1, 1, 0, 0, 0, 0, tzinfo=avro.timezones.utc), ), ( [{"type": "long", "logicalType": "timestamp-millis"}, "null"], @@ -468,7 +468,6 @@ def check_default_value(self) -> None: class TestIncompatibleSchemaReading(unittest.TestCase): def test_deserialization_fails(self) -> None: - reader_schema = avro.schema.parse( json.dumps( { @@ -523,7 +522,7 @@ def test_decimal_bytes_small_scale(self) -> None: """Avro should raise an AvroTypeException when attempting to write a decimal with a larger exponent than the schema's scale.""" datum = decimal.Decimal("3.1415") _, _, exp = datum.as_tuple() - scale = -1 * exp - 1 + scale = -1 * int(exp) - 1 schema = avro.schema.parse( json.dumps( { @@ -540,7 +539,7 @@ def test_decimal_fixed_small_scale(self) -> None: """Avro should raise an AvroTypeException when attempting to write a decimal with a larger exponent than the schema's scale.""" datum = decimal.Decimal("3.1415") _, _, exp = datum.as_tuple() - scale = -1 * exp - 1 + scale = -1 * int(exp) - 1 schema = avro.schema.parse( json.dumps( { diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py index 8286567f9ff..668ca8258f2 100644 --- a/lang/py/avro/test/test_schema.py +++ b/lang/py/avro/test/test_schema.py @@ -519,6 +519,204 @@ class InvalidTestSchema(TestSchema): ), ] + +# Fingerprint examples are in the form of tuples: +# - Value in Position 0 is schema +# - Value in Position 1 is an array of fingerprints: +# - Position 0 is CRC-64-AVRO fingerprint +# - Position 1 is MD5 fingerprint +# - Position 2 is SHA256 fingerprint +FINGERPRINT_EXAMPLES = [ + ('"int"', ["8f5c393f1ad57572", "ef524ea1b91e73173d938ade36c1db32", "3f2b87a9fe7cc9b13835598c3981cd45e3e355309e5090aa0933d7becb6fba45"]), + ('{"type": "int"}', ["8f5c393f1ad57572", "ef524ea1b91e73173d938ade36c1db32", "3f2b87a9fe7cc9b13835598c3981cd45e3e355309e5090aa0933d7becb6fba45"]), + ('"float"', ["90d7a83ecb027c4d", "50a6b9db85da367a6d2df400a41758a6", "1e71f9ec051d663f56b0d8e1fc84d71aa56ccfe9fa93aa20d10547a7abeb5cc0"]), + ( + '{"type": "float"}', + ["90d7a83ecb027c4d", "50a6b9db85da367a6d2df400a41758a6", "1e71f9ec051d663f56b0d8e1fc84d71aa56ccfe9fa93aa20d10547a7abeb5cc0"], + ), + ('"long"', ["b71df49344e154d0", "e1dd9a1ef98b451b53690370b393966b", "c32c497df6730c97fa07362aa5023f37d49a027ec452360778114cf427965add"]), + ( + '{"type": "long"}', + ["b71df49344e154d0", "e1dd9a1ef98b451b53690370b393966b", "c32c497df6730c97fa07362aa5023f37d49a027ec452360778114cf427965add"], + ), + ('"double"', ["7e95ab32c035758e", "bfc71a62f38b99d6a93690deeb4b3af6", "730a9a8c611681d7eef442e03c16c70d13bca3eb8b977bb403eaff52176af254"]), + ( + '{"type": "double"}', + ["7e95ab32c035758e", "bfc71a62f38b99d6a93690deeb4b3af6", "730a9a8c611681d7eef442e03c16c70d13bca3eb8b977bb403eaff52176af254"], + ), +
('"bytes"', ["651920c3da16c04f", "b462f06cb909be57c85008867784cde6", "9ae507a9dd39ee5b7c7e285da2c0846521c8ae8d80feeae5504e0c981d53f5fa"]), + ( + '{"type": "bytes"}', + ["651920c3da16c04f", "b462f06cb909be57c85008867784cde6", "9ae507a9dd39ee5b7c7e285da2c0846521c8ae8d80feeae5504e0c981d53f5fa"], + ), + ('"string"', ["c70345637248018f", "095d71cf12556b9d5e330ad575b3df5d", "e9e5c1c9e4f6277339d1bcde0733a59bd42f8731f449da6dc13010a916930d48"]), + ( + '{"type": "string"}', + ["c70345637248018f", "095d71cf12556b9d5e330ad575b3df5d", "e9e5c1c9e4f6277339d1bcde0733a59bd42f8731f449da6dc13010a916930d48"], + ), + ('"boolean"', ["64f7d4a478fc429f", "01f692b30d4a1c8a3e600b1440637f8f", "a5b031ab62bc416d720c0410d802ea46b910c4fbe85c50a946ccc658b74e677e"]), + ( + '{"type": "boolean"}', + ["64f7d4a478fc429f", "01f692b30d4a1c8a3e600b1440637f8f", "a5b031ab62bc416d720c0410d802ea46b910c4fbe85c50a946ccc658b74e677e"], + ), + ('"null"', ["8a8f25cce724dd63", "9b41ef67651c18488a8b08bb67c75699", "f072cbec3bf8841871d4284230c5e983dc211a56837aed862487148f947d1a1f"]), + ( + '{"type": "null"}', + ["8a8f25cce724dd63", "9b41ef67651c18488a8b08bb67c75699", "f072cbec3bf8841871d4284230c5e983dc211a56837aed862487148f947d1a1f"], + ), + ( + '{"type": "fixed", "name": "Test", "size": 1}', + ["6869897b4049355b", "db01bc515fcfcd2d4be82ed385288261", "f527116a6f44455697e935afc31dc60ad0f95caf35e1d9c9db62edb3ffeb9170"], + ), + ( + json.dumps({"type": "fixed", "name": "MyFixed", "namespace": "org.apache.hadoop.avro", "size": 1}), + ["fadbd138e85bdf45", "d74b3726484422711c465d49e857b1ba", "28e493a44771cecc5deca4bd938cdc3d5a24cfe1f3760bc938fa1057df6334fc"], + ), + ( + '{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', + ["03a2f2c2e27f7a16", "d883f2a9b16ed085fcc5e4ca6c8f6ed1", "9b51286144f87ce5aebdc61ca834379effa5a41ce6ac0938630ff246297caca8"], + ), + ( + '{"type": "array", "items": "long"}', + ["715e2ea28bc91654", "c1c387e8d6a58f0df749b698991b1f43", "f78e954167feb23dcb1ce01e8463cebf3408e0a4259e16f24bd38f6d0f1d578b"], + ), + ( + json.dumps({"type": "array", "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), + ["10d9ade1fa3a0387", "cfc7b861c7cfef082a6ef082948893fa", "0d8edd49d7f7e9553668f133577bc99f842852b55d9f84f1f7511e4961aa685c"], + ), + ( + '{"type": "map", "values": "long"}', + ["6f74f4e409b1334e", "32b3f1a3177a0e73017920f00448b56e", "b8fad07d458971a07692206b8a7cf626c86c62fe6bcff7c1b11bc7295de34853"], + ), + ( + json.dumps({"type": "map", "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), + ["df2ab0626f6b812d", "c588da6ba99701c41e73fd30d23f994e", "3886747ed1669a8af476b549e97b34222afb2fed5f18bb27c6f367ea0351a576"], + ), + ( + '["string", "null", "long"]', + ["65a5be410d687566", "b11cf95f0a55dd55f9ee515a37bf937a", "ed8d254116441bb35e237ad0563cf5432b8c975334bd222c1ee84609435d95bb"], + ), + ( + json.dumps({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}), + ["ed94e5f5e6eb588e", "69531a03db788afe353244cd049b1e6d", "9670f15a8f96d23e92830d00b8bd57275e02e3e173ffef7c253c170b6beabeb8"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, {"name": "children", "type": {"type": "array", "items": "Node"}}], + } + ), + ["52cba544c3e756b7", "99625b0cc02050363e89ef66b0f406c9", "65d80dc8c95c98a9671d92cf0415edfabfee2cb058df2138606656cd6ae4dc59"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Lisp", + "fields": [ + { + "name": "value", + "type": [ + "null", + "string", + {"type": "record", "name": "Cons", "fields": 
[{"name": "car", "type": "Lisp"}, {"name": "cdr", "type": "Lisp"}]}, + ], + } + ], + } + ), + ["68d91a23eda0b306", "9e1d0d15b52789fcb8e3a88b53059d5f", "e5ce4f4a15ce19fa1047cfe16a3b0e13a755db40f00f23284fdd376fc1c7dd21"], + ), + ( + json.dumps( + { + "type": "record", + "name": "HandshakeRequest", + "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}, + ], + } + ), + ["43818703b7b5d769", "16ded8b5027e80a17704c6565c0c3f1b", "6c317314687da52a85c813a7f0c92298a60b79625b9acc072e4d9e4256a1d800"], + ), + ( + json.dumps( + { + "type": "record", + "name": "HandshakeResponse", + "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", "type": {"type": "enum", "name": "HandshakeMatch", "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}, + ], + } + ), + ["00feee01de4ea50e", "afe529d01132daab7f4e2a6663e7a2f5", "a303cbbfe13958f880605d70c521a4b7be34d9265ac5a848f25916a67b11d889"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Interop", + "namespace": "org.apache.avro", + "fields": [ + {"name": "intField", "type": "int"}, + {"name": "longField", "type": "long"}, + {"name": "stringField", "type": "string"}, + {"name": "boolField", "type": "boolean"}, + {"name": "floatField", "type": "float"}, + {"name": "doubleField", "type": "double"}, + {"name": "bytesField", "type": "bytes"}, + {"name": "nullField", "type": "null"}, + {"name": "arrayField", "type": {"type": "array", "items": "double"}}, + { + "name": "mapField", + "type": {"type": "map", "values": {"name": "Foo", "type": "record", "fields": [{"name": "label", "type": "string"}]}}, + }, + {"name": "unionField", "type": ["boolean", "double", {"type": "array", "items": "bytes"}]}, + {"name": "enumField", "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}}, + {"name": "fixedField", "type": {"type": "fixed", "name": "MD5", "size": 16}}, + { + "name": "recordField", + "type": { + "type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, {"name": "children", "type": {"type": "array", "items": "Node"}}], + }, + }, + ], + } + ), + ["e82c0a93a6a0b5a4", "994fea1a1be7ff8603cbe40c3bc7e4ca", "cccfd6e3f917cf53b0f90c206342e6703b0d905071f724a1c1f85b731c74058d"], + ), + ( + json.dumps( + { + "type": "record", + "name": "ipAddr", + "fields": [{"name": "addr", "type": [{"name": "IPv6", "type": "fixed", "size": 16}, {"name": "IPv4", "type": "fixed", "size": 4}]}], + } + ), + ["8d961b4e298a1844", "45d85c69b353a99b93d7c4f2fcf0c30d", "6f6fc8f685a4f07d99734946565d63108806d55a8620febea047cf52cb0ac181"], + ), + ( + json.dumps({"type": "record", "name": "TestDoc", "doc": "Doc string", "fields": [{"name": "name", "type": "string", "doc": "Doc String"}]}), + ["0e6660f02bcdc109", "f2da75f5131f5ab80629538287b8beb2", "0b3644f7aa5ca2fc4bad93ca2d3609c12aa9dbda9c15e68b34c120beff08e7b9"], + ), + ( + '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}', + ["03a2f2c2e27f7a16", "d883f2a9b16ed085fcc5e4ca6c8f6ed1", "9b51286144f87ce5aebdc61ca834379effa5a41ce6ac0938630ff246297caca8"], + ), +] + EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES 
EXAMPLES += ENUM_EXAMPLES @@ -634,13 +832,25 @@ def test_fixed_decimal_invalid_max_precision(self): def test_parse_invalid_symbol(self): """Disabling enumschema symbol validation should allow invalid symbols to pass.""" test_schema_string = json.dumps({"type": "enum", "name": "AVRO2174", "symbols": ["white space"]}) + with self.assertRaises(avro.errors.InvalidName, msg="When enum symbol validation is enabled, an invalid symbol should raise InvalidName."): avro.schema.parse(test_schema_string, validate_enum_symbols=True) + try: avro.schema.parse(test_schema_string, validate_enum_symbols=False) except avro.errors.InvalidName: # pragma: no coverage self.fail("When enum symbol validation is disabled, an invalid symbol should not raise InvalidName.") + def test_unsupported_fingerprint_algorithm(self): + s = avro.schema.parse('"int"') + self.assertRaises(avro.errors.UnknownFingerprintAlgorithmException, s.fingerprint, "foo") + + def test_less_popular_fingerprint_algorithm(self): + s = avro.schema.parse('"int"') + fingerprint = s.fingerprint("sha384") + hex_fingerprint = "".join(format(b, "02x") for b in fingerprint).zfill(16) + self.assertEqual(hex_fingerprint, "32ed5e4ac896570f044d1dab68f4c8ca9866ac06d22261f399316bf4799e16854750238085775107dfac905c82b2feaf") + class SchemaParseTestCase(unittest.TestCase): """Enable generating parse test cases over all the valid and invalid example schema.""" @@ -662,7 +872,7 @@ def parse_valid(self) -> None: try: warnings.filterwarnings(action="error", category=avro.errors.IgnoredLogicalType) self.test_schema.parse() - except (avro.errors.IgnoredLogicalType) as e: + except avro.errors.IgnoredLogicalType as e: self.assertIn(type(e), (type(w) for w in test_warnings)) self.assertIn(str(e), (str(w) for w in test_warnings)) except (avro.errors.AvroException, avro.errors.SchemaParseException): # pragma: no coverage @@ -1181,6 +1391,39 @@ def test_large_record_interop(self): ) + +class FingerprintTestCase(unittest.TestCase): + """ + Enable generating fingerprint test cases across algorithms. + + Fingerprint examples are in the form of tuples: + - Value in Position 0 is schema + - Value in Position 1 is an array of fingerprints: + - Position 0 is CRC-64-AVRO fingerprint + - Position 1 is MD5 fingerprint + - Position 2 is SHA256 fingerprint + """ + + def __init__(self, test_schema, fingerprints): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`.
+ """ + super(FingerprintTestCase, self).__init__("validate_fingerprint") + self.test_schema = test_schema + self.fingerprints = fingerprints + + def _hex_fingerprint(self, fingerprint): + return "".join(format(b, "02x") for b in fingerprint).zfill(16) + + def validate_fingerprint(self): + """The string of a Schema should be parseable to the same Schema.""" + s = avro.schema.parse(self.test_schema) + self.assertEqual(self._hex_fingerprint(s.fingerprint()), self.fingerprints[0]) + self.assertEqual(self._hex_fingerprint(s.fingerprint("md5")), self.fingerprints[1]) + self.assertEqual(self._hex_fingerprint(s.fingerprint("sha256")), self.fingerprints[2]) + + def load_tests(loader, default_tests, pattern): """Generate test cases across many test schema.""" suite = unittest.TestSuite() @@ -1190,6 +1433,7 @@ def load_tests(loader, default_tests, pattern): suite.addTests(DocAttributesTestCase(ex) for ex in DOC_EXAMPLES) suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES) suite.addTests(loader.loadTestsFromTestCase(CanonicalFormTestCase)) + suite.addTests(FingerprintTestCase(ex[0], ex[1]) for ex in FINGERPRINT_EXAMPLES) return suite diff --git a/lang/py/avro/tether/tether_task.py b/lang/py/avro/tether/tether_task.py index dc138d07d70..c521fa56b4c 100644 --- a/lang/py/avro/tether/tether_task.py +++ b/lang/py/avro/tether/tether_task.py @@ -300,7 +300,6 @@ def configure(self, taskType, inSchemaText, outSchemaText): self._red_fkeys = [f.name for f in self.midschema.fields if not (f.order == "ignore")] except Exception as e: - estr = traceback.format_exc() self.fail(estr) @@ -335,7 +334,6 @@ def input(self, data, count): self.map(inRecord, self.midCollector) elif self.taskType == TaskType.REDUCE: - # store the previous record prev = self.midRecord diff --git a/lang/py/build.sh b/lang/py/build.sh index 7412889ec5a..483d5b3b9a9 100755 --- a/lang/py/build.sh +++ b/lang/py/build.sh @@ -27,7 +27,6 @@ clean() { '*.avsc' \ '*.egg-info' \ '*.py[co]' \ - 'VERSION.txt' \ '__pycache__' \ '.tox' \ 'avro/test/interop' \ @@ -51,6 +50,15 @@ dist() ( "$virtualenv/bin/python3" -m build --outdir "$destination" ) +doc() { + local doc_dir + local version=$(cat ../../share/VERSION.txt) + doc_dir="../../build/avro-doc-$version/api/py" + python3 -m tox -e docs + mkdir -p "$doc_dir" + cp -a docs/build/* "$doc_dir" +} + interop-data-generate() { ./setup.py generate_interop_data cp -r avro/test/interop/data ../../build/interop @@ -76,6 +84,7 @@ main() { case "$target" in clean) clean;; dist) dist;; + doc) doc;; interop-data-generate) interop-data-generate;; interop-data-test) interop-data-test;; lint) lint;; diff --git a/lang/py/docs/Makefile b/lang/py/docs/Makefile new file mode 100644 index 00000000000..f0710c99b4f --- /dev/null +++ b/lang/py/docs/Makefile @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/lang/py/docs/make.bat b/lang/py/docs/make.bat new file mode 100644 index 00000000000..93900e3e37b --- /dev/null +++ b/lang/py/docs/make.bat @@ -0,0 +1,50 @@ +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM https://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. + +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/lang/py/docs/source/_static/.gitignore b/lang/py/docs/source/_static/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/lang/py/docs/source/automodule.rst b/lang/py/docs/source/automodule.rst new file mode 100644 index 00000000000..bfd53e79252 --- /dev/null +++ b/lang/py/docs/source/automodule.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. automodule:: avro + :members: +.. automodule:: avro.compatibility + :members: +.. automodule:: avro.datafile + :members: +.. automodule:: avro.ipc + :members: +.. automodule:: avro.protocol + :members: +.. automodule:: avro.codecs + :members: +.. automodule:: avro.constants + :members: +.. automodule:: avro.io + :members: +.. automodule:: avro.tether + :members: +.. automodule:: avro.tether.tether_task_runner + :members: +.. automodule:: avro.tether.util + :members: +.. automodule:: avro.tether.tether_task + :members: +.. automodule:: avro.utils + :members: +.. automodule:: avro.errors + :members: +.. automodule:: avro.name + :members: +.. automodule:: avro.tool + :members: +.. automodule:: avro.timezones + :members: +.. automodule:: avro.__main__ + :members: +.. automodule:: avro.schema + :members: diff --git a/lang/py/docs/source/conf.py b/lang/py/docs/source/conf.py new file mode 100644 index 00000000000..d534a7be026 --- /dev/null +++ b/lang/py/docs/source/conf.py @@ -0,0 +1,47 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import sys + +sys.path.append("..") + +project = "Apache Avro" +copyright = "2023, Apache" +author = "Apache" +release = "1.11.3" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.autodoc"] + +templates_path = ["_templates"] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_static_path = ["_static"] diff --git a/lang/py/docs/source/index.rst b/lang/py/docs/source/index.rst new file mode 100644 index 00000000000..ec66916da4e --- /dev/null +++ b/lang/py/docs/source/index.rst @@ -0,0 +1,31 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + intro + automodule + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/lang/py/docs/source/intro.rst b/lang/py/docs/source/intro.rst new file mode 100644 index 00000000000..ca480a15bb7 --- /dev/null +++ b/lang/py/docs/source/intro.rst @@ -0,0 +1,29 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Welcome to Avro's Python documentation! +======================================= + +Avro is a data serialization system. See `avro.apache.org <https://avro.apache.org/>`_ for background information. + +Avro Python is a Python library that implements parts of the `Avro Specification <https://avro.apache.org/docs/current/specification/>`_. + +The library includes the following functionality: + +* Assembling schemas programmatically. +* A schema parser, which can parse Avro schema (written in JSON) into a Schema object. +* Binary encoders and decoders to encode data into Avro format and decode it back using primitive functions. +* Streams for storing and reading data, which Encoders and Decoders use. +* Support for Avro DataFile.
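To make the new Python fingerprint support above concrete, here is a minimal usage sketch against the patched avro package. The to_hex helper is illustrative (it simply mirrors the hex formatting used by the test suite), and the expected digests are the '"int"' entries from FINGERPRINT_EXAMPLES in test_schema.py:

import avro.errors
import avro.schema

# Fingerprints are computed over the schema's UTF-8 encoded parsing canonical form.
schema = avro.schema.parse('"int"')

crc64 = schema.fingerprint()     # "CRC-64-AVRO" (the Rabin fingerprint) is the default
md5 = schema.fingerprint("md5")  # any hashlib.algorithms_guaranteed name also works


def to_hex(fp: bytes) -> str:
    # Illustrative helper: the same formatting the tests use.
    return "".join(format(b, "02x") for b in fp)


assert to_hex(crc64) == "8f5c393f1ad57572"
assert to_hex(md5) == "ef524ea1b91e73173d938ade36c1db32"

# Names outside SUPPORTED_ALGORITHMS raise the new exception type from avro/errors.py.
try:
    schema.fingerprint("foo")
except avro.errors.UnknownFingerprintAlgorithmException as e:
    print(e)  # Unknown Fingerprint Algorithm: foo

The Rabin fingerprint bytes come back in little-endian order, matching the Java implementation as noted in the comments of _crc_64_fingerprint above.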
diff --git a/lang/py/tox.ini b/lang/py/tox.ini index 07e4d498d95..e7bba45baf1 100644 --- a/lang/py/tox.ini +++ b/lang/py/tox.ini @@ -18,6 +18,7 @@ envlist = build # Build the wheel # Fastest checks first + docs lint typechecks py36 @@ -66,6 +67,14 @@ commands_pre = commands = commands_post = +[testenv:docs] +deps = + sphinx +commands_pre = +commands = + sphinx-build -b html docs/source/ docs/build/html +commands_post = + [testenv:lint] deps = black diff --git a/lang/ruby/build.sh b/lang/ruby/build.sh index baab974b55a..3d789c8abea 100755 --- a/lang/ruby/build.sh +++ b/lang/ruby/build.sh @@ -24,11 +24,6 @@ cd "$(dirname "$0")" export GEM_HOME="$PWD/.gem/" export PATH="/usr/local/rbenv/shims:$GEM_HOME/bin:$PATH" -# bootstrap bundler -gem install --no-document -v 1.17.3 bundler - -# rbenv is used by the Dockerfile but not the Github action in CI -rbenv rehash 2>/dev/null || echo "Not using rbenv" bundle install for target in "$@" diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb index e6e3b326d2d..0d2f3135850 100644 --- a/lang/ruby/lib/avro/io.rb +++ b/lang/ruby/lib/avro/io.rb @@ -390,31 +390,31 @@ def read_record(writers_schema, readers_schema, decoder) def read_default_value(field_schema, default_value) # Basically a JSON Decoder? - case field_schema.type_sym + datum = case field_schema.type_sym when :null - return nil + nil when :int, :long - return Integer(default_value) + Integer(default_value) when :float, :double - return Float(default_value) + Float(default_value) when :boolean, :enum, :fixed, :string, :bytes - return default_value + default_value when :array read_array = [] default_value.each do |json_val| item_val = read_default_value(field_schema.items, json_val) read_array << item_val end - return read_array + read_array when :map read_map = {} default_value.each do |key, json_val| map_val = read_default_value(field_schema.values, json_val) read_map[key] = map_val end - return read_map + read_map when :union - return read_default_value(field_schema.schemas[0], default_value) + read_default_value(field_schema.schemas[0], default_value) when :record, :error read_record = {} field_schema.fields.each do |field| @@ -423,11 +423,13 @@ def read_default_value(field_schema, default_value) field_val = read_default_value(field.type, json_val) read_record[field.name] = field_val end - return read_record + read_record else fail_msg = "Unknown type: #{field_schema.type}" raise AvroError, fail_msg end + + field_schema.type_adapter.decode(datum) end def skip_data(writers_schema, decoder) diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb index 0b4c04f33ba..37e1d21031c 100644 --- a/lang/ruby/lib/avro/schema.rb +++ b/lang/ruby/lib/avro/schema.rb @@ -111,7 +111,7 @@ def self.real_parse(json_obj, names=nil, default_namespace=nil) elsif PRIMITIVE_TYPES.include? json_obj return PrimitiveSchema.new(json_obj) else - raise UnknownSchemaError.new(json_obj) + raise UnknownSchemaError.new(json_obj, default_namespace) end end @@ -126,6 +126,7 @@ def self.validate(expected_schema, logical_datum, options = DEFAULT_VALIDATE_OPT def initialize(type, logical_type=nil) @type_sym = type.is_a?(Symbol) ? type : type.to_sym @logical_type = logical_type + @type_adapter = nil end attr_reader :type_sym @@ -571,6 +572,7 @@ def initialize(type, name, default=:no_default, order=nil, names=nil, namespace= @order = order @doc = doc @aliases = aliases + @type_adapter = nil validate_aliases! if aliases validate_default! if default? 
&& !Avro.disable_field_default_validation end @@ -599,8 +601,16 @@ def validate_default! else type end - - Avro::SchemaValidator.validate!(type_for_default, default) + case type_for_default.logical_type + when DECIMAL_LOGICAL_TYPE + # https://avro.apache.org/docs/1.11.1/specification/#schema-record + # Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255 + options = SchemaValidator::DEFAULT_VALIDATION_OPTIONS.dup + options[:encoded] = true + Avro::SchemaValidator.validate!(type_for_default, default, options) + else + Avro::SchemaValidator.validate!(type_for_default, default) + end rescue Avro::SchemaValidator::ValidationError => e raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}" end @@ -611,9 +621,11 @@ class SchemaParseError < AvroError; end class UnknownSchemaError < SchemaParseError attr_reader :type_name + attr_reader :default_namespace - def initialize(type) + def initialize(type, default_namespace) @type_name = type + @default_namespace = default_namespace super("#{type.inspect} is not a schema we know about.") end end diff --git a/lang/ruby/test/test_logical_types.rb b/lang/ruby/test/test_logical_types.rb index bb368894832..2a26a0c6c06 100644 --- a/lang/ruby/test/test_logical_types.rb +++ b/lang/ruby/test/test_logical_types.rb @@ -124,6 +124,113 @@ def test_bytes_decimal end end + def test_logical_type_default_value + sales_schema = Avro::Schema.parse('{ + "type": "record", + "name": "Order", + "fields" : [ + { + "name": "sales", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + } + ] + }') + + sales_tax_schema = Avro::Schema.parse('{ + "type": "record", + "name": "Order", + "fields" : [ + { + "name": "sales", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + }, + { + "name": "tax", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + }, + { + "name": "invoice_date", + "type": [ + { + "type": "int", + "logicalType": "date" + }, + "null" + ], + "default": 0 + }, + { + "name": "invoice_time", + "type": [ + { + "type": "int", + "logicalType": "time-millis" + }, + "null" + ], + "default": 0 + }, + { + "name": "created_at", + "type": [ + { + "type": "long", + "logicalType": "timestamp-millis" + }, + "null" + ], + "default": 0 + } + ] + }') + + sales_record = {"sales" => BigDecimal("12.34")} + sales_tax_record = { + "sales" => BigDecimal("12.34"), + "tax" => BigDecimal("0.000"), + "invoice_date" => Time.at(0).to_date, + # time-millis is not supported + "invoice_time" => 0, + "created_at" => Time.at(0).utc, + } + encoded = encode(sales_record, sales_schema) + assert_equal sales_record, decode(encoded, sales_schema) + # decode with different schema applies default + assert_equal sales_tax_record, decode(encoded, sales_tax_schema, writer_schema: sales_schema) + + # decode with same schema does not apply default, since it is nullable during encode + encoded = encode(sales_record, sales_tax_schema) + tax_nil_record = {"sales" => BigDecimal("12.34"), "tax" => nil, "invoice_date" => nil, "invoice_time" => nil, "created_at" => nil} + assert_equal tax_nil_record, decode(encoded, sales_tax_schema) + end + def test_bytes_decimal_range_errors schema = Avro::Schema.parse <<-SCHEMA { "type": "bytes", "logicalType": "decimal", 
"precision": 4, "scale": 2 } @@ -245,11 +352,12 @@ def encode(datum, schema) buffer.string end - def decode(encoded, schema) + def decode(encoded, schema, writer_schema: nil) + writer_schema ||= schema buffer = StringIO.new(encoded) decoder = Avro::IO::BinaryDecoder.new(buffer) - datum_reader = Avro::IO::DatumReader.new(schema, schema) + datum_reader = Avro::IO::DatumReader.new(writer_schema, schema) datum_reader.read(decoder) end diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb index 802653010e9..182f7dd8bca 100644 --- a/lang/ruby/test/test_schema.rb +++ b/lang/ruby/test/test_schema.rb @@ -176,6 +176,8 @@ def test_unknown_named_type end assert_equal '"MissingType" is not a schema we know about.', error.message + assert_equal "MissingType", error.type_name + assert_equal "my.name.space", error.default_namespace end def test_invalid_name @@ -612,6 +614,37 @@ def test_fixed_decimal_to_without_precision_scale assert_equal schema_hash, schema.to_avro end + def test_bytes_decimal_in_record + assert_nothing_raised do + hash_to_schema( + type: 'record', + name: 'account', + fields: [ + { name: 'balance', type: 'bytes', logicalType: 'decimal', precision: 9, scale: 2 } + ] + ) + end + end + + def test_bytes_decimal_with_default_in_record + assert_nothing_raised do + hash_to_schema( + type: 'record', + name: 'account', + fields: [ + { + name: 'balance', + type: [ + { type: 'bytes', logicalType: 'decimal', precision: 9, scale: 2 }, + 'null' + ], + default: '\u00ff' + } + ] + ) + end + end + def test_bytes_decimal_to_include_precision_scale schema = Avro::Schema.parse <<-SCHEMA { diff --git a/lang/rust/.cargo-rdme.toml b/lang/rust/.cargo-rdme.toml new file mode 100644 index 00000000000..3f27313be86 --- /dev/null +++ b/lang/rust/.cargo-rdme.toml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +readme-path = "avro/README.md" +workspace-project = "apache-avro" \ No newline at end of file diff --git a/lang/rust/.gitignore b/lang/rust/.gitignore index 9ac07d70741..875c6ff7096 100644 --- a/lang/rust/.gitignore +++ b/lang/rust/.gitignore @@ -4,3 +4,5 @@ .idea/ *.iml precommit_venv/ +fleet.toml +**/.cargo/config.toml diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock index 05c8ee85290..c4fa6e3c60c 100644 --- a/lang/rust/Cargo.lock +++ b/lang/rust/Cargo.lock @@ -23,11 +23,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -38,20 +49,25 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + [[package]] name = "anyhow" -version = "1.0.68" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "apache-avro" -version = "0.15.0" +version = "0.17.0" dependencies = [ "anyhow", "apache-avro-derive", "apache-avro-test-helper", - "byteorder", "bzip2", "crc32fast", "criterion", @@ -65,9 +81,10 @@ dependencies = [ "pretty_assertions", "quad-rand", "rand", - "regex", + "regex-lite", "serde", "serde_json", + "serial_test", "sha2", "snap", "strum", @@ -76,13 +93,12 @@ dependencies = [ "typed-builder", "uuid", "xz2", - "zerocopy", "zstd", ] [[package]] name = "apache-avro-derive" -version = "0.15.0" +version = "0.17.0" dependencies = [ "apache-avro", "darling", @@ -96,8 +112,9 @@ dependencies = [ [[package]] name = "apache-avro-test-helper" -version = "0.15.0" +version = "0.17.0" dependencies = [ + "anyhow", "color-backtrace", "ctor", "env_logger", @@ -112,7 +129,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -144,6 +161,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + [[package]] name = "block-buffer" version = "0.10.2" @@ -155,9 +178,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.10.0" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" +checksum = "0d261e256854913907f67ed06efbc3338dfe6179796deefc1ff763fc1aee5535" [[package]] name = "byteorder" @@ -167,9 +190,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "bzip2" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" dependencies = [ "bzip2-sys", "libc", @@ -236,25 +259,29 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" dependencies = [ - "bitflags", - "clap_lex", - "indexmap", - "textwrap", + "clap_builder", ] [[package]] -name = "clap_lex" -version = "0.2.4" +name = "clap_builder" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" dependencies = [ - "os_str_bytes", + "anstyle", + "clap_lex", ] +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + [[package]] name = "color-backtrace" version = "0.5.1" @@ -276,6 +303,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.2" @@ -296,19 +332,19 @@ dependencies = [ [[package]] name = "criterion" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", "clap", "criterion-plot", + "is-terminal", "itertools", - "lazy_static", "num-traits", + "once_cell", "oorandom", "regex", "serde", @@ -340,9 +376,9 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = "1f34ba9a9bcb8645379e9de8cb3ecfcf4d1c85ba66d90deb3259206fa5aa193b" dependencies = [ "quote", "syn", @@ -350,9 +386,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.14.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0dd3cd20dc6b5a876612a6e5accfe7f3dd883db6d07acfbf14c128f61550dfa" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" dependencies = [ "darling_core", "darling_macro", @@ -360,9 +396,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a784d2ccaf7c98501746bf0be29b2022ba41fd62a2e622af997a03e9f972859f" +checksum = 
"177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" dependencies = [ "fnv", "ident_case", @@ -373,15 +409,34 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.14.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7618812407e9402654622dd402b0a89dff9ba93badd6540781526117b92aab7e" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core", "quote", "syn", ] +[[package]] +name = "dary_heap" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" + +[[package]] +name = "dashmap" +version = "5.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28" +dependencies = [ + "cfg-if", + "hashbrown 0.14.0", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "diff" version = "0.1.13" @@ -390,9 +445,9 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -413,12 +468,110 @@ dependencies = [ "log", ] +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = 
"futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "generic-array" version = "0.14.6" @@ -454,15 +607,24 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hello-wasm" @@ -484,11 +646,17 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + [[package]] name = "hex-literal" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "ident_case" @@ -497,13 +665,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] -name = "indexmap" -version = "1.9.2" +name = "is-terminal" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "autocfg", - "hashbrown", + "hermit-abi 0.3.2", + "rustix", + "windows-sys", ] [[package]] @@ -532,9 +701,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.60" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49409df3e3bf0856b916e2ceaca09ee28e6871cf7d9ce97a692cacfdb2a25a47" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -547,39 +716,62 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.127" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libflate" -version = "1.2.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093" +checksum = "9f7d5654ae1795afc7ff76f4365c2c8791b0feb18e8996a96adad8ffd7c3b2bf" dependencies = [ "adler32", + "core2", "crc32fast", + "dary_heap", "libflate_lz77", ] [[package]] name = "libflate_lz77" -version = "1.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a" +checksum = "be5f52fb8c451576ec6b79d3f4deb327398bc05bbdbd99021a6e77a4c855d524" dependencies = [ + "core2", + "hashbrown 0.13.2", "rle-decode-fast", ] [[package]] -name = "log" -version = "0.4.17" +name = "libm" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" + +[[package]] +name = "linux-raw-sys" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ - "cfg-if", + "autocfg", + "scopeguard", ] +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "lzma-sys" version = "0.1.19" @@ -593,10 +785,11 @@ dependencies = [ [[package]] name = "md-5" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ + "cfg-if", "digest", ] @@ -617,9 +810,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" dependencies = [ "autocfg", "num-integer", @@ -638,11 +831,12 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -656,9 +850,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.13.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "oorandom" @@ -667,20 +861,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 
[[package]] -name = "os_str_bytes" -version = "6.4.1" +name = "parking_lot" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] [[package]] -name = "output_vt100" -version = "0.1.3" +name = "parking_lot_core" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ - "winapi", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.25" @@ -695,40 +909,38 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "pretty_assertions" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ - "ctor", "diff", - "output_vt100", "yansi", ] [[package]] name = "proc-macro2" -version = "1.0.49" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "1.0.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5" +checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "lazy_static", "num-traits", - "quick-error", "rand", "rand_chacha", "rand_xorshift", - "regex-syntax", + "regex-syntax 0.6.29", + "unarray", ] [[package]] @@ -737,17 +949,11 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" -[[package]] -name = "quick-error" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" - [[package]] name = "quote" -version = "1.0.23" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -791,6 +997,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + 
"bitflags 1.3.2", +] + [[package]] name = "ref_thread_local" version = "0.1.1" @@ -799,20 +1014,44 @@ checksum = "a0d51660a68078997855ba5602f73ab3a5031bd7ad480a9d4c90fbbf04e1fff0" [[package]] name = "regex" -version = "1.7.0" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-automata", + "regex-syntax 0.7.4", ] +[[package]] +name = "regex-automata" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.4", +] + +[[package]] +name = "regex-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rle-decode-fast" @@ -826,6 +1065,19 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.3.3", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "rustversion" version = "1.0.9" @@ -853,20 +1105,26 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" -version = "1.0.151" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fed41fc1a24994d044e6db6935e69511a1153b52c15eb42493b26fa87feba0" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.151" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "255abe9a125a985c05190d687b320c12f9b1f0b99445e608c21ba0782c719ad8" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", @@ -875,26 +1133,66 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.91" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ "itoa", "ryu", "serde", ] 
+[[package]] +name = "serial_test" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e56dd856803e253c8f298af3f4d7eb0ae5e23a737252cd90bb4f3b435033b2d" +dependencies = [ + "dashmap", + "futures", + "lazy_static", + "log", + "parking_lot", + "serial_test_derive", +] + +[[package]] +name = "serial_test_derive" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sha2" -version = "0.10.6" +version = "0.10.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" dependencies = [ "cfg-if", "cpufeatures", "digest", ] +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + [[package]] name = "snap" version = "1.1.0" @@ -903,15 +1201,15 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "strum" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" dependencies = [ "heck", "proc-macro2", @@ -922,27 +1220,15 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.107" +version = "2.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", -] - [[package]] name = "termcolor" version = "1.1.3" @@ -952,26 +1238,20 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" -version = "1.0.38" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.38" +version = "1.0.48" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", @@ -990,9 +1270,18 @@ dependencies = [ [[package]] name = "typed-builder" -version = "0.11.0" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47a126a40dbff39e8320900cd61b8de053a2706e1f782cd27145792feb8fd41e" +checksum = "34085c17941e36627a879208083e25d357243812c30e7d7387c3b954f30ade16" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", @@ -1006,22 +1295,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" [[package]] -name = "unicode-ident" -version = "1.0.3" +name = "unarray" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] -name = "unicode-xid" -version = "0.2.3" +name = "unicode-ident" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" +checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" [[package]] name = "uuid" -version = "1.2.2" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ "serde", ] @@ -1051,9 +1340,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf9f5aceeec8be17c128b2e93e031fb8a4d469bb9c4ae2d7dc1888b26887268" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1061,9 +1350,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8ffb332579b0557b52d268b91feab8df3615f265d5270fec2a8c95b17c1142" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", @@ -1076,9 +1365,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.33" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -1088,9 +1377,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052be0f94026e6cbc75cdefc9bae13fd6052cdcaf532fa6c45e7ae33a1e6c810" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" 
dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1098,9 +1387,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bc0c051dc5f23e307b13285f9d75df86bfdf816c5721e573dec1f9b8aa193c" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", @@ -1111,15 +1400,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.83" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-bindgen-test" -version = "0.3.33" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d2fff962180c3fadf677438054b1db62bee4aa32af26a45388af07d1287e1d" +checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671" dependencies = [ "console_error_panic_hook", "js-sys", @@ -1131,9 +1420,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.33" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4683da3dfc016f704c9f82cf401520c4f1cb3ee440f7f52b3d6ac29506a49ca7" +checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575" dependencies = [ "proc-macro2", "quote", @@ -1181,55 +1470,100 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "xz2" -version = "0.1.7" +name = "windows-sys" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "lzma-sys", + "windows-targets", ] [[package]] -name = "yansi" -version = "0.5.1" +name = "windows-targets" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] [[package]] -name = "zerocopy" -version = "0.6.1" +name = "windows_aarch64_gnullvm" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "332f188cc1bcf1fe1064b8c58d150f497e697f49774aa846f2dc949d9a25f236" -dependencies = [ - "byteorder", - "zerocopy-derive", -] +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] -name = "zerocopy-derive" -version = "0.3.1" +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0fbc82b82efe24da867ee52e015e58178684bd9dd64c34e66bdf21da2582a9f" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" dependencies = [ - "proc-macro2", - "syn", - "synstructure", + "lzma-sys", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "zstd" -version = "0.12.1+zstd.1.5.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c947d2adc84ff9a59f2e3c03b81aa4128acf28d6ad7d56273f7e8af14e47bea" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "6.0.2+zstd.1.5.2" +version = "6.0.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cf39f730b440bab43da8fb5faf5f254574462f73f260f85f7987f32154ff17" +checksum = "68e4a3f57d13d0ab7e478665c60f35e2a613dcd527851c2c7287ce5c787e134a" dependencies = [ "libc", "zstd-sys", diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml index 8a6dfaa0328..5674935b527 100644 --- a/lang/rust/Cargo.toml +++ b/lang/rust/Cargo.toml @@ -25,3 +25,20 @@ members = [ exclude = [ "fuzz" ] + +resolver = "2" + +[workspace.package] +version = "0.17.0" +authors = ["Apache Avro team "] +license = "Apache-2.0" +repository = "https://github.com/apache/avro" +edition = "2021" +rust-version = "1.65.0" +keywords = ["avro", "data", "serialization"] +categories = ["encoding"] +documentation = "https://docs.rs/apache-avro" + +[profile.release.package.hello-wasm] +# Tell `rustc` to optimize for small code size. 
+opt-level = "s" diff --git a/lang/rust/Makefile b/lang/rust/Makefile index c948b851101..4a903c1c6d7 100644 --- a/lang/rust/Makefile +++ b/lang/rust/Makefile @@ -85,7 +85,7 @@ doc-local: .PHONY: readme readme: - cargo readme > README.md + cargo rdme # BUILDING diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml index 09da5801987..163bbfe003f 100644 --- a/lang/rust/avro/Cargo.toml +++ b/lang/rust/avro/Cargo.toml @@ -17,16 +17,17 @@ [package] name = "apache-avro" -version = "0.15.0" -authors = ["Apache Avro team "] description = "A library for working with Apache Avro in Rust" -license = "Apache-2.0" readme = "README.md" -repository = "https://github.com/apache/avro" -edition = "2018" -keywords = ["avro", "data", "serialization"] -categories = ["encoding"] -documentation = "https://docs.rs/apache-avro" +version.workspace = true +authors.workspace = true +license.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +keywords.workspace = true +categories.workspace = true +documentation.workspace = true [features] bzip = ["bzip2"] @@ -53,27 +54,25 @@ harness = false name = "single" [dependencies] -apache-avro-derive = { default-features = false, version = "0.15.0", path = "../avro_derive", optional = true } -byteorder = { default-features = false, version = "1.4.3" } -bzip2 = { default-features = false, version = "0.4.3", optional = true } +apache-avro-derive = { default-features = false, version = "0.17.0", path = "../avro_derive", optional = true } +bzip2 = { default-features = false, version = "0.4.4", optional = true } crc32fast = { default-features = false, version = "1.3.2", optional = true } -digest = { default-features = false, version = "0.10.6", features = ["core-api"] } +digest = { default-features = false, version = "0.10.7", features = ["core-api"] } lazy_static = { default-features = false, version = "1.4.0" } -libflate = { default-features = false, version = "1.2.0" } -log = { default-features = false, version = "0.4.17" } -num-bigint = { default-features = false, version = "0.4.3" } -regex = { default-features = false, version = "1.7.0", features = ["std", "perf"] } -serde = { default-features = false, version = "1.0.151", features = ["derive"] } -serde_json = { default-features = false, version = "1.0.91", features = ["std"] } +libflate = { default-features = false, version = "2.0.0", features = ["std"] } +log = { default-features = false, version = "0.4.20" } +num-bigint = { default-features = false, version = "0.4.4" } +regex-lite = { default-features = false, version = "0.1.0", features = ["std", "string"] } +serde = { default-features = false, version = "1.0.188", features = ["derive"] } +serde_json = { default-features = false, version = "1.0.107", features = ["std"] } snap = { default-features = false, version = "1.1.0", optional = true } -strum = { default-features = false, version = "0.24.1" } -strum_macros = { default-features = false, version = "0.24.3" } -thiserror = { default-features = false, version = "1.0.38" } -typed-builder = { default-features = false, version = "0.11.0" } -uuid = { default-features = false, version = "1.2.2", features = ["serde", "std"] } +strum = { default-features = false, version = "0.25.0" } +strum_macros = { default-features = false, version = "0.25.2" } +thiserror = { default-features = false, version = "1.0.48" } +typed-builder = { default-features = false, version = "0.16.2" } +uuid = { default-features = false, version = "1.4.1", features = ["serde", "std"] } xz2 = { 
default-features = false, version = "0.1.7", optional = true } -zerocopy = { default-features = false, version = "0.6.1" } -zstd = { default-features = false, version = "0.12.1+zstd.1.5.2", optional = true } +zstd = { default-features = false, version = "0.12.4+zstd.1.5.2", optional = true } [target.'cfg(target_arch = "wasm32")'.dependencies] quad-rand = { default-features = false, version = "0.2.1" } @@ -82,10 +81,11 @@ quad-rand = { default-features = false, version = "0.2.1" } rand = { default-features = false, version = "0.8.5", features = ["default"] } [dev-dependencies] -anyhow = { default-features = false, version = "1.0.68", features = ["std"] } -apache-avro-test-helper = { default-features = false, version = "0.15.0", path = "../avro_test_helper" } -criterion = { default-features = false, version = "0.4.0" } -hex-literal = { default-features = false, version = "0.3.4" } -md-5 = { default-features = false, version = "0.10.5" } -pretty_assertions = { default-features = false, version = "1.3.0", features = ["std"] } -sha2 = { default-features = false, version = "0.10.6" } +anyhow = { default-features = false, version = "1.0.75", features = ["std"] } +apache-avro-test-helper = { default-features = false, version = "0.17.0", path = "../avro_test_helper" } +criterion = { default-features = false, version = "0.5.1" } +hex-literal = { default-features = false, version = "0.4.1" } +md-5 = { default-features = false, version = "0.10.6" } +pretty_assertions = { default-features = false, version = "1.4.0", features = ["std"] } +serial_test = "2.0.0" +sha2 = { default-features = false, version = "0.10.8" } diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md index 7ae6fc77ceb..ad5ec70689f 100644 --- a/lang/rust/avro/README.md +++ b/lang/rust/avro/README.md @@ -24,7 +24,9 @@ [![Latest Documentation](https://docs.rs/apache-avro/badge.svg)](https://docs.rs/apache-avro) [![Apache License 2.0](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://github.com/apache/avro/blob/master/LICENSE.txt) -A library for working with [Apache Avro](https://avro.apache.org/) in Rust language. + + +A library for working with [Apache Avro](https://avro.apache.org/) in Rust. Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. @@ -33,7 +35,7 @@ data structures and a compact, fast, binary data format. All data in Avro is schematized, as in the following example: -``` +```json { "type": "record", "name": "test", @@ -95,11 +97,10 @@ version = "x.y" features = ["xz"] ``` - ## Upgrading to a newer minor version The library is still in beta, so there might be backward-incompatible changes between minor -versions. If you have troubles upgrading, check the [version upgrade guide](migration_guide.md). +versions. If you have troubles upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/master/lang/rust/migration_guide.md). ## Defining a schema @@ -189,7 +190,6 @@ associated type provided by the library to specify the data we want to serialize ```rust use apache_avro::types::Record; use apache_avro::Writer; -# // a writer needs a schema and something to write to let mut writer = Writer::new(&schema, Vec::new()); @@ -276,12 +276,10 @@ You must enable the `bzip` feature to use this codec. * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. You must enable the `xz` feature to use this codec. 
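The codec list above maps each compression option to a Cargo feature flag; at block level every codec is simply an in-place transform over a byte buffer. A minimal round-trip sketch, using the public `Codec::compress`/`Codec::decompress` methods that the codec test changes further down in this diff exercise (Deflate is built in, so no extra feature is needed; the buffer contents are illustrative):

```rust
use apache_avro::Codec;

fn main() -> Result<(), apache_avro::Error> {
    let original = b"theanswertolifetheuniverseandeverythingis42".repeat(3);

    // Compress in place, then decompress in place; the buffer must round-trip.
    let mut stream = original.clone();
    Codec::Deflate.compress(&mut stream)?;
    assert_ne!(original, stream);

    Codec::Deflate.decompress(&mut stream)?;
    assert_eq!(original, stream);
    Ok(())
}
```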
- To specify a codec to use to compress data, just specify it while creating a `Writer`: ```rust use apache_avro::Writer; use apache_avro::Codec; -# let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); ``` @@ -293,7 +291,6 @@ codec: ```rust use apache_avro::Reader; -# // reader creation can fail in case the input to read from is not Avro-compatible or malformed let reader = Reader::new(&input[..]).unwrap(); ``` @@ -303,7 +300,6 @@ the data has been written with, we can just do as the following: ```rust use apache_avro::Schema; use apache_avro::Reader; -# let reader_raw_schema = r#" { @@ -342,7 +338,6 @@ We can just read directly instances of `Value` out of the `Reader` iterator: ```rust use apache_avro::Reader; -# let reader = Reader::new(&input[..]).unwrap(); // value is a Result of an Avro Value in case the read operation fails @@ -434,9 +429,10 @@ fn main() -> Result<(), Error> { `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/spec.html#Logical+Types): 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/0.2.6/num_bigint) crate -1. UUID using the [`uuid`](https://docs.rs/uuid/0.8.1/uuid) crate +1. UUID using the [`uuid`](https://docs.rs/uuid/1.0.0/uuid) crate 1. Date, Time (milli) as `i32` and Time (micro) as `i64` 1. Timestamp (milli and micro) as `i64` +1. Local timestamp (milli and micro) as `i64` 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` Note that the on-disk representation is identical to the underlying primitive/complex type. @@ -504,6 +500,16 @@ fn main() -> Result<(), Error> { "type": "long", "logicalType": "timestamp-micros" }, + { + "name": "local_timestamp_millis", + "type": "long", + "logicalType": "local-timestamp-millis" + }, + { + "name": "local_timestamp_micros", + "type": "long", + "logicalType": "local-timestamp-micros" + }, { "name": "duration", "type": { @@ -532,6 +538,8 @@ fn main() -> Result<(), Error> { record.put("time_micros", Value::TimeMicros(3)); record.put("timestamp_millis", Value::TimestampMillis(4)); record.put("timestamp_micros", Value::TimestampMicros(5)); + record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); + record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); writer.append(record)?; @@ -642,9 +650,11 @@ let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).un assert_eq!(false, SchemaCompatibility::can_read(&writers_schema, &readers_schema)); ``` + + ## Minimal supported Rust version -1.60.0 +1.65.0 ## License This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/master/LICENSE.txt). 
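Because the README hunks above add the two new local-timestamp logical types in several separate places, here is a compact end-to-end sketch assembled from those same snippets. It assumes the `apache-avro` 0.17 API shown in this diff (`Value::LocalTimestampMillis`/`Value::LocalTimestampMicros`); the record and field names are illustrative only:

```rust
use apache_avro::{
    types::{Record, Value},
    Reader, Schema, Writer,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Both logical types are backed by a plain `long` on disk, as noted above.
    let schema = Schema::parse_str(
        r#"{
            "type": "record",
            "name": "event",
            "fields": [
                {"name": "at_millis", "type": "long", "logicalType": "local-timestamp-millis"},
                {"name": "at_micros", "type": "long", "logicalType": "local-timestamp-micros"}
            ]
        }"#,
    )?;

    let mut writer = Writer::new(&schema, Vec::new());
    let mut record = Record::new(writer.schema()).unwrap();
    record.put("at_millis", Value::LocalTimestampMillis(4));
    record.put("at_micros", Value::LocalTimestampMicros(5));
    writer.append(record)?;

    // Read the values back; the reader yields `Result<Value, _>` items.
    let encoded = writer.into_inner()?;
    for value in Reader::new(&encoded[..])? {
        println!("{:?}", value?);
    }
    Ok(())
}
```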
diff --git a/lang/rust/avro/examples/benchmark.rs b/lang/rust/avro/examples/benchmark.rs index 1829d2171bf..c3eac431393 100644 --- a/lang/rust/avro/examples/benchmark.rs +++ b/lang/rust/avro/examples/benchmark.rs @@ -20,6 +20,7 @@ use apache_avro::{ types::{Record, Value}, Reader, Writer, }; +use apache_avro_test_helper::TestResult; use std::{ io::{BufReader, BufWriter}, time::{Duration, Instant}, @@ -45,7 +46,7 @@ fn benchmark( big_or_small: &str, count: usize, runs: usize, -) -> anyhow::Result<()> { +) -> TestResult { let mut records = Vec::new(); for __ in 0..count { records.push(record.clone()); @@ -96,14 +97,11 @@ fn benchmark( let (total_write_secs, total_read_secs) = (seconds(total_duration_write), seconds(total_duration_read)); - println!( - "{}\t\t{}\t\t{}\t\t{}\t\t{}", - count, runs, big_or_small, total_write_secs, total_read_secs - ); + println!("{count}\t\t{runs}\t\t{big_or_small}\t\t{total_write_secs}\t\t{total_read_secs}"); Ok(()) } -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let raw_small_schema = r#" {"namespace": "test", "type": "record", "name": "Test", "fields": [{"type": {"type": "string"}, "name": "field"}]} "#; @@ -115,8 +113,8 @@ fn main() -> anyhow::Result<()> { let small_schema = Schema::parse_str(raw_small_schema)?; let big_schema = Schema::parse_str(raw_big_schema)?; - println!("{:?}", small_schema); - println!("{:?}", big_schema); + println!("{small_schema:?}"); + println!("{big_schema:?}"); let mut small_record = Record::new(&small_schema).unwrap(); small_record.put("field", "foo"); diff --git a/lang/rust/avro/examples/generate_interop_data.rs b/lang/rust/avro/examples/generate_interop_data.rs index eb830e1e3c8..35a6dc7c090 100644 --- a/lang/rust/avro/examples/generate_interop_data.rs +++ b/lang/rust/avro/examples/generate_interop_data.rs @@ -20,6 +20,7 @@ use apache_avro::{ types::{Record, Value}, Codec, Writer, }; +use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, io::{BufWriter, Write}, @@ -74,7 +75,7 @@ fn create_datum(schema: &Schema) -> Record { datum } -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let schema_str = std::fs::read_to_string("../../share/test/schemas/interop.avsc") .expect("Unable to read the interop Avro schema"); let schema = Schema::parse_str(schema_str.as_str())?; @@ -86,10 +87,10 @@ fn main() -> anyhow::Result<()> { let suffix = if codec_name == "null" { "".to_owned() } else { - format!("_{}", codec_name) + format!("_{codec_name}") }; - let file_name = format!("{}/rust{}.avro", data_folder, suffix); + let file_name = format!("{data_folder}/rust{suffix}.avro"); let output_file = std::fs::File::create(&file_name)?; let mut writer = Writer::with_codec(&schema, BufWriter::new(output_file), codec); @@ -98,13 +99,13 @@ fn main() -> anyhow::Result<()> { let datum = create_datum(&schema); writer.append(datum)?; writer.flush()?; - println!("Wrote {}", file_name); + println!("Wrote {file_name}"); } Ok(()) } -fn write_user_metadata(writer: &mut Writer>) -> anyhow::Result<()> { +fn write_user_metadata(writer: &mut Writer>) -> TestResult { writer.add_user_metadata("user_metadata".to_string(), b"someByteArray")?; Ok(()) diff --git a/lang/rust/avro/examples/test_interop_data.rs b/lang/rust/avro/examples/test_interop_data.rs index 611c0e19238..736b1fd7d03 100644 --- a/lang/rust/avro/examples/test_interop_data.rs +++ b/lang/rust/avro/examples/test_interop_data.rs @@ -16,13 +16,14 @@ // under the License. 
use apache_avro::Reader; +use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, ffi::OsStr, io::{BufReader, Read}, }; -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let mut expected_user_metadata: HashMap> = HashMap::new(); expected_user_metadata.insert("user_metadata".to_string(), b"someByteArray".to_vec()); diff --git a/lang/rust/avro/examples/test_interop_single_object_encoding.rs b/lang/rust/avro/examples/test_interop_single_object_encoding.rs index 06fb753ffa8..ef13465d772 100644 --- a/lang/rust/avro/examples/test_interop_single_object_encoding.rs +++ b/lang/rust/avro/examples/test_interop_single_object_encoding.rs @@ -23,7 +23,7 @@ struct InteropMessage; impl AvroSchema for InteropMessage { fn get_schema() -> apache_avro::Schema { - let schema = std::fs::read_to_string(format!("{}/test_schema.avsc", RESOURCES_FOLDER)) + let schema = std::fs::read_to_string(format!("{RESOURCES_FOLDER}/test_schema.avsc")) .expect("File should exist with schema inside"); apache_avro::Schema::parse_str(schema.as_str()) .expect("File should exist with schema inside") @@ -49,7 +49,7 @@ impl From for Value { } fn main() { - let single_object = std::fs::read(format!("{}/test_message.bin", RESOURCES_FOLDER)) + let single_object = std::fs::read(format!("{RESOURCES_FOLDER}/test_message.bin")) .expect("File with single object not found or error occurred while reading it."); test_write(&single_object); test_read(single_object); diff --git a/lang/rust/avro/examples/to_value.rs b/lang/rust/avro/examples/to_value.rs index 69cbe38b667..4a78383e224 100644 --- a/lang/rust/avro/examples/to_value.rs +++ b/lang/rust/avro/examples/to_value.rs @@ -24,6 +24,6 @@ struct Test { fn main() -> anyhow::Result<()> { let test = Test { a: 27, b: "foo" }; let value = apache_avro::to_value(test)?; - println!("{:?}", value); + println!("{value:?}"); Ok(()) } diff --git a/lang/rust/avro/src/codec.rs b/lang/rust/avro/src/codec.rs index 0866ff62d53..a394cad2545 100644 --- a/lang/rust/avro/src/codec.rs +++ b/lang/rust/avro/src/codec.rs @@ -82,8 +82,6 @@ impl Codec { } #[cfg(feature = "snappy")] Codec::Snappy => { - use byteorder::ByteOrder; - let mut encoded: Vec = vec![0; snap::raw::max_compress_len(stream.len())]; let compressed_size = snap::raw::Encoder::new() .compress(&stream[..], &mut encoded[..]) @@ -92,8 +90,10 @@ impl Codec { let mut hasher = Hasher::new(); hasher.update(&stream[..]); let checksum = hasher.finalize(); - byteorder::BigEndian::write_u32(&mut encoded[compressed_size..], checksum); - encoded.truncate(compressed_size + 4); + let checksum_as_bytes = checksum.to_be_bytes(); + let checksum_len = checksum_as_bytes.len(); + encoded.truncate(compressed_size + checksum_len); + encoded[compressed_size..].copy_from_slice(&checksum_as_bytes); *stream = encoded; } @@ -137,8 +137,6 @@ impl Codec { } #[cfg(feature = "snappy")] Codec::Snappy => { - use byteorder::ByteOrder; - let decompressed_size = snap::raw::decompress_len(&stream[..stream.len() - 4]) .map_err(Error::GetSnappyDecompressLen)?; let mut decoded = vec![0; decompressed_size]; @@ -146,7 +144,10 @@ impl Codec { .decompress(&stream[..stream.len() - 4], &mut decoded[..]) .map_err(Error::SnappyDecompress)?; - let expected = byteorder::BigEndian::read_u32(&stream[stream.len() - 4..]); + let mut last_four: [u8; 4] = [0; 4]; + last_four.copy_from_slice(&stream[(stream.len() - 4)..]); + let expected: u32 = u32::from_be_bytes(last_four); + let mut hasher = Hasher::new(); hasher.update(&decoded); let actual = hasher.finalize(); @@ -185,56 +186,59 
@@ impl Codec { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::{assert_eq, assert_ne}; const INPUT: &[u8] = b"theanswertolifetheuniverseandeverythingis42theanswertolifetheuniverseandeverythingis4theanswertolifetheuniverseandeverythingis2"; #[test] - fn null_compress_and_decompress() { + fn null_compress_and_decompress() -> TestResult { let codec = Codec::Null; let mut stream = INPUT.to_vec(); - codec.compress(&mut stream).unwrap(); + codec.compress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); - codec.decompress(&mut stream).unwrap(); + codec.decompress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); + Ok(()) } #[test] - fn deflate_compress_and_decompress() { - compress_and_decompress(Codec::Deflate); + fn deflate_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Deflate) } #[cfg(feature = "snappy")] #[test] - fn snappy_compress_and_decompress() { - compress_and_decompress(Codec::Snappy); + fn snappy_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Snappy) } #[cfg(feature = "zstandard")] #[test] - fn zstd_compress_and_decompress() { - compress_and_decompress(Codec::Zstandard); + fn zstd_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Zstandard) } #[cfg(feature = "bzip")] #[test] - fn bzip_compress_and_decompress() { - compress_and_decompress(Codec::Bzip2); + fn bzip_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Bzip2) } #[cfg(feature = "xz")] #[test] - fn xz_compress_and_decompress() { - compress_and_decompress(Codec::Xz); + fn xz_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Xz) } - fn compress_and_decompress(codec: Codec) { + fn compress_and_decompress(codec: Codec) -> TestResult { let mut stream = INPUT.to_vec(); - codec.compress(&mut stream).unwrap(); + codec.compress(&mut stream)?; assert_ne!(INPUT, stream.as_slice()); assert!(INPUT.len() > stream.len()); - codec.decompress(&mut stream).unwrap(); + codec.decompress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); + Ok(()) } #[test] diff --git a/lang/rust/avro/src/de.rs b/lang/rust/avro/src/de.rs index 9204ed146ec..6600564489a 100644 --- a/lang/rust/avro/src/de.rs +++ b/lang/rust/avro/src/de.rs @@ -167,8 +167,7 @@ impl<'de> de::EnumAccess<'de> for EnumDeserializer<'de> { self, )), (field, Value::String(_)) => Err(de::Error::custom(format!( - "Expected first field named 'type': got '{}' instead", - field + "Expected first field named 'type': got '{field}' instead" ))), (_, _) => Err(de::Error::custom( "Expected first field of type String or Enum for the type name".to_string(), @@ -245,22 +244,27 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Long(i) | Value::TimeMicros(i) | Value::TimestampMillis(i) - | Value::TimestampMicros(i) => visitor.visit_i64(*i), + | Value::TimestampMicros(i) + | Value::LocalTimestampMillis(i) + | Value::LocalTimestampMicros(i) => visitor.visit_i64(*i), &Value::Float(f) => visitor.visit_f32(f), &Value::Double(d) => visitor.visit_f64(d), Value::Union(_i, u) => match **u { Value::Null => visitor.visit_unit(), Value::Boolean(b) => visitor.visit_bool(b), - Value::Int(i) => visitor.visit_i32(i), + Value::Int(i) | Value::Date(i) | Value::TimeMillis(i) => visitor.visit_i32(i), Value::Long(i) | Value::TimeMicros(i) | Value::TimestampMillis(i) - | Value::TimestampMicros(i) => visitor.visit_i64(i), + | Value::TimestampMicros(i) + | Value::LocalTimestampMillis(i) + | 
Value::LocalTimestampMicros(i) => visitor.visit_i64(i), Value::Float(f) => visitor.visit_f32(f), Value::Double(d) => visitor.visit_f64(d), Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), Value::Array(ref fields) => visitor.visit_seq(SeqDeserializer::new(fields)), Value::String(ref s) => visitor.visit_borrowed_str(s), + Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()), Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)), _ => Err(de::Error::custom(format!( "unsupported union: {:?}", @@ -270,6 +274,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), Value::Array(ref fields) => visitor.visit_seq(SeqDeserializer::new(fields)), Value::String(ref s) => visitor.visit_borrowed_str(s), + Value::Uuid(uuid) => visitor.visit_str(&uuid.to_string()), Value::Map(ref items) => visitor.visit_map(MapDeserializer::new(items)), value => Err(de::Error::custom(format!( "incorrect value of type: {:?}", @@ -299,7 +304,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { .map_err(|e| de::Error::custom(e.to_string())) .and_then(|s| visitor.visit_borrowed_str(s)), Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), - _ => Err(de::Error::custom("not a string|bytes|fixed")), + _ => Err(de::Error::custom(format!( + "Expected a String|Bytes|Fixed|Uuid, but got {:?}", + self.input + ))), } } @@ -314,11 +322,23 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { .map_err(|e| de::Error::custom(e.to_string())) .and_then(|s| visitor.visit_string(s)) } + Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), Value::Union(_i, ref x) => match **x { Value::String(ref s) => visitor.visit_borrowed_str(s), - _ => Err(de::Error::custom("not a string|bytes|fixed")), + Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { + String::from_utf8(bytes.to_owned()) + .map_err(|e| de::Error::custom(e.to_string())) + .and_then(|s| visitor.visit_string(s)) + } + Value::Uuid(ref u) => visitor.visit_str(&u.to_string()), + _ => Err(de::Error::custom(format!( + "Expected a String|Bytes|Fixed|Uuid, but got {x:?}" + ))), }, - _ => Err(de::Error::custom("not a string|bytes|fixed")), + _ => Err(de::Error::custom(format!( + "Expected a String|Bytes|Fixed|Uuid|Union, but got {:?}", + self.input + ))), } } @@ -330,7 +350,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::String(ref s) => visitor.visit_bytes(s.as_bytes()), Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => visitor.visit_bytes(bytes), Value::Uuid(ref u) => visitor.visit_bytes(u.as_bytes()), - _ => Err(de::Error::custom("not a string|bytes|fixed")), + _ => Err(de::Error::custom(format!( + "Expected a String|Bytes|Fixed|Uuid, but got {:?}", + self.input + ))), } } @@ -343,7 +366,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Bytes(ref bytes) | Value::Fixed(_, ref bytes) => { visitor.visit_byte_buf(bytes.to_owned()) } - _ => Err(de::Error::custom("not a string|bytes|fixed")), + _ => Err(de::Error::custom(format!( + "Expected a String|Bytes|Fixed, but got {:?}", + self.input + ))), } } @@ -354,7 +380,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { match *self.input { Value::Union(_i, ref inner) if inner.as_ref() == &Value::Null => visitor.visit_none(), Value::Union(_i, ref inner) => visitor.visit_some(&Deserializer::new(inner)), - _ => Err(de::Error::custom("not a union")), + _ => Err(de::Error::custom(format!( + 
"Expected a Union, but got {:?}", + self.input + ))), } } @@ -366,15 +395,21 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Null => visitor.visit_unit(), Value::Union(_i, ref x) => match **x { Value::Null => visitor.visit_unit(), - _ => Err(de::Error::custom("not a null")), + _ => Err(de::Error::custom(format!( + "Expected a Null, but got {:?}", + self.input + ))), }, - _ => Err(de::Error::custom("not a null")), + _ => Err(de::Error::custom(format!( + "Expected a Null|Union, but got {:?}", + self.input + ))), } } fn deserialize_unit_struct( self, - _: &'static str, + _struct_name: &'static str, visitor: V, ) -> Result where @@ -385,7 +420,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { fn deserialize_newtype_struct( self, - _: &'static str, + _struct_name: &'static str, visitor: V, ) -> Result where @@ -402,9 +437,15 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)), Value::Union(_i, ref inner) => match **inner { Value::Array(ref items) => visitor.visit_seq(SeqDeserializer::new(items)), - _ => Err(de::Error::custom("not an array")), + Value::Null => visitor.visit_seq(SeqDeserializer::new(&[])), + _ => Err(de::Error::custom(format!( + "Expected an Array or Null, but got: {inner:?}" + ))), }, - _ => Err(de::Error::custom("not an array")), + _ => Err(de::Error::custom(format!( + "Expected an Array or Union, but got: {:?}", + self.input + ))), } } @@ -417,8 +458,8 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { fn deserialize_tuple_struct( self, - _: &'static str, - _: usize, + _struct_name: &'static str, + _len: usize, visitor: V, ) -> Result where @@ -443,8 +484,8 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { fn deserialize_struct( self, - _: &'static str, - _: &'static [&'static str], + _struct_name: &'static str, + _fields: &'static [&'static str], visitor: V, ) -> Result where @@ -454,15 +495,21 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), Value::Union(_i, ref inner) => match **inner { Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), - _ => Err(de::Error::custom("not a record")), + Value::Null => visitor.visit_map(RecordDeserializer::new(&[])), + _ => Err(de::Error::custom(format!( + "Expected a Record or Null, got: {inner:?}" + ))), }, - _ => Err(de::Error::custom("not a record")), + _ => Err(de::Error::custom(format!( + "Expected a Record or Union, got: {:?}", + self.input + ))), } } fn deserialize_enum( self, - _: &'static str, + _enum_name: &'static str, _variants: &'static [&'static str], visitor: V, ) -> Result @@ -472,9 +519,13 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { match *self.input { // This branch can be anything... 
Value::Record(ref fields) => visitor.visit_enum(EnumDeserializer::new(fields)), + Value::String(ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), // This has to be a unit Enum Value::Enum(_index, ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), - _ => Err(de::Error::custom("not an enum")), + _ => Err(de::Error::custom(format!( + "Expected a Record|Enum, but got {:?}", + self.input + ))), } } @@ -491,6 +542,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { { self.deserialize_any(visitor) } + + fn is_human_readable(&self) -> bool { + crate::util::is_human_readable() + } } impl<'de> de::SeqAccess<'de> for SeqDeserializer<'de> { @@ -601,8 +656,12 @@ pub fn from_value<'de, D: Deserialize<'de>>(value: &'de Value) -> Result TestResult { let test = Value::Record(vec![ ("a".to_owned(), Value::Long(27)), ("b".to_owned(), Value::String("foo".to_owned())), @@ -718,7 +766,7 @@ mod tests { a: 27, b: "foo".to_owned(), }; - let final_value: Test = from_value(&test).unwrap(); + let final_value: Test = from_value(&test)?; assert_eq!(final_value, expected); let test_inner = Value::Record(vec![ @@ -733,17 +781,20 @@ mod tests { ]); let expected_inner = TestInner { a: expected, b: 35 }; - let final_value: TestInner = from_value(&test_inner).unwrap(); - assert_eq!(final_value, expected_inner) + let final_value: TestInner = from_value(&test_inner)?; + assert_eq!(final_value, expected_inner); + + Ok(()) } + #[test] - fn test_from_value_unit_enum() { + fn test_from_value_unit_enum() -> TestResult { let expected = TestUnitExternalEnum { a: UnitExternalEnum::Val1, }; let test = Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]); - let final_value: TestUnitExternalEnum = from_value(&test).unwrap(); + let final_value: TestUnitExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit external enum" @@ -757,7 +808,7 @@ mod tests { "a".to_owned(), Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), )]); - let final_value: TestUnitInternalEnum = from_value(&test).unwrap(); + let final_value: TestUnitInternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit internal enum" @@ -770,7 +821,7 @@ mod tests { "a".to_owned(), Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), )]); - let final_value: TestUnitAdjacentEnum = from_value(&test).unwrap(); + let final_value: TestUnitAdjacentEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit adjacent enum" @@ -780,35 +831,96 @@ mod tests { }; let test = Value::Record(vec![("a".to_owned(), Value::Null)]); - let final_value: TestUnitUntaggedEnum = from_value(&test).unwrap(); + let final_value: TestUnitUntaggedEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit untagged enum" ); + Ok(()) } #[test] - fn test_from_value_null_enum() { - let expected = TestNullExternalEnum { - a: NullExternalEnum::Val1(()), - }; + fn avro_3645_3646_test_from_value_enum() -> TestResult { + #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] + struct TestNullExternalEnum { + a: NullExternalEnum, + } - let test = Value::Record(vec![( - "a".to_owned(), - Value::Record(vec![ - ("type".to_owned(), Value::String("Val1".to_owned())), - ("value".to_owned(), Value::Union(0, Box::new(Value::Null))), - ]), - )]); - let final_value: TestNullExternalEnum = from_value(&test).unwrap(); - assert_eq!( - final_value, expected, - "Error deserializing null 
external enum" - ); + #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] + enum NullExternalEnum { + Val1, + Val2(), + Val3(()), + Val4(u64), + } + + let data = vec![ + ( + TestNullExternalEnum { + a: NullExternalEnum::Val1, + }, + Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]), + ), + ( + TestNullExternalEnum { + a: NullExternalEnum::Val2(), + }, + Value::Record(vec![( + "a".to_owned(), + Value::Record(vec![ + ("type".to_owned(), Value::Enum(1, "Val2".to_owned())), + ("value".to_owned(), Value::Union(1, Box::new(Value::Null))), + ]), + )]), + ), + ( + TestNullExternalEnum { + a: NullExternalEnum::Val2(), + }, + Value::Record(vec![( + "a".to_owned(), + Value::Record(vec![ + ("type".to_owned(), Value::Enum(1, "Val2".to_owned())), + ("value".to_owned(), Value::Array(vec![])), + ]), + )]), + ), + ( + TestNullExternalEnum { + a: NullExternalEnum::Val3(()), + }, + Value::Record(vec![( + "a".to_owned(), + Value::Record(vec![ + ("type".to_owned(), Value::Enum(2, "Val3".to_owned())), + ("value".to_owned(), Value::Union(2, Box::new(Value::Null))), + ]), + )]), + ), + ( + TestNullExternalEnum { + a: NullExternalEnum::Val4(123), + }, + Value::Record(vec![( + "a".to_owned(), + Value::Record(vec![ + ("type".to_owned(), Value::Enum(3, "Val4".to_owned())), + ("value".to_owned(), Value::Union(3, Value::Long(123).into())), + ]), + )]), + ), + ]; + + for (expected, test) in data.iter() { + let actual: TestNullExternalEnum = from_value(test)?; + assert_eq!(actual, *expected); + } + + Ok(()) } #[test] - fn test_from_value_single_value_enum() { + fn test_from_value_single_value_enum() -> TestResult { let expected = TestSingleValueExternalEnum { a: SingleValueExternalEnum::Double(64.0), }; @@ -823,15 +935,17 @@ mod tests { ), ]), )]); - let final_value: TestSingleValueExternalEnum = from_value(&test).unwrap(); + let final_value: TestSingleValueExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing single value external enum(union)" ); + + Ok(()) } #[test] - fn test_from_value_struct_enum() { + fn test_from_value_struct_enum() -> TestResult { let expected = TestStructExternalEnum { a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, }; @@ -852,15 +966,17 @@ mod tests { ), ]), )]); - let final_value: TestStructExternalEnum = from_value(&test).unwrap(); + let final_value: TestStructExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "error deserializing struct external enum(union)" ); + + Ok(()) } #[test] - fn test_avro_3692_from_value_struct_flatten() { + fn test_avro_3692_from_value_struct_flatten() -> TestResult { #[derive(Deserialize, PartialEq, Debug)] struct S1 { f1: String, @@ -882,12 +998,14 @@ mod tests { ("f1".to_owned(), "Hello".into()), ("f2".to_owned(), "World".into()), ]); - let final_value: S1 = from_value(&test).unwrap(); + let final_value: S1 = from_value(&test)?; assert_eq!(final_value, expected); + + Ok(()) } #[test] - fn test_from_value_tuple_enum() { + fn test_from_value_tuple_enum() -> TestResult { let expected = TestTupleExternalEnum { a: TupleExternalEnum::Val1(1.0, 2.0), }; @@ -905,17 +1023,17 @@ mod tests { ), ]), )]); - let final_value: TestTupleExternalEnum = from_value(&test).unwrap(); + let final_value: TestTupleExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "error serializing tuple external enum(union)" ); - } - type TestResult = Result>; + Ok(()) + } #[test] - fn test_date() -> TestResult<()> { + fn test_date() -> TestResult { let raw_value = 1; let value = Value::Date(raw_value); 
let result = crate::from_value::(&value)?; @@ -924,7 +1042,7 @@ mod tests { } #[test] - fn test_time_millis() -> TestResult<()> { + fn test_time_millis() -> TestResult { let raw_value = 1; let value = Value::TimeMillis(raw_value); let result = crate::from_value::(&value)?; @@ -933,7 +1051,7 @@ mod tests { } #[test] - fn test_time_micros() -> TestResult<()> { + fn test_time_micros() -> TestResult { let raw_value = 1; let value = Value::TimeMicros(raw_value); let result = crate::from_value::(&value)?; @@ -942,7 +1060,7 @@ mod tests { } #[test] - fn test_timestamp_millis() -> TestResult<()> { + fn test_timestamp_millis() -> TestResult { let raw_value = 1; let value = Value::TimestampMillis(raw_value); let result = crate::from_value::(&value)?; @@ -951,7 +1069,7 @@ mod tests { } #[test] - fn test_timestamp_micros() -> TestResult<()> { + fn test_timestamp_micros() -> TestResult { let raw_value = 1; let value = Value::TimestampMicros(raw_value); let result = crate::from_value::(&value)?; @@ -960,16 +1078,34 @@ mod tests { } #[test] - fn test_from_value_uuid_str() -> TestResult<()> { + fn test_avro_3853_local_timestamp_millis() -> TestResult { + let raw_value = 1; + let value = Value::LocalTimestampMillis(raw_value); + let result = crate::from_value::(&value)?; + assert_eq!(result, raw_value); + Ok(()) + } + + #[test] + fn test_avro_3853_local_timestamp_micros() -> TestResult { + let raw_value = 1; + let value = Value::LocalTimestampMicros(raw_value); + let result = crate::from_value::(&value)?; + assert_eq!(result, raw_value); + Ok(()) + } + + #[test] + fn test_from_value_uuid_str() -> TestResult { let raw_value = "9ec535ff-3e2a-45bd-91d3-0a01321b5a49"; - let value = Value::Uuid(Uuid::parse_str(raw_value).unwrap()); + let value = Value::Uuid(Uuid::parse_str(raw_value)?); let result = crate::from_value::(&value)?; assert_eq!(result.to_string(), raw_value); Ok(()) } #[test] - fn test_from_value_uuid_slice() -> TestResult<()> { + fn test_from_value_uuid_slice() -> TestResult { let raw_value = &[4, 54, 67, 12, 43, 2, 2, 76, 32, 50, 87, 5, 1, 33, 43, 87]; let value = Value::Uuid(Uuid::from_slice(raw_value)?); let result = crate::from_value::(&value)?; @@ -978,7 +1114,7 @@ mod tests { } #[test] - fn test_from_value_with_union() -> TestResult<()> { + fn test_from_value_with_union() -> TestResult { // AVRO-3232 test for deserialize_any on missing fields on the destination struct: // Error: DeserializeValue("Unsupported union") // Error: DeserializeValue("incorrect value of type: String") @@ -1002,6 +1138,8 @@ mod tests { ("time_micros_a".to_string(), 123), ("timestamp_millis_b".to_string(), 234), ("timestamp_micros_c".to_string(), 345), + ("local_timestamp_millis_d".to_string(), 678), + ("local_timestamp_micros_e".to_string(), 789), ] .iter() .cloned() @@ -1018,6 +1156,12 @@ mod tests { key if key.starts_with("timestamp_micros_") => { (k.clone(), Value::TimestampMicros(*v)) } + key if key.starts_with("local_timestamp_millis_") => { + (k.clone(), Value::LocalTimestampMillis(*v)) + } + key if key.starts_with("local_timestamp_micros_") => { + (k.clone(), Value::LocalTimestampMicros(*v)) + } _ => unreachable!("unexpected key: {:?}", k), }) .collect(); @@ -1067,6 +1211,22 @@ mod tests { "a_non_existing_timestamp_micros".to_string(), Value::Union(0, Box::new(Value::TimestampMicros(-345))), ), + ( + "a_local_timestamp_millis".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMillis(678))), + ), + ( + "a_non_existing_local_timestamp_millis".to_string(), + Value::Union(0, 
Box::new(Value::LocalTimestampMillis(-678))), + ), + ( + "a_local_timestamp_micros".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMicros(789))), + ), + ( + "a_non_existing_local_timestamp_micros".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMicros(-789))), + ), ( "a_record".to_string(), Value::Union( @@ -1126,4 +1286,33 @@ mod tests { assert_eq!(deserialized, reference); Ok(()) } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_false() -> TestResult { + use serde::de::Deserializer as SerdeDeserializer; + + let is_human_readable = false; + crate::util::SERDE_HUMAN_READABLE.store(is_human_readable, Ordering::Release); + + let deser = &Deserializer::new(&Value::Null); + + assert_eq!(deser.is_human_readable(), is_human_readable); + + Ok(()) + } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_true() -> TestResult { + use serde::de::Deserializer as SerdeDeserializer; + + crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release); + + let deser = &Deserializer::new(&Value::Null); + + assert!(deser.is_human_readable()); + + Ok(()) + } } diff --git a/lang/rust/avro/src/decimal.rs b/lang/rust/avro/src/decimal.rs index e67430384eb..a06ab45a6ca 100644 --- a/lang/rust/avro/src/decimal.rs +++ b/lang/rust/avro/src/decimal.rs @@ -55,6 +55,12 @@ impl Decimal { } } +impl From for BigInt { + fn from(decimal: Decimal) -> Self { + decimal.value + } +} + /// Gets the internal byte array representation of a referenced decimal. /// Usage: /// ``` @@ -102,24 +108,29 @@ impl> From for Decimal { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::convert::TryFrom; #[test] - fn test_decimal_from_bytes_from_ref_decimal() { + fn test_decimal_from_bytes_from_ref_decimal() -> TestResult { let input = vec![1, 24]; let d = Decimal::from(&input); - let output = >::try_from(&d).unwrap(); + let output = >::try_from(&d)?; assert_eq!(output, input); + + Ok(()) } #[test] - fn test_decimal_from_bytes_from_owned_decimal() { + fn test_decimal_from_bytes_from_owned_decimal() -> TestResult { let input = vec![1, 24]; let d = Decimal::from(&input); - let output = >::try_from(d).unwrap(); + let output = >::try_from(d)?; assert_eq!(output, input); + + Ok(()) } } diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs index 4f9e7e94556..b13c76739b9 100644 --- a/lang/rust/avro/src/decode.rs +++ b/lang/rust/avro/src/decode.rs @@ -18,7 +18,10 @@ use crate::{ decimal::Decimal, duration::Duration, - schema::{Name, Namespace, ResolvedSchema, Schema}, + schema::{ + DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, + Schema, + }, types::Value, util::{safe_len, zag_i32, zag_i64}, AvroResult, Error, @@ -98,7 +101,7 @@ pub(crate) fn decode_internal>( } } } - Schema::Decimal { ref inner, .. } => match &**inner { + Schema::Decimal(DecimalSchema { ref inner, .. }) => match &**inner { Schema::Fixed { .. } => { match decode_internal(inner, names, enclosing_namespace, reader)? 
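The `From<Decimal> for BigInt` impl added in decimal.rs above rounds out the conversions on `Decimal`. A minimal round-trip sketch, assuming the `apache-avro` and `num-bigint` crates as used in this diff; the byte vector and expected value are illustrative:

```rust
use apache_avro::Decimal;
use num_bigint::BigInt;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let input = vec![1u8, 24];
    let decimal = Decimal::from(&input);

    // Existing conversion back to two's-complement big-endian bytes.
    let bytes = <Vec<u8>>::try_from(&decimal)?;
    assert_eq!(bytes, input);

    // The new `From<Decimal> for BigInt` exposes the unscaled integer value.
    let unscaled: BigInt = decimal.into();
    assert_eq!(unscaled, BigInt::from(280)); // 0x0118 == 280
    Ok(())
}
```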
{ Value::Fixed(_, bytes) => Ok(Value::Decimal(Decimal::from(bytes))), @@ -127,6 +130,8 @@ pub(crate) fn decode_internal>( Schema::TimeMicros => zag_i64(reader).map(Value::TimeMicros), Schema::TimestampMillis => zag_i64(reader).map(Value::TimestampMillis), Schema::TimestampMicros => zag_i64(reader).map(Value::TimestampMicros), + Schema::LocalTimestampMillis => zag_i64(reader).map(Value::LocalTimestampMillis), + Schema::LocalTimestampMicros => zag_i64(reader).map(Value::LocalTimestampMicros), Schema::Duration => { let mut buf = [0u8; 12]; reader.read_exact(&mut buf).map_err(Error::ReadDuration)?; @@ -164,7 +169,7 @@ pub(crate) fn decode_internal>( } } } - Schema::Fixed { size, .. } => { + Schema::Fixed(FixedSchema { size, .. }) => { let mut buf = vec![0u8; size]; reader .read_exact(&mut buf) @@ -232,11 +237,11 @@ pub(crate) fn decode_internal>( } Err(io_err) => Err(io_err), }, - Schema::Record { + Schema::Record(RecordSchema { ref name, ref fields, .. - } => { + }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); // Benchmarks indicate ~10% improvement using this method. let mut items = Vec::with_capacity(fields.len()); @@ -254,7 +259,7 @@ pub(crate) fn decode_internal>( } Ok(Value::Record(items)) } - Schema::Enum { ref symbols, .. } => { + Schema::Enum(EnumSchema { ref symbols, .. }) => { Ok(if let Value::Int(raw_index) = decode_int(reader)? { let index = usize::try_from(raw_index) .map_err(|e| Error::ConvertI32ToUsize(e, raw_index))?; @@ -293,64 +298,73 @@ mod tests { use crate::{ decode::decode, encode::{encode, tests::success}, - schema::Schema, + schema::{DecimalSchema, FixedSchema, Schema}, types::{ Value, Value::{Array, Int, Map}, }, Decimal, }; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::collections::HashMap; #[test] - fn test_decode_array_without_size() { + fn test_decode_array_without_size() -> TestResult { let mut input: &[u8] = &[6, 2, 4, 6, 0]; let result = decode(&Schema::Array(Box::new(Schema::Int)), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result.unwrap()); + assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); + + Ok(()) } #[test] - fn test_decode_array_with_size() { + fn test_decode_array_with_size() -> TestResult { let mut input: &[u8] = &[5, 6, 2, 4, 6, 0]; let result = decode(&Schema::Array(Box::new(Schema::Int)), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result.unwrap()); + assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); + + Ok(()) } #[test] - fn test_decode_map_without_size() { + fn test_decode_map_without_size() -> TestResult { let mut input: &[u8] = &[0x02, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; let result = decode(&Schema::Map(Box::new(Schema::Int)), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result.unwrap()); + assert_eq!(Map(expected), result?); + + Ok(()) } #[test] - fn test_decode_map_with_size() { + fn test_decode_map_with_size() -> TestResult { let mut input: &[u8] = &[0x01, 0x0C, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; let result = decode(&Schema::Map(Box::new(Schema::Int)), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result.unwrap()); + assert_eq!(Map(expected), result?); + + Ok(()) } #[test] - fn test_negative_decimal_value() { + fn test_negative_decimal_value() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = 
Box::new(Schema::Fixed { + let inner = Box::new(Schema::Fixed(FixedSchema { size: 2, doc: None, - name: Name::new("decimal").unwrap(), + name: Name::new("decimal")?, aliases: None, attributes: Default::default(), - }); - let schema = Schema::Decimal { + })); + let schema = Schema::Decimal(DecimalSchema { inner, precision: 4, scale: 2, - }; + }); let bigint = (-423).to_bigint().unwrap(); let value = Value::Decimal(Decimal::from(bigint.to_signed_bytes_be())); @@ -358,26 +372,28 @@ mod tests { encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); let mut bytes = &buffer[..]; - let result = decode(&schema, &mut bytes).unwrap(); + let result = decode(&schema, &mut bytes)?; assert_eq!(result, value); + + Ok(()) } #[test] - fn test_decode_decimal_with_bigger_than_necessary_size() { + fn test_decode_decimal_with_bigger_than_necessary_size() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed { + let inner = Box::new(Schema::Fixed(FixedSchema { size: 13, - name: Name::new("decimal").unwrap(), + name: Name::new("decimal")?, aliases: None, doc: None, attributes: Default::default(), - }); - let schema = Schema::Decimal { + })); + let schema = Schema::Decimal(DecimalSchema { inner, precision: 4, scale: 2, - }; + }); let value = Value::Decimal(Decimal::from( ((-423).to_bigint().unwrap()).to_signed_bytes_be(), )); @@ -385,12 +401,14 @@ mod tests { encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); let mut bytes: &[u8] = &buffer[..]; - let result = decode(&schema, &mut bytes).unwrap(); + let result = decode(&schema, &mut bytes)?; assert_eq!(result, value); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_union() { + fn test_avro_3448_recursive_definition_decode_union() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = Schema::parse_str( r#" @@ -415,8 +433,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -450,10 +467,12 @@ mod tests { &schema )) ); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_array() { + fn test_avro_3448_recursive_definition_decode_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -480,8 +499,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -498,11 +516,13 @@ mod tests { "Failed to decode using recursive definitions with schema:\n {:?}\n", &schema )) - ) + ); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_map() { + fn test_avro_3448_recursive_definition_decode_map() -> TestResult { let schema = Schema::parse_str( r#" { @@ -529,8 +549,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -550,11 +569,13 @@ mod tests { "Failed to decode using recursive definitions with schema:\n {:?}\n", &schema )) - ) + ); + + Ok(()) } #[test] - fn test_avro_3448_proper_multi_level_decoding_middle_namespace() { + fn test_avro_3448_proper_multi_level_decoding_middle_namespace() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = r#" { @@ -598,7 +619,7 @@ mod tests { ] } "#; - let schema = 
Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -665,10 +686,12 @@ mod tests { &schema )) ); + + Ok(()) } #[test] - fn test_avro_3448_proper_multi_level_decoding_inner_namespace() { + fn test_avro_3448_proper_multi_level_decoding_inner_namespace() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = r#" { @@ -713,7 +736,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -780,5 +803,7 @@ mod tests { &schema )) ); + + Ok(()) } } diff --git a/lang/rust/avro/src/duration.rs b/lang/rust/avro/src/duration.rs index 3bdfe4d23ef..4aa6bd53a0c 100644 --- a/lang/rust/avro/src/duration.rs +++ b/lang/rust/avro/src/duration.rs @@ -14,10 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. - -use byteorder::LittleEndian; -use zerocopy::U32; - /// A struct representing duration that hides the details of endianness and conversion between /// platform-native u32 and byte arrays. #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -28,83 +24,77 @@ pub struct Duration { } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Months(U32); +pub struct Months(u32); impl Months { pub fn new(months: u32) -> Self { - Self(U32::new(months)) + Self(months) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Months) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Months { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Months { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Days(U32); +pub struct Days(u32); impl Days { pub fn new(days: u32) -> Self { - Self(U32::new(days)) + Self(days) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Days) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Days { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Days { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Millis(U32); +pub struct Millis(u32); impl Millis { pub fn new(millis: u32) -> Self { - Self(U32::new(millis)) + Self(millis) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Millis) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Millis { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Millis { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } @@ -137,9 +127,9 @@ impl Duration { impl From for [u8; 12] { fn from(duration: Duration) -> Self { let mut bytes = [0u8; 12]; - bytes[0..4].copy_from_slice(duration.months.as_ref()); - bytes[4..8].copy_from_slice(duration.days.as_ref()); - bytes[8..12].copy_from_slice(duration.millis.as_ref()); + bytes[0..4].copy_from_slice(&duration.months.as_bytes()); + 
bytes[4..8].copy_from_slice(&duration.days.as_bytes());
+        bytes[8..12].copy_from_slice(&duration.millis.as_bytes());
         bytes
     }
 }
diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs
index 2ae48f91c3b..6e52e0c3b1e 100644
--- a/lang/rust/avro/src/encode.rs
+++ b/lang/rust/avro/src/encode.rs
@@ -16,7 +16,10 @@
 // under the License.
 
 use crate::{
-    schema::{Name, Namespace, ResolvedSchema, Schema, SchemaKind},
+    schema::{
+        DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema,
+        Schema, SchemaKind,
+    },
     types::{Value, ValueKind},
     util::{zig_i32, zig_i64},
     AvroResult, Error,
@@ -74,12 +77,14 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>(
         Value::Long(i)
         | Value::TimestampMillis(i)
         | Value::TimestampMicros(i)
+        | Value::LocalTimestampMillis(i)
+        | Value::LocalTimestampMicros(i)
         | Value::TimeMicros(i) => encode_long(*i, buffer),
         Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()),
         Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()),
         Value::Decimal(decimal) => match schema {
-            Schema::Decimal { inner, .. } => match *inner.clone() {
-                Schema::Fixed { size, .. } => {
+            Schema::Decimal(DecimalSchema { inner, .. }) => match *inner.clone() {
+                Schema::Fixed(FixedSchema { size, .. }) => {
                     let bytes = decimal.to_sign_extended_bytes_with_len(size).unwrap();
                     let num_bytes = bytes.len();
                     if num_bytes != size {
@@ -125,7 +130,7 @@
             Schema::String | Schema::Uuid => {
                 encode_bytes(s, buffer);
             }
-            Schema::Enum { ref symbols, .. } => {
+            Schema::Enum(EnumSchema { ref symbols, .. }) => {
                 if let Some(index) = symbols.iter().position(|item| item == s) {
                     encode_int(index as i32, buffer);
                 } else {
@@ -193,33 +198,39 @@ pub(crate) fn encode_internal<S: Borrow<Schema>>(
                 });
             }
         }
-        Value::Record(fields) => {
-            if let Schema::Record {
+        Value::Record(value_fields) => {
+            if let Schema::Record(RecordSchema {
                 ref name,
                 fields: ref schema_fields,
-                ref lookup,
                 ..
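The `Value::Record` arm being rewritten here drops the schema-side `lookup` table and instead indexes the value's own fields by name, then walks the schema's field order. A simplified stand-in sketch of that strategy; the types below are illustrative placeholders, not the crate's `Value` or `RecordField`:

```rust
use std::collections::HashMap;

// Encode values in schema-declared order, regardless of the order the
// value happens to list its fields in.
fn encode_in_schema_order<'a>(
    value_fields: &'a [(String, i64)], // stand-in for (name, Value)
    schema_fields: &[&str],            // stand-in for schema field order
) -> Result<Vec<&'a i64>, String> {
    let lookup: HashMap<&str, &i64> = value_fields
        .iter()
        .map(|(name, v)| (name.as_str(), v))
        .collect();
    schema_fields
        .iter()
        .map(|name| {
            lookup
                .get(name)
                .copied()
                .ok_or_else(|| format!("no entry in lookup table: {name}"))
        })
        .collect()
}

fn main() {
    // Fields arrive out of order; output follows the schema order "a", "b".
    let value = vec![("b".to_string(), 2), ("a".to_string(), 1)];
    let ordered = encode_in_schema_order(&value, &["a", "b"]).unwrap();
    assert_eq!(ordered, [&1, &2]);
}
```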
- } = *schema + }) = *schema { let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace; - for &(ref name, ref value) in fields.iter() { - match lookup.get(name) { - Some(idx) => { - encode_internal( - value, - &schema_fields[*idx].schema, - names, - &record_namespace, - buffer, - )?; - } + + let mut lookup = HashMap::new(); + value_fields.iter().for_each(|(name, field)| { + lookup.insert(name, field); + }); + + for schema_field in schema_fields.iter() { + let name = &schema_field.name; + let value = match lookup.get(name) { + Some(value) => value, None => { return Err(Error::NoEntryInLookupTable( name.clone(), - format!("{:?}", lookup), + format!("{lookup:?}"), )); } - } + }; + + encode_internal( + value, + &schema_field.schema, + names, + &record_namespace, + buffer, + )?; } } else { error!("invalid schema type for Record: {:?}", schema); diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs index 4f7098d2f85..bf066b8a5ee 100644 --- a/lang/rust/avro/src/error.rs +++ b/lang/rust/avro/src/error.rs @@ -19,9 +19,9 @@ use crate::{ schema::{Name, SchemaKind}, types::ValueKind, }; -use std::fmt; +use std::{error::Error as _, fmt}; -#[derive(thiserror::Error, Debug)] +#[derive(thiserror::Error)] pub enum Error { #[error("Bad Snappy CRC32; expected {expected:x} but got {actual:x}")] SnappyCrc32 { expected: u32, actual: u32 }, @@ -151,6 +151,12 @@ pub enum Error { #[error("TimestampMicros expected, got {0:?}")] GetTimestampMicros(ValueKind), + #[error("LocalTimestampMillis expected, got {0:?}")] + GetLocalTimestampMillis(ValueKind), + + #[error("LocalTimestampMicros expected, got {0:?}")] + GetLocalTimestampMicros(ValueKind), + #[error("Null expected, got {0:?}")] GetNull(ValueKind), @@ -199,6 +205,9 @@ pub enum Error { #[error("Could not find matching type in union")] FindUnionVariant, + #[error("Union type should not be empty")] + EmptyUnion, + #[error("Array({expected:?}) expected, got {other:?}")] GetArray { expected: SchemaKind, @@ -229,9 +238,12 @@ pub enum Error { #[error("Unions cannot contain duplicate types")] GetUnionDuplicate, - #[error("Union's first type {0:?} must match the `default`'s value type {1:?}")] + #[error("One union type {0:?} must match the `default`'s value type {1:?}")] GetDefaultUnion(SchemaKind, ValueKind), + #[error("`default`'s value type of field {0:?} in {1:?} must be {2:?}")] + GetDefaultRecordField(String, String, String), + #[error("JSON value {0} claims to be u64 but cannot be converted")] GetU64FromJson(serde_json::Number), @@ -256,6 +268,9 @@ pub enum Error { #[error("Failed to parse schema from JSON")] ParseSchemaJson(#[source] serde_json::Error), + #[error("Failed to read schema")] + ReadSchemaFromReader(#[source] std::io::Error), + #[error("Must be a JSON string, object or array")] ParseSchemaFromValidJson, @@ -301,19 +316,34 @@ pub enum Error { #[error("Invalid enum symbol name {0}")] EnumSymbolName(String), + #[error("Invalid field name {0}")] + FieldName(String), + + #[error("Duplicate field name {0}")] + FieldNameDuplicate(String), + #[error("Invalid schema name {0}. It must match the regex '{1}'")] InvalidSchemaName(String, &'static str), + #[error("Invalid namespace {0}. It must match the regex '{1}'")] + InvalidNamespace(String, &'static str), + #[error("Duplicate enum symbol {0}")] EnumSymbolDuplicate(String), + #[error("Default value for enum must be a string! 
Got: {0}")] + EnumDefaultWrongType(serde_json::Value), + #[error("No `items` in array")] GetArrayItemsField, #[error("No `values` in map")] GetMapValuesField, - #[error("No `size` in fixed")] + #[error("Fixed schema `size` value must be a positive integer: {0}")] + GetFixedSizeFieldPositive(serde_json::Value), + + #[error("Fixed schema has no `size`")] GetFixedSizeField, #[error("Failed to compress with flate")] @@ -449,3 +479,13 @@ impl serde::de::Error for Error { Error::DeserializeValue(msg.to_string()) } } + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut msg = self.to_string(); + if let Some(e) = self.source() { + msg.extend([": ", &e.to_string()]); + } + write!(f, "{}", msg) + } +} diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index a6b06d74a07..35b1b431a11 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -24,7 +24,7 @@ //! //! All data in Avro is schematized, as in the following example: //! -//! ```text +//! ```json //! { //! "type": "record", //! "name": "test", @@ -62,10 +62,34 @@ //! features = ["snappy"] //! ``` //! +//! Or in case you want to leverage the **Zstandard** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["zstandard"] +//! ``` +//! +//! Or in case you want to leverage the **Bzip2** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["bzip"] +//! ``` +//! +//! Or in case you want to leverage the **Xz** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["xz"] +//! ``` +//! //! # Upgrading to a newer minor version //! //! The library is still in beta, so there might be backward-incompatible changes between minor -//! versions. If you have troubles upgrading, check the [version upgrade guide](migration_guide.md). +//! versions. If you have troubles upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/master/lang/rust/migration_guide.md). //! //! # Defining a schema //! @@ -260,6 +284,12 @@ //! * **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each //! compressed block is followed by the 4-byte, big-endianCRC32 checksum of the uncompressed data in //! the block. You must enable the `snappy` feature to use this codec. +//! * **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. +//! You must enable the `zstandard` feature to use this codec. +//! * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library. +//! You must enable the `bzip` feature to use this codec. +//! * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. +//! You must enable the `xz` feature to use this codec. //! //! To specify a codec to use to compress data, just specify it while creating a `Writer`: //! ``` @@ -515,6 +545,7 @@ //! 1. UUID using the [`uuid`](https://docs.rs/uuid/1.0.0/uuid) crate //! 1. Date, Time (milli) as `i32` and Time (micro) as `i64` //! 1. Timestamp (milli and micro) as `i64` +//! 1. Local timestamp (milli and micro) as `i64` //! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` //! //! Note that the on-disk representation is identical to the underlying primitive/complex type. @@ -583,6 +614,16 @@ //! "logicalType": "timestamp-micros" //! }, //! { +//! "name": "local_timestamp_millis", +//! "type": "long", +//! 
"logicalType": "local-timestamp-millis" +//! }, +//! { +//! "name": "local_timestamp_micros", +//! "type": "long", +//! "logicalType": "local-timestamp-micros" +//! }, +//! { //! "name": "duration", //! "type": { //! "type": "fixed", @@ -610,6 +651,8 @@ //! record.put("time_micros", Value::TimeMicros(3)); //! record.put("timestamp_millis", Value::TimestampMillis(4)); //! record.put("timestamp_micros", Value::TimestampMicros(5)); +//! record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); +//! record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); //! record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); //! //! writer.append(record)?; @@ -743,12 +786,16 @@ pub use decimal::Decimal; pub use duration::{Days, Duration, Millis, Months}; pub use error::Error; pub use reader::{ - from_avro_datum, read_marker, GenericSingleObjectReader, Reader, SpecificSingleObjectReader, + from_avro_datum, from_avro_datum_schemata, read_marker, GenericSingleObjectReader, Reader, + SpecificSingleObjectReader, }; pub use schema::{AvroSchema, Schema}; pub use ser::to_value; -pub use util::max_allocation_bytes; -pub use writer::{to_avro_datum, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer}; +pub use util::{max_allocation_bytes, set_serde_human_readable}; +pub use writer::{ + to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter, + Writer, +}; #[cfg(feature = "derive")] pub use apache_avro_derive::*; @@ -862,61 +909,6 @@ mod tests { assert!(reader.next().is_none()); } - //TODO: move where it fits better - #[test] - fn test_enum_resolution() { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "ninja", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); - let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "clubs"); - writer.append(record).unwrap(); - let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); - assert!(reader.next().unwrap().is_err()); - assert!(reader.next().is_none()); - } - //TODO: move where it fits better #[test] fn test_enum_no_reader_schema() { diff --git a/lang/rust/avro/src/rabin.rs b/lang/rust/avro/src/rabin.rs index ce5f0761f3b..fc63f8999b8 100644 --- a/lang/rust/avro/src/rabin.rs +++ b/lang/rust/avro/src/rabin.rs @@ -16,7 +16,6 @@ // under the License. //! Implementation of the Rabin fingerprint algorithm -use byteorder::{ByteOrder, LittleEndian}; use digest::{ consts::U8, core_api::OutputSizeUser, generic_array::GenericArray, FixedOutput, FixedOutputReset, HashMarker, Output, Reset, Update, @@ -61,7 +60,7 @@ lazy_static! 
{ /// assert_eq!(result[..], hex!("60335ba6d0415528")); /// ``` /// -/// To convert the digest to the commonly used 64-bit integer value, you can use the byteorder crate: +/// To convert the digest to the commonly used 64-bit integer value, you can use the i64::from_le_bytes() function /// /// ```rust /// # use apache_avro::rabin::Rabin; @@ -75,9 +74,8 @@ lazy_static! { /// # let result = hasher.finalize(); /// /// # assert_eq!(result[..], hex!("60335ba6d0415528")); -/// use byteorder::{ByteOrder, LittleEndian}; /// -/// let i = LittleEndian::read_i64(&result.to_vec()); +/// let i = i64::from_le_bytes(result.try_into().unwrap()); /// /// assert_eq!(i, 2906301498937520992) /// ``` @@ -103,7 +101,7 @@ impl Update for Rabin { impl FixedOutput for Rabin { fn finalize_into(self, out: &mut GenericArray) { - LittleEndian::write_i64(out, self.result); + out.copy_from_slice(&self.result.to_le_bytes()); } } @@ -123,7 +121,7 @@ impl HashMarker for Rabin {} impl FixedOutputReset for Rabin { fn finalize_into_reset(&mut self, out: &mut Output) { - LittleEndian::write_i64(out, self.result); + out.copy_from_slice(&self.result.to_le_bytes()); self.reset(); } } @@ -131,13 +129,13 @@ impl FixedOutputReset for Rabin { #[cfg(test)] mod tests { use super::Rabin; - use byteorder::{ByteOrder, LittleEndian}; + use apache_avro_test_helper::TestResult; use digest::Digest; use pretty_assertions::assert_eq; // See: https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt #[test] - fn test1() { + fn test1() -> TestResult { let data: &[(&str, i64)] = &[ (r#""null""#, 7195948357588979594), (r#""boolean""#, -6970731678124411036), @@ -155,8 +153,11 @@ mod tests { for (s, fp) in data { hasher.update(s.as_bytes()); - let result = LittleEndian::read_i64(&hasher.finalize_reset()); + let res: &[u8] = &hasher.finalize_reset(); + let result = i64::from_le_bytes(res.try_into()?); assert_eq!(*fp, result); } + + Ok(()) } } diff --git a/lang/rust/avro/src/reader.rs b/lang/rust/avro/src/reader.rs index 3c8ea09b117..2ec0b84cb82 100644 --- a/lang/rust/avro/src/reader.rs +++ b/lang/rust/avro/src/reader.rs @@ -20,7 +20,7 @@ use crate::{ decode::{decode, decode_internal}, from_value, rabin::Rabin, - schema::{AvroSchema, ResolvedOwnedSchema, Schema}, + schema::{AvroSchema, Names, ResolvedOwnedSchema, ResolvedSchema, Schema}, types::Value, util, AvroResult, Codec, Error, }; @@ -34,27 +34,29 @@ use std::{ str::FromStr, }; -// Internal Block reader. +/// Internal Block reader. #[derive(Debug, Clone)] -struct Block { +struct Block<'r, R> { reader: R, - // Internal buffering to reduce allocation. + /// Internal buffering to reduce allocation. buf: Vec, buf_idx: usize, - // Number of elements expected to exist within this block. + /// Number of elements expected to exist within this block. 
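With the `byteorder` dependency removed, the Rabin digest bytes convert to the conventional 64-bit fingerprint via `i64::from_le_bytes`. A small end-to-end sketch, assuming `Schema::fingerprint` and a public `bytes` field on `SchemaFingerprint`; the expected value is the `"null"` entry from the test table above:

```rust
use apache_avro::{rabin::Rabin, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(r#""null""#)?;
    let fingerprint = schema.fingerprint::<Rabin>();

    // The 8 digest bytes are little-endian; `i64::from_le_bytes` replaces
    // the old `byteorder::LittleEndian::read_i64` call.
    let as_i64 = i64::from_le_bytes(fingerprint.bytes.as_slice().try_into()?);
    assert_eq!(as_i64, 7195948357588979594); // value from the test table
    Ok(())
}
```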
message_count: usize, marker: [u8; 16], codec: Codec, writer_schema: Schema, + schemata: Vec<&'r Schema>, user_metadata: HashMap>, } -impl Block { - fn new(reader: R) -> AvroResult> { +impl<'r, R: Read> Block<'r, R> { + fn new(reader: R, schemata: Vec<&'r Schema>) -> AvroResult> { let mut block = Block { reader, codec: Codec::Null, writer_schema: Schema::Null, + schemata, buf: vec![], buf_idx: 0, message_count: 0, @@ -178,7 +180,13 @@ impl Block { let mut block_bytes = &self.buf[self.buf_idx..]; let b_original = block_bytes.len(); - let item = from_avro_datum(&self.writer_schema, &mut block_bytes, read_schema)?; + let schemata = if self.schemata.is_empty() { + vec![&self.writer_schema] + } else { + self.schemata.clone() + }; + let item = + from_avro_datum_schemata(&self.writer_schema, schemata, &mut block_bytes, read_schema)?; if b_original == block_bytes.len() { // from_avro_datum did not consume any bytes, so return an error to avoid an infinite loop return Err(Error::ReadBlock); @@ -189,7 +197,7 @@ impl Block { } fn read_writer_schema(&mut self, metadata: &HashMap) -> AvroResult<()> { - let json = metadata + let json: serde_json::Value = metadata .get("avro.schema") .and_then(|bytes| { if let Value::Bytes(ref bytes) = *bytes { @@ -199,7 +207,17 @@ impl Block { } }) .ok_or(Error::GetAvroSchemaFromMap)?; - self.writer_schema = Schema::parse(&json)?; + if !self.schemata.is_empty() { + let rs = ResolvedSchema::try_from(self.schemata.clone())?; + let names: Names = rs + .get_names() + .iter() + .map(|(name, schema)| (name.clone(), (*schema).clone())) + .collect(); + self.writer_schema = Schema::parse_with_names(&json, names)?; + } else { + self.writer_schema = Schema::parse(&json)?; + } Ok(()) } @@ -261,7 +279,7 @@ fn read_codec(metadata: &HashMap) -> AvroResult { /// } /// ``` pub struct Reader<'a, R> { - block: Block, + block: Block<'a, R>, reader_schema: Option<&'a Schema>, errored: bool, should_resolve_schema: bool, @@ -273,7 +291,7 @@ impl<'a, R: Read> Reader<'a, R> { /// /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. pub fn new(reader: R) -> AvroResult> { - let block = Block::new(reader)?; + let block = Block::new(reader, vec![])?; let reader = Reader { block, reader_schema: None, @@ -288,7 +306,28 @@ impl<'a, R: Read> Reader<'a, R> { /// /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. pub fn with_schema(schema: &'a Schema, reader: R) -> AvroResult> { - let block = Block::new(reader)?; + let block = Block::new(reader, vec![schema])?; + let mut reader = Reader { + block, + reader_schema: Some(schema), + errored: false, + should_resolve_schema: false, + }; + // Check if the reader and writer schemas disagree. + reader.should_resolve_schema = reader.writer_schema() != schema; + Ok(reader) + } + + /// Creates a `Reader` given a reader `Schema` and something implementing the `io::Read` trait + /// to read from. + /// + /// **NOTE** The avro header is going to be read automatically upon creation of the `Reader`. + pub fn with_schemata( + schema: &'a Schema, + schemata: Vec<&'a Schema>, + reader: R, + ) -> AvroResult> { + let block = Block::new(reader, schemata)?; let mut reader = Reader { block, reader_schema: Some(schema), @@ -368,6 +407,26 @@ pub fn from_avro_datum( } } +/// Decode a `Value` encoded in Avro format given the provided `Schema` and anything implementing `io::Read` +/// to read from. +/// If the writer schema is incomplete, i.e. 
contains `Schema::Ref`s then it will use the provided +/// schemata to resolve any dependencies. +/// +/// In case a reader `Schema` is provided, schema resolution will also be performed. +pub fn from_avro_datum_schemata( + writer_schema: &Schema, + schemata: Vec<&Schema>, + reader: &mut R, + reader_schema: Option<&Schema>, +) -> AvroResult { + let rs = ResolvedSchema::try_from(schemata)?; + let value = decode_internal(writer_schema, rs.get_names(), &None, reader)?; + match reader_schema { + Some(schema) => value.resolve(schema), + None => Ok(value), + } +} + pub struct GenericSingleObjectReader { write_schema: ResolvedOwnedSchema, expected_header: [u8; 10], @@ -470,6 +529,7 @@ pub fn read_marker(bytes: &[u8]) -> [u8; 16] { mod tests { use super::*; use crate::{encode::encode, from_value, types::Record, Reader}; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde::Deserialize; use std::io::Cursor; @@ -510,8 +570,8 @@ mod tests { ]; #[test] - fn test_from_avro_datum() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_from_avro_datum() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; let mut record = Record::new(&schema).unwrap(); @@ -519,14 +579,13 @@ mod tests { record.put("b", "foo"); let expected = record.into(); - assert_eq!( - from_avro_datum(&schema, &mut encoded, None).unwrap(), - expected - ); + assert_eq!(from_avro_datum(&schema, &mut encoded, None)?, expected); + + Ok(()) } #[test] - fn test_from_avro_datum_with_union_to_struct() { + fn test_from_avro_datum_with_union_to_struct() -> TestResult { const TEST_RECORD_SCHEMA_3240: &str = r#" { "type": "record", @@ -569,7 +628,7 @@ mod tests { a_nullable_string: Option, } - let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240).unwrap(); + let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; let expected_record: TestRecord3240 = TestRecord3240 { @@ -579,33 +638,36 @@ mod tests { a_nullable_string: None, }; - let avro_datum = from_avro_datum(&schema, &mut encoded, None).unwrap(); + let avro_datum = from_avro_datum(&schema, &mut encoded, None)?; let parsed_record: TestRecord3240 = match &avro_datum { - Value::Record(_) => from_value::(&avro_datum).unwrap(), - unexpected => panic!( - "could not map avro data to struct, found unexpected: {:?}", - unexpected - ), + Value::Record(_) => from_value::(&avro_datum)?, + unexpected => { + panic!("could not map avro data to struct, found unexpected: {unexpected:?}") + } }; assert_eq!(parsed_record, expected_record); + + Ok(()) } #[test] - fn test_null_union() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_null_union() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let mut encoded: &'static [u8] = &[2, 0]; assert_eq!( - from_avro_datum(&schema, &mut encoded, None).unwrap(), + from_avro_datum(&schema, &mut encoded, None)?, Value::Union(1, Box::new(Value::Long(0))) ); + + Ok(()) } #[test] - fn test_reader_iterator() { - let schema = Schema::parse_str(SCHEMA).unwrap(); - let reader = Reader::with_schema(&schema, ENCODED).unwrap(); + fn test_reader_iterator() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; + let reader = Reader::with_schema(&schema, ENCODED)?; let mut record1 = Record::new(&schema).unwrap(); record1.put("a", 27i64); @@ -618,20 +680,24 @@ mod tests { let expected = vec![record1.into(), record2.into()]; for (i, value) in reader.enumerate() { - 
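A usage sketch for the new `from_avro_datum_schemata`, assuming the two illustrative schemas below; `Schema::parse_list` leaves the cross-reference as a `Schema::Ref`, and the datum bytes are hand-encoded (int 3 zig-zags to 0x06):

```rust
use apache_avro::{from_avro_datum_schemata, types::Value, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schemata = Schema::parse_list(&[
        r#"{"type": "record", "name": "ns.Inner",
            "fields": [{"name": "z", "type": "int"}]}"#,
        r#"{"type": "record", "name": "ns.Outer",
            "fields": [{"name": "a", "type": "ns.Inner"}]}"#,
    ])?;
    let schemata_refs: Vec<&Schema> = schemata.iter().collect();
    let outer = schemata_refs[1];

    // `Outer { a: Inner { z: 3 } }` encodes to the single zig-zag byte 0x06.
    let mut datum: &[u8] = &[0x06];
    let value = from_avro_datum_schemata(outer, schemata_refs.clone(), &mut datum, None)?;
    assert_eq!(
        value,
        Value::Record(vec![(
            "a".into(),
            Value::Record(vec![("z".into(), Value::Int(3))])
        )])
    );
    Ok(())
}
```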
assert_eq!(value.unwrap(), expected[i]); + assert_eq!(value?, expected[i]); } + + Ok(()) } #[test] - fn test_reader_invalid_header() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_reader_invalid_header() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let invalid = ENCODED.iter().copied().skip(1).collect::>(); assert!(Reader::with_schema(&schema, &invalid[..]).is_err()); + + Ok(()) } #[test] - fn test_reader_invalid_block() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_reader_invalid_block() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let invalid = ENCODED .iter() .copied() @@ -641,32 +707,38 @@ mod tests { .into_iter() .rev() .collect::>(); - let reader = Reader::with_schema(&schema, &invalid[..]).unwrap(); + let reader = Reader::with_schema(&schema, &invalid[..])?; for value in reader { assert!(value.is_err()); } + + Ok(()) } #[test] - fn test_reader_empty_buffer() { + fn test_reader_empty_buffer() -> TestResult { let empty = Cursor::new(Vec::new()); assert!(Reader::new(empty).is_err()); + + Ok(()) } #[test] - fn test_reader_only_header() { + fn test_reader_only_header() -> TestResult { let invalid = ENCODED.iter().copied().take(165).collect::>(); - let reader = Reader::new(&invalid[..]).unwrap(); + let reader = Reader::new(&invalid[..])?; for value in reader { assert!(value.is_err()); } + + Ok(()) } #[test] - fn test_avro_3405_read_user_metadata_success() { + fn test_avro_3405_read_user_metadata_success() -> TestResult { use crate::writer::Writer; - let schema = Schema::parse_str(SCHEMA).unwrap(); + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut user_meta_data: HashMap> = HashMap::new(); @@ -678,20 +750,22 @@ mod tests { user_meta_data.insert("vecKey".to_string(), vec![1, 2, 3]); for (k, v) in user_meta_data.iter() { - writer.add_user_metadata(k.to_string(), v).unwrap(); + writer.add_user_metadata(k.to_string(), v)?; } let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - writer.append(record.clone()).unwrap(); - writer.append(record.clone()).unwrap(); - writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + writer.append(record.clone())?; + writer.append(record.clone())?; + writer.flush()?; + let result = writer.into_inner()?; - let reader = Reader::new(&result[..]).unwrap(); + let reader = Reader::new(&result[..])?; assert_eq!(reader.user_metadata(), &user_meta_data); + + Ok(()) } #[derive(Deserialize, Clone, PartialEq, Debug)] @@ -747,7 +821,7 @@ mod tests { } } } - (key, value) => panic!("Unexpected pair: {:?} -> {:?}", key, value), + (key, value) => panic!("Unexpected pair: {key:?} -> {value:?}"), } } TestSingleObjectReader { @@ -756,7 +830,7 @@ mod tests { c, } } else { - panic!("Expected a Value::Record but was {:?}", obj) + panic!("Expected a Value::Record but was {obj:?}") } } } @@ -775,7 +849,7 @@ mod tests { } #[test] - fn test_avro_3507_single_object_reader() { + fn test_avro_3507_single_object_reader() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, @@ -802,17 +876,19 @@ mod tests { .expect("Should read"); let expected_value: Value = obj.into(); assert_eq!(expected_value, val); + + Ok(()) } #[test] - fn avro_3642_test_single_object_reader_incomplete_reads() { + fn avro_3642_test_single_object_reader_incomplete_reads() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, c: vec!["cat".into(), "dog".into()], }; // The two-byte marker, to show that the message uses this 
single-record format - let to_read_1 = vec![0xC3, 0x01]; + let to_read_1 = [0xC3, 0x01]; let mut to_read_2 = Vec::::new(); to_read_2.extend_from_slice( &TestSingleObjectReader::get_schema() @@ -834,10 +910,12 @@ mod tests { .expect("Should read"); let expected_value: Value = obj.into(); assert_eq!(expected_value, val); + + Ok(()) } #[test] - fn test_avro_3507_reader_parity() { + fn test_avro_3507_reader_parity() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, @@ -877,7 +955,9 @@ mod tests { let expected_value: Value = obj.clone().into(); assert_eq!(obj, read_obj1); assert_eq!(obj, read_obj2); - assert_eq!(val, expected_value) + assert_eq!(val, expected_value); + + Ok(()) } #[cfg(not(feature = "snappy"))] diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index 6aa1d7fe0a6..7c51140aa01 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -19,18 +19,20 @@ use crate::{error::Error, types, util::MapHelper, AvroResult}; use digest::Digest; use lazy_static::lazy_static; -use regex::Regex; +use regex_lite::Regex; use serde::{ ser::{SerializeMap, SerializeSeq}, Deserialize, Serialize, Serializer, }; use serde_json::{Map, Value}; use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, collections::{BTreeMap, HashMap, HashSet}, convert::{TryFrom, TryInto}, fmt, + fmt::Debug, hash::Hash, + io::Read, str::FromStr, }; use strum_macros::{EnumDiscriminants, EnumString}; @@ -40,7 +42,11 @@ lazy_static! { // An optional namespace (with optional dots) followed by a name without any dots in it. static ref SCHEMA_NAME_R: Regex = - Regex::new(r"^((?P[A-Za-z_][A-Za-z0-9_\.]*)*\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$").unwrap(); + Regex::new(r"^((?P([A-Za-z_][A-Za-z0-9_\.]*)*)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$").unwrap(); + + static ref FIELD_NAME_R: Regex = Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap(); + + static ref NAMESPACE_R: Regex = Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap(); } /// Represents an Avro schema fingerprint @@ -57,7 +63,7 @@ impl fmt::Display for SchemaFingerprint { "{}", self.bytes .iter() - .map(|byte| format!("{:02x}", byte)) + .map(|byte| format!("{byte:02x}")) .collect::>() .join("") ) @@ -98,43 +104,14 @@ pub enum Schema { /// A `union` Avro schema. Union(UnionSchema), /// A `record` Avro schema. - /// - /// The `lookup` table maps field names to their position in the `Vec` - /// of `fields`. - Record { - name: Name, - aliases: Aliases, - doc: Documentation, - fields: Vec, - lookup: BTreeMap, - attributes: BTreeMap, - }, + Record(RecordSchema), /// An `enum` Avro schema. - Enum { - name: Name, - aliases: Aliases, - doc: Documentation, - symbols: Vec, - attributes: BTreeMap, - }, + Enum(EnumSchema), /// A `fixed` Avro schema. - Fixed { - name: Name, - aliases: Aliases, - doc: Documentation, - size: usize, - attributes: BTreeMap, - }, + Fixed(FixedSchema), /// Logical type which represents `Decimal` values. The underlying type is serialized and /// deserialized as `Schema::Bytes` or `Schema::Fixed`. - /// - /// `scale` defaults to 0 and is an integer greater than or equal to 0 and `precision` is an - /// integer greater than 0. - Decimal { - precision: DecimalMetadata, - scale: DecimalMetadata, - inner: Box, - }, + Decimal(DecimalSchema), /// A universally unique identifier, annotating a string. Uuid, /// Logical type which represents the number of days since the unix epoch. 
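With the named variants of `Schema` now wrapping dedicated structs, pattern matches gain explicit, documented types. A minimal sketch of matching the new `Schema::Record(RecordSchema { .. })` shape, assuming the public `RecordSchema` declared in this diff; the schema JSON is illustrative:

```rust
use apache_avro::{schema::RecordSchema, Schema};

// Extract the field names of a record schema, or an empty list otherwise.
fn field_names(schema: &Schema) -> Vec<String> {
    match schema {
        Schema::Record(RecordSchema { fields, .. }) => {
            fields.iter().map(|f| f.name.clone()).collect()
        }
        _ => Vec::new(),
    }
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(
        r#"{"type": "record", "name": "R",
            "fields": [{"name": "a", "type": "long"}]}"#,
    )?;
    assert_eq!(field_names(&schema), vec!["a".to_string()]);
    Ok(())
}
```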
@@ -150,12 +127,14 @@ pub enum Schema { TimestampMillis, /// An instant in time represented as the number of microseconds after the UNIX epoch. TimestampMicros, + /// An instant in localtime represented as the number of milliseconds after the UNIX epoch. + LocalTimestampMillis, + /// An instant in local time represented as the number of microseconds after the UNIX epoch. + LocalTimestampMicros, /// An amount of time defined by a number of months, days and milliseconds. Duration, - // A reference to another schema. - Ref { - name: Name, - }, + /// A reference to another schema. + Ref { name: Name }, } impl PartialEq for Schema { @@ -216,6 +195,8 @@ impl From<&types::Value> for SchemaKind { Value::TimeMicros(_) => Self::TimeMicros, Value::TimestampMillis(_) => Self::TimestampMillis, Value::TimestampMicros(_) => Self::TimestampMicros, + Value::LocalTimestampMillis(_) => Self::LocalTimestampMillis, + Value::LocalTimestampMicros(_) => Self::LocalTimestampMicros, Value::Duration { .. } => Self::Duration, } } @@ -244,7 +225,7 @@ pub type Aliases = Option>; /// Represents Schema lookup within a schema env pub(crate) type Names = HashMap; /// Represents Schema lookup within a schema -pub(crate) type NamesRef<'a> = HashMap; +pub type NamesRef<'a> = HashMap; /// Represents the namespace for Named Schema pub type Namespace = Option; @@ -254,7 +235,10 @@ impl Name { /// `aliases` will not be defined. pub fn new(name: &str) -> AvroResult { let (name, namespace) = Name::get_name_and_namespace(name)?; - Ok(Self { name, namespace }) + Ok(Self { + name, + namespace: namespace.filter(|ns| !ns.is_empty()), + }) } fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { @@ -268,7 +252,10 @@ impl Name { } /// Parse a `serde_json::Value` into a `Name`. - pub(crate) fn parse(complex: &Map) -> AvroResult { + pub(crate) fn parse( + complex: &Map, + enclosing_namespace: &Namespace, + ) -> AvroResult { let (name, namespace_from_name) = complex .name() .map(|name| Name::get_name_and_namespace(name.as_str()).unwrap()) @@ -279,9 +266,26 @@ impl Name { _ => None, }; + let namespace = namespace_from_name + .or_else(|| { + complex + .string("namespace") + .or_else(|| enclosing_namespace.clone()) + }) + .filter(|ns| !ns.is_empty()); + + if let Some(ref ns) = namespace { + if !NAMESPACE_R.is_match(ns) { + return Err(Error::InvalidNamespace( + ns.to_string(), + NAMESPACE_R.as_str(), + )); + } + } + Ok(Self { name: type_name.unwrap_or(name), - namespace: namespace_from_name.or_else(|| complex.string("namespace")), + namespace, }) } @@ -296,8 +300,10 @@ impl Name { let namespace = self.namespace.clone().or(default_namespace); match namespace { - Some(ref namespace) => format!("{}.{}", namespace, self.name), - None => self.name.clone(), + Some(ref namespace) if !namespace.is_empty() => { + format!("{}.{}", namespace, self.name) + } + _ => self.name.clone(), } } } @@ -307,12 +313,12 @@ impl Name { /// use apache_avro::schema::Name; /// /// assert_eq!( - /// Name::new("some_name").unwrap().fully_qualified_name(&Some("some_namespace".into())), - /// Name::new("some_namespace.some_name").unwrap() + /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), + /// Name::new("some_namespace.some_name")? 
/// ); /// assert_eq!( - /// Name::new("some_namespace.some_name").unwrap().fully_qualified_name(&Some("other_namespace".into())), - /// Name::new("some_namespace.some_name").unwrap() + /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), + /// Name::new("some_namespace.some_name")? /// ); /// ``` pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> Name { @@ -321,7 +327,7 @@ impl Name { namespace: self .namespace .clone() - .or_else(|| enclosing_namespace.clone()), + .or_else(|| enclosing_namespace.clone().filter(|ns| !ns.is_empty())), } } } @@ -346,12 +352,9 @@ impl<'de> Deserialize<'de> for Name { Value::deserialize(deserializer).and_then(|value| { use serde::de::Error; if let Value::Object(json) = value { - Name::parse(&json).map_err(Error::custom) + Name::parse(&json, &None).map_err(Error::custom) } else { - Err(Error::custom(format!( - "Expected a JSON object: {:?}", - value - ))) + Err(Error::custom(format!("Expected a JSON object: {value:?}"))) } }) } @@ -399,9 +402,10 @@ impl Serialize for Alias { } } -pub(crate) struct ResolvedSchema<'s> { +#[derive(Debug)] +pub struct ResolvedSchema<'s> { names_ref: NamesRef<'s>, - root_schema: &'s Schema, + schemata: Vec<&'s Schema>, } impl<'s> TryFrom<&'s Schema> for ResolvedSchema<'s> { @@ -411,71 +415,111 @@ impl<'s> TryFrom<&'s Schema> for ResolvedSchema<'s> { let names = HashMap::new(); let mut rs = ResolvedSchema { names_ref: names, - root_schema: schema, + schemata: vec![schema], + }; + rs.resolve(rs.get_schemata(), &None, None)?; + Ok(rs) + } +} + +impl<'s> TryFrom> for ResolvedSchema<'s> { + type Error = Error; + + fn try_from(schemata: Vec<&'s Schema>) -> AvroResult { + let names = HashMap::new(); + let mut rs = ResolvedSchema { + names_ref: names, + schemata, }; - Self::from_internal(rs.root_schema, &mut rs.names_ref, &None)?; + rs.resolve(rs.get_schemata(), &None, None)?; Ok(rs) } } impl<'s> ResolvedSchema<'s> { - pub(crate) fn get_root_schema(&self) -> &'s Schema { - self.root_schema + pub fn get_schemata(&self) -> Vec<&'s Schema> { + self.schemata.clone() } - pub(crate) fn get_names(&self) -> &NamesRef<'s> { + + pub fn get_names(&self) -> &NamesRef<'s> { &self.names_ref } - fn from_internal( - schema: &'s Schema, - names_ref: &mut NamesRef<'s>, + /// Creates `ResolvedSchema` with some already known schemas. + /// + /// Those schemata would be used to resolve references if needed. + pub fn new_with_known_schemata<'n>( + schemata_to_resolve: Vec<&'s Schema>, + enclosing_namespace: &Namespace, + known_schemata: &'n NamesRef<'n>, + ) -> AvroResult { + let names = HashMap::new(); + let mut rs = ResolvedSchema { + names_ref: names, + schemata: schemata_to_resolve, + }; + rs.resolve(rs.get_schemata(), enclosing_namespace, Some(known_schemata))?; + Ok(rs) + } + + fn resolve<'n>( + &mut self, + schemata: Vec<&'s Schema>, enclosing_namespace: &Namespace, + known_schemata: Option<&'n NamesRef<'n>>, ) -> AvroResult<()> { - match schema { - Schema::Array(schema) | Schema::Map(schema) => { - Self::from_internal(schema, names_ref, enclosing_namespace) - } - Schema::Union(UnionSchema { schemas, .. }) => { - for schema in schemas { - Self::from_internal(schema, names_ref, enclosing_namespace)? + for schema in schemata { + match schema { + Schema::Array(schema) | Schema::Map(schema) => { + self.resolve(vec![schema], enclosing_namespace, known_schemata)? } - Ok(()) - } - Schema::Enum { name, .. } | Schema::Fixed { name, .. 
} => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { - Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)) - } else { - Ok(()) + Schema::Union(UnionSchema { schemas, .. }) => { + for schema in schemas { + self.resolve(vec![schema], enclosing_namespace, known_schemata)? + } } - } - Schema::Record { name, fields, .. } => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names_ref - .insert(fully_qualified_name.clone(), schema) - .is_some() - { - Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)) - } else { - let record_namespace = fully_qualified_name.namespace; - for field in fields { - Self::from_internal(&field.schema, names_ref, &record_namespace)? + Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { + let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + if self + .names_ref + .insert(fully_qualified_name.clone(), schema) + .is_some() + { + return Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)); } - Ok(()) } + Schema::Record(RecordSchema { name, fields, .. }) => { + let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + if self + .names_ref + .insert(fully_qualified_name.clone(), schema) + .is_some() + { + return Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)); + } else { + let record_namespace = fully_qualified_name.namespace; + for field in fields { + self.resolve(vec![&field.schema], &record_namespace, known_schemata)? + } + } + } + Schema::Ref { name } => { + let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + // first search for reference in current schemata, then look into external references. + if !self.names_ref.contains_key(&fully_qualified_name) { + let is_resolved_with_known_schemas = known_schemata + .as_ref() + .map(|names| names.contains_key(&fully_qualified_name)) + .unwrap_or(false); + if !is_resolved_with_known_schemas { + return Err(Error::SchemaResolutionError(fully_qualified_name)); + } + } + } + _ => (), } - Schema::Ref { name } => { - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - names_ref - .get(&fully_qualified_name) - .map(|_| ()) - .ok_or(Error::SchemaResolutionError(fully_qualified_name)) - } - _ => Ok(()), } + Ok(()) } } @@ -521,7 +565,7 @@ impl ResolvedOwnedSchema { } Ok(()) } - Schema::Enum { name, .. } | Schema::Fixed { name, .. } => { + Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if names .insert(fully_qualified_name.clone(), schema.clone()) @@ -532,7 +576,7 @@ impl ResolvedOwnedSchema { Ok(()) } } - Schema::Record { name, fields, .. } => { + Schema::Record(RecordSchema { name, fields, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if names .insert(fully_qualified_name.clone(), schema.clone()) @@ -566,6 +610,8 @@ pub struct RecordField { pub name: String, /// Documentation of the field. pub doc: Documentation, + /// Aliases of the field's name. They have no namespace. + pub aliases: Option>, /// Default value of the field. /// This value will be used when reading Avro datum if schema resolution /// is enabled. 
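A usage sketch for the now-public `ResolvedSchema`, assuming `Schema::parse_list` so that the second schema's reference to the first stays a `Schema::Ref` (both schemas are illustrative):

```rust
use apache_avro::schema::{ResolvedSchema, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schemata = Schema::parse_list(&[
        r#"{"type": "fixed", "name": "ns.Md5", "size": 16}"#,
        r#"{"type": "record", "name": "ns.File",
            "fields": [{"name": "hash", "type": "ns.Md5"}]}"#,
    ])?;
    let refs: Vec<&Schema> = schemata.iter().collect();

    // Resolves `ns.Md5` across both inputs and exposes the name table.
    let resolved = ResolvedSchema::try_from(refs)?;
    assert!(resolved
        .get_names()
        .keys()
        .any(|name| name.fullname(None) == "ns.File"));
    Ok(())
}
```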
@@ -597,14 +643,35 @@ impl RecordField { field: &Map, position: usize, parser: &mut Parser, - enclosing_namespace: &Namespace, + enclosing_record: &Name, ) -> AvroResult { let name = field.name().ok_or(Error::GetNameFieldFromRecord)?; + if !FIELD_NAME_R.is_match(&name) { + return Err(Error::FieldName(name)); + } + // TODO: "type" = "" - let schema = parser.parse_complex(field, enclosing_namespace)?; + let schema = parser.parse_complex(field, &enclosing_record.namespace)?; let default = field.get("default").cloned(); + Self::resolve_default_value( + &schema, + &name, + &enclosing_record.fullname(None), + &parser.parsed_schemas, + &default, + )?; + + let aliases = field.get("aliases").and_then(|aliases| { + aliases.as_array().map(|aliases| { + aliases + .iter() + .flat_map(|alias| alias.as_str()) + .map(|alias| alias.to_string()) + .collect::>() + }) + }); let order = field .get("order") @@ -616,6 +683,7 @@ impl RecordField { name, doc: field.doc(), default, + aliases, schema, order, position, @@ -623,6 +691,55 @@ impl RecordField { }) } + fn resolve_default_value( + field_schema: &Schema, + field_name: &str, + record_name: &str, + names: &Names, + default: &Option, + ) -> AvroResult<()> { + if let Some(value) = default { + let avro_value = types::Value::from(value.clone()); + match field_schema { + Schema::Union(union_schema) => { + let schemas = &union_schema.schemas; + let resolved = schemas.iter().any(|schema| { + avro_value + .to_owned() + .resolve_internal(schema, names, &schema.namespace(), &None) + .is_ok() + }); + + if !resolved { + let schema: Option<&Schema> = schemas.get(0); + return match schema { + Some(first_schema) => Err(Error::GetDefaultUnion( + SchemaKind::from(first_schema), + types::ValueKind::from(avro_value), + )), + None => Err(Error::EmptyUnion), + }; + } + } + _ => { + let resolved = avro_value + .resolve_internal(field_schema, names, &field_schema.namespace(), &None) + .is_ok(); + + if !resolved { + return Err(Error::GetDefaultRecordField( + field_name.to_string(), + record_name.to_string(), + field_schema.canonical_form(), + )); + } + } + }; + } + + Ok(()) + } + fn get_field_custom_attributes(field: &Map) -> BTreeMap { let mut custom_attributes: BTreeMap = BTreeMap::new(); for (key, value) in field { @@ -644,8 +761,74 @@ impl RecordField { } } +/// A description of an Enum schema. +#[derive(Debug, Clone)] +pub struct RecordSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of fields of the schema + pub fields: Vec, + /// The `lookup` table maps field names to their position in the `Vec` + /// of `fields`. + pub lookup: BTreeMap, + /// The custom attributes of the schema + pub attributes: BTreeMap, +} + +/// A description of an Enum schema. +#[derive(Debug, Clone)] +pub struct EnumSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of symbols of the schema + pub symbols: Vec, + /// An optional default symbol used for compatibility + pub default: Option, + /// The custom attributes of the schema + pub attributes: BTreeMap, +} + +/// A description of a Union schema. 
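The new `resolve_default_value` check above rejects invalid field defaults at schema-parse time instead of surfacing them later during reads. A sketch of the observable behavior, assuming the validation runs inside `Schema::parse_str` as wired up in `RecordField::new`:

```rust
use apache_avro::Schema;

fn main() {
    // A string default cannot resolve to an int field, so parsing now fails
    // with `GetDefaultRecordField` rather than succeeding silently.
    let bad = r#"{
        "type": "record", "name": "R",
        "fields": [{"name": "a", "type": "int", "default": "foo"}]
    }"#;
    assert!(Schema::parse_str(bad).is_err());

    // A default that resolves against one of the union branches is accepted.
    let good = r#"{
        "type": "record", "name": "R",
        "fields": [{"name": "a", "type": ["null", "int"], "default": null}]
    }"#;
    assert!(Schema::parse_str(good).is_ok());
}
```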
+ fn get_field_custom_attributes(field: &Map<String, Value>) -> BTreeMap<String, Value> { let mut custom_attributes: BTreeMap<String, Value> = BTreeMap::new(); for (key, value) in field { @@ -644,8 +761,74 @@ impl RecordField { } } +/// A description of a Record schema. +#[derive(Debug, Clone)] +pub struct RecordSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of fields of the schema + pub fields: Vec<RecordField>, + /// The `lookup` table maps field names to their position in the `Vec` + /// of `fields`. + pub lookup: BTreeMap<String, usize>, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of an Enum schema. +#[derive(Debug, Clone)] +pub struct EnumSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of symbols of the schema + pub symbols: Vec<String>, + /// An optional default symbol used for compatibility + pub default: Option<String>, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of a Fixed schema. +#[derive(Debug, Clone)] +pub struct FixedSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The size of the fixed schema + pub size: usize, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of a Decimal schema. +/// +/// `scale` defaults to 0 and is an integer greater than or equal to 0 and `precision` is an +/// integer greater than 0. +#[derive(Debug, Clone)] +pub struct DecimalSchema { + /// The number of digits in the unscaled value + pub precision: DecimalMetadata, + /// The number of digits to the right of the decimal point + pub scale: DecimalMetadata, + /// The inner schema of the decimal (fixed or bytes) + pub inner: Box<Schema>, +} + +/// A description of a Union schema. #[derive(Debug, Clone)] pub struct UnionSchema { + /// The schemas that make up this union pub(crate) schemas: Vec<Schema>, // Used to ensure uniqueness of schema inputs, and provide constant time finding of the // schema index given a value. @@ -685,19 +868,59 @@ impl UnionSchema { /// Optionally returns a reference to the schema matched by this value, as well as its position /// within this union. + #[deprecated( + since = "0.15.0", + note = "Please use `find_schema_with_known_schemata` instead" + )] pub fn find_schema(&self, value: &types::Value) -> Option<(usize, &Schema)> { + self.find_schema_with_known_schemata::<Schema>(value, None, &None) + } + + /// Optionally returns a reference to the schema matched by this value, as well as its position + /// within this union. + /// + /// Extra arguments: + /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas will be used to resolve references. + pub fn find_schema_with_known_schemata<S: Borrow<Schema> + Debug>( + &self, + value: &types::Value, + known_schemata: Option<&HashMap<Name, S>>, + enclosing_namespace: &Namespace, + ) -> Option<(usize, &Schema)> { let schema_kind = SchemaKind::from(value); if let Some(&i) = self.variant_index.get(&schema_kind) { // fast path Some((i, &self.schemas[i])) } else { // slow path (required for matching logical or named types) + + // first collect what schemas we already know + let mut collected_names: HashMap<Name, &Schema> = known_schemata + .map(|names| { + names + .iter() + .map(|(name, schema)| (name.clone(), schema.borrow())) + .collect() + }) + .unwrap_or_default(); + self.schemas.iter().enumerate().find(|(_, schema)| { - let rs = - ResolvedSchema::try_from(*schema).expect("Schema didn't successfully parse"); + let resolved_schema = ResolvedSchema::new_with_known_schemata( + vec![*schema], + enclosing_namespace, + &collected_names, + ) + .expect("Schema didn't successfully parse"); + let resolved_names = resolved_schema.names_ref; + + // extend known schemas with just resolved names + collected_names.extend(resolved_names); + let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone()); + value - .validate_internal(schema, rs.get_names(), &schema.namespace()) - .is_none() + .clone() + .resolve_internal(schema, &collected_names, namespace, &None) + .is_ok() }) } }
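`find_schema` is deprecated in favor of `find_schema_with_known_schemata`, which threads externally known named schemas and the enclosing namespace through value resolution. A minimal sketch of a call that needs no external references, assuming the `apache_avro` crate layout; the turbofish is needed so `S` can be inferred when passing `None`:

```rust
use apache_avro::{schema::Schema, types::Value};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(r#"["null", "long"]"#)?;
    if let Schema::Union(union) = &schema {
        // No external schemata and no enclosing namespace.
        let found = union.find_schema_with_known_schemata::<Schema>(
            &Value::Long(42),
            None,
            &None,
        );
        // Value::Long matches the second branch of the union.
        assert_eq!(found.map(|(i, _)| i), Some(1));
    }
    Ok(())
}
```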
@@ -735,13 +958,13 @@ fn parse_json_integer_for_decimal(value: &serde_json::Number) -> Result<Decimal #[derive(Default)] struct Parser { input_schemas: HashMap<Name, Value>, - // A map of name -> Schema::Ref - // Used to resolve cyclic references, i.e. when a - // field's type is a reference to its record's type + /// A map of name -> Schema::Ref + /// Used to resolve cyclic references, i.e. when a + /// field's type is a reference to its record's type resolving_schemas: Names, input_order: Vec<Name>, - // A map of name -> fully parsed Schema - // Used to avoid parsing the same schema twice + /// A map of name -> fully parsed Schema + /// Used to avoid parsing the same schema twice parsed_schemas: Names, } @@ -752,7 +975,7 @@ impl Schema { /// https://avro.apache.org/docs/1.8.2/spec.html#Parsing+Canonical+Form+for+Schemas pub fn canonical_form(&self) -> String { let json = serde_json::to_value(self) - .unwrap_or_else(|e| panic!("Cannot parse Schema from JSON: {0}", e)); + .unwrap_or_else(|e| panic!("Cannot parse Schema from JSON: {e}")); parsing_canonical_form(&json) } @@ -783,13 +1006,13 @@ impl Schema { /// during parsing. /// /// If two of the input schemas have the same fullname, an Error will be returned. - pub fn parse_list(input: &[&str]) -> Result<Vec<Schema>, Error> { + pub fn parse_list(input: &[&str]) -> AvroResult<Vec<Schema>> { let mut input_schemas: HashMap<Name, Value> = HashMap::with_capacity(input.len()); let mut input_order: Vec<Name> = Vec::with_capacity(input.len()); for js in input { let schema: Value = serde_json::from_str(js).map_err(Error::ParseSchemaJson)?; if let Value::Object(inner) = &schema { - let name = Name::parse(inner)?; + let name = Name::parse(inner, &None)?; let previous_value = input_schemas.insert(name.clone(), schema); if previous_value.is_some() { return Err(Error::NameCollision(name.fullname(None))); @@ -808,18 +1031,39 @@ impl Schema { parser.parse_list() } + /// Create a `Schema` from a reader which implements [`Read`]. + pub fn parse_reader(reader: &mut (impl Read + ?Sized)) -> AvroResult<Schema> { + let mut buf = String::new(); + match reader.read_to_string(&mut buf) { + Ok(_) => Self::parse_str(&buf), + Err(e) => Err(Error::ReadSchemaFromReader(e)), + } + } + /// Parses an Avro schema from JSON. pub fn parse(value: &Value) -> AvroResult<Schema> { let mut parser = Parser::default(); parser.parse(value, &None) } + /// Parses an Avro schema from JSON. + /// Any `Schema::Ref`s must be known in the `names` map. + pub(crate) fn parse_with_names(value: &Value, names: Names) -> AvroResult<Schema> { + let mut parser = Parser { + input_schemas: HashMap::with_capacity(1), + resolving_schemas: Names::default(), + input_order: Vec::with_capacity(1), + parsed_schemas: names, + }; + parser.parse(value, &None) + } + /// Returns the custom attributes (metadata) if the schema supports them. pub fn custom_attributes(&self) -> Option<&BTreeMap<String, Value>> { match self { - Schema::Record { attributes, .. } - | Schema::Enum { attributes, .. } - | Schema::Fixed { attributes, .. } => Some(attributes), + Schema::Record(RecordSchema { attributes, .. }) + | Schema::Enum(EnumSchema { attributes, .. }) + | Schema::Fixed(FixedSchema { attributes, .. }) => Some(attributes), _ => None, } } @@ -827,10 +1071,10 @@ impl Schema { /// Returns the name of the schema if it has one. pub fn name(&self) -> Option<&Name> { match self { - Schema::Ref { ref name, .. } - | Schema::Record { ref name, .. } - | Schema::Enum { ref name, .. } - | Schema::Fixed { ref name, .. } => Some(name), + Schema::Ref { name, .. } + | Schema::Record(RecordSchema { name, .. }) + | Schema::Enum(EnumSchema { name, .. }) + | Schema::Fixed(FixedSchema { name, .. }) => Some(name), _ => None, } } @@ -839,6 +1083,26 @@ impl Schema { pub fn namespace(&self) -> Namespace { self.name().and_then(|n| n.namespace.clone()) } + + /// Returns the aliases of the schema if it has any. + pub fn aliases(&self) -> Option<&Vec<Alias>> { + match self { + Schema::Record(RecordSchema { aliases, .. }) + | Schema::Enum(EnumSchema { aliases, .. }) + | Schema::Fixed(FixedSchema { aliases, .. }) => aliases.as_ref(), + _ => None, + } + } + + /// Returns the doc of the schema if it has one. + pub fn doc(&self) -> Option<&String> { + match self { + Schema::Record(RecordSchema { doc, .. }) + | Schema::Enum(EnumSchema { doc, .. }) + | Schema::Fixed(FixedSchema { doc, .. }) => doc.as_ref(), + _ => None, + } + } }
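Together, `parse_reader` and the new `aliases()`/`doc()` accessors let a schema be loaded from any `Read` implementor and inspected without destructuring. A sketch under those assumptions (a `Cursor` stands in for a real file):

```rust
use apache_avro::schema::Schema;
use std::io::Cursor;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut reader = Cursor::new(
        r#"{"type": "enum", "name": "Suit", "aliases": ["Cards"],
            "doc": "card suits", "symbols": ["spades", "hearts"]}"#,
    );
    let schema = Schema::parse_reader(&mut reader)?;

    assert_eq!(schema.doc(), Some(&"card suits".to_string()));
    // Aliases come back in their namespace-fixed form.
    assert!(schema.aliases().is_some());
    Ok(())
}
```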
impl Parser { @@ -884,7 +1148,7 @@ impl Parser { match *value { Value::String(ref t) => self.parse_known_schema(t.as_str(), enclosing_namespace), Value::Object(ref data) => self.parse_complex(data, enclosing_namespace), - Value::Array(ref data) => self.parse_union(data, enclosing_namespace, None), + Value::Array(ref data) => self.parse_union(data, enclosing_namespace), _ => Err(Error::ParseSchemaFromValidJson), } } @@ -926,9 +1190,9 @@ impl Parser { ) -> AvroResult<Schema> { fn get_schema_ref(parsed: &Schema) -> Schema { match &parsed { - Schema::Record { ref name, .. } - | Schema::Enum { ref name, .. } - | Schema::Fixed { ref name, .. } => Schema::Ref { name: name.clone() }, + Schema::Record(RecordSchema { ref name, .. }) + | Schema::Enum(EnumSchema { ref name, .. }) + | Schema::Fixed(FixedSchema { ref name, .. }) => Schema::Ref { name: name.clone() }, _ => parsed.clone(), } } let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if self.parsed_schemas.get(&fully_qualified_name).is_some() { - return Ok(Schema::Ref { name }); + return Ok(Schema::Ref { + name: fully_qualified_name, + }); } if let Some(resolving_schema) = self.resolving_schemas.get(&fully_qualified_name) { return Ok(resolving_schema.clone()); @@ -965,7 +1231,7 @@ impl Parser { key: &'static str, ) -> Result<DecimalMetadata, Error> { match complex.get(key) { - Some(&Value::Number(ref value)) => parse_json_integer_for_decimal(value), + Some(Value::Number(value)) => parse_json_integer_for_decimal(value), None => { if key == "scale" { Ok(0)
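Because an already-parsed named type is now returned as a `Schema::Ref` under its fully qualified name, self-references pick up the enclosing namespace instead of staying bare. A sketch of the effect (names illustrative):

```rust
use apache_avro::schema::{RecordSchema, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(
        r#"{"type": "record", "name": "LongList", "namespace": "ns",
            "fields": [{"name": "next", "type": ["null", "LongList"]}]}"#,
    )?;
    if let Schema::Record(RecordSchema { fields, .. }) = &schema {
        if let Schema::Union(u) = &fields[0].schema {
            if let Schema::Ref { name } = &u.variants()[1] {
                // The reference carries the record's namespace.
                assert_eq!(name.fullname(None), "ns.LongList");
            }
        }
    }
    Ok(())
}
```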
@@ -1011,7 +1277,12 @@ ) -> AvroResult<Schema> { match complex.get("type") { Some(value) => { - let ty = parser.parse(value, enclosing_namespace)?; + let ty = match value { + Value::String(s) if s == "fixed" => { + parser.parse_fixed(complex, enclosing_namespace)? + } + _ => parser.parse(value, enclosing_namespace)?, + }; if kinds .iter() @@ -1071,7 +1342,7 @@ } match complex.get("logicalType") { - Some(&Value::String(ref t)) => match t.as_str() { + Some(Value::String(t)) => match t.as_str() { "decimal" => { let inner = Box::new(logical_verify_type( complex, @@ -1082,11 +1353,11 @@ let (precision, scale) = Self::parse_precision_and_scale(complex)?; - return Ok(Schema::Decimal { + return Ok(Schema::Decimal(DecimalSchema { precision, scale, inner, - }); + })); } "uuid" => { logical_verify_type(complex, &[SchemaKind::String], self, enclosing_namespace)?; @@ -1142,6 +1413,26 @@ enclosing_namespace, ); } + "local-timestamp-millis" => { + return try_logical_type( + "local-timestamp-millis", + complex, + &[SchemaKind::Long], + Schema::LocalTimestampMillis, + self, + enclosing_namespace, + ); + } + "local-timestamp-micros" => { + return try_logical_type( + "local-timestamp-micros", + complex, + &[SchemaKind::Long], + Schema::LocalTimestampMicros, + self, + enclosing_namespace, + ); + } "duration" => { logical_verify_type(complex, &[SchemaKind::Fixed], self, enclosing_namespace)?; return Ok(Schema::Duration);
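The two new logical types sit on the `long` primitive exactly like their non-local counterparts. A short sketch, relying on `Schema` being comparable with `assert_eq!` as the tests below already do:

```rust
use apache_avro::schema::Schema;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let millis =
        Schema::parse_str(r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#)?;
    assert_eq!(millis, Schema::LocalTimestampMillis);

    let micros =
        Schema::parse_str(r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#)?;
    assert_eq!(micros, Schema::LocalTimestampMicros);
    Ok(())
}
```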
@@ -1157,7 +1448,7 @@ _ => {} } match complex.get("type") { - Some(&Value::String(ref t)) => match t.as_str() { + Some(Value::String(t)) => match t.as_str() { "record" => self.parse_record(complex, enclosing_namespace), "enum" => self.parse_enum(complex, enclosing_namespace), "array" => self.parse_array(complex, enclosing_namespace), @@ -1165,11 +1456,8 @@ "fixed" => self.parse_fixed(complex, enclosing_namespace), other => self.parse_known_schema(other, enclosing_namespace), }, - Some(&Value::Object(ref data)) => self.parse_complex(data, enclosing_namespace), - Some(&Value::Array(ref variants)) => { - let default = complex.get("default"); - self.parse_union(variants, enclosing_namespace, default) - } + Some(Value::Object(data)) => self.parse_complex(data, enclosing_namespace), + Some(Value::Array(variants)) => self.parse_union(variants, enclosing_namespace), Some(unknown) => Err(Error::GetComplexType(unknown.clone())), None => Err(Error::GetComplexTypeField), } } @@ -1248,11 +1536,11 @@ impl Parser { } } - let name = Name::parse(complex)?; - let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); + let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; + let aliases = fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); let mut lookup = BTreeMap::new(); - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + self.register_resolving_schema(&fully_qualified_name, &aliases); let fields: Vec<RecordField> = fields_opt @@ -1264,23 +1552,31 @@ .filter_map(|field| field.as_object()) .enumerate() .map(|(position, field)| { - RecordField::parse(field, position, self, &fully_qualified_name.namespace) + RecordField::parse(field, position, self, &fully_qualified_name) }) .collect::<Result<_, _>>() })?; for field in &fields { - lookup.insert(field.name.clone(), field.position); + if let Some(_old) = lookup.insert(field.name.clone(), field.position) { + return Err(Error::FieldNameDuplicate(field.name.clone())); + } + + if let Some(ref field_aliases) = field.aliases { + for alias in field_aliases { + lookup.insert(alias.clone(), field.position); + } + } } - let schema = Schema::Record { - name, + let schema = Schema::Record(RecordSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc: complex.doc(), fields, lookup, attributes: self.get_custom_attributes(complex, vec!["fields"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); Ok(schema) @@ -1317,8 +1613,8 @@ } } - let name = Name::parse(complex)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let name = Name::parse(complex, enclosing_namespace)?; + let fully_qualified_name = name.clone(); let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); let symbols: Vec<String> = symbols_opt @@ -1347,13 +1643,35 @@ existing_symbols.insert(symbol); } - let schema = Schema::Enum { - name, + let mut default: Option<String> = None; + if let Some(value) = complex.get("default") { + if let Value::String(ref s) = *value { + default = Some(s.clone()); + } else { + return Err(Error::EnumDefaultWrongType(value.clone())); + } + } + + if let Some(ref value) = default { + let resolved = types::Value::from(value.clone()) + .resolve_enum(&symbols, &Some(value.to_string()), &None) + .is_ok(); + if !resolved { + return Err(Error::GetEnumDefault { + symbol: value.to_string(), + symbols, + }); + } + } + + let schema = Schema::Enum(EnumSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc: complex.doc(), symbols, + default, attributes: self.get_custom_attributes(complex, vec!["symbols"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); @@ -1394,32 +1712,11 @@ &mut self, items: &[Value], enclosing_namespace: &Namespace, - default: Option<&Value>, ) -> AvroResult { items .iter() .map(|v| self.parse(v, enclosing_namespace)) .collect::<Result<Vec<_>, _>>() - .and_then(|schemas| { - if let Some(default_value) = default.cloned() { - let avro_value = types::Value::from(default_value); - let first_schema = schemas.first(); - if let Some(schema) = first_schema { - // Try to resolve the schema - let resolved_value = avro_value.to_owned().resolve(schema); - match resolved_value { - Ok(_) => {} - Err(_) => { - return Err(Error::GetDefaultUnion( - SchemaKind::from(schema), - types::ValueKind::from(avro_value), - )); - } - } - } - } - Ok(schemas) - }) .and_then(|schemas| Ok(Schema::Union(UnionSchema::new(schemas)?))) } @@ -1442,21 +1739,24 @@ _ => None, }); - let size = size_opt - .and_then(|v| v.as_i64()) - .ok_or(Error::GetFixedSizeField)?; + let size = match size_opt { + Some(size) => size + .as_u64() + .ok_or_else(|| Error::GetFixedSizeFieldPositive(size.clone())), + None => Err(Error::GetFixedSizeField), + }?; - let name = Name::parse(complex)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let name = Name::parse(complex, enclosing_namespace)?; + let fully_qualified_name = name.clone(); let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); - let schema = Schema::Fixed { - name, + let schema = Schema::Fixed(FixedSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc, size: size as usize, attributes: self.get_custom_attributes(complex, vec!["size"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); @@ -1476,7 +1776,7 @@ fn fix_aliases_namespace(aliases: Option<Vec<String>>, namespace: &Namespace) -> .map(|alias| { if alias.find('.').is_none() { match namespace { - Some(ref ns) => format!("{}.{}", ns, alias), + Some(ref ns) => format!("{ns}.{alias}"), None => alias.clone(), } } else {
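`parse_enum` now validates the optional `default` against the symbol list and keeps it on the parsed `EnumSchema`. A hedged sketch of both outcomes (symbols invented for illustration):

```rust
use apache_avro::schema::{EnumSchema, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A default that is one of the symbols is preserved on the schema.
    let schema = Schema::parse_str(
        r#"{"type": "enum", "name": "Suit",
            "symbols": ["spades", "hearts"], "default": "spades"}"#,
    )?;
    if let Schema::Enum(EnumSchema { default, .. }) = schema {
        assert_eq!(default, Some("spades".to_string()));
    }

    // An unknown default symbol is rejected at parse time.
    let bad = Schema::parse_str(
        r#"{"type": "enum", "name": "Suit",
            "symbols": ["spades", "hearts"], "default": "clubs"}"#,
    );
    assert!(bad.is_err());
    Ok(())
}
```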
@@ -1533,13 +1833,13 @@ impl Serialize for Schema { } seq.end() } - Schema::Record { + Schema::Record(RecordSchema { ref name, ref aliases, ref doc, ref fields, .. - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "record")?; if let Some(ref n) = name.namespace { @@ -1555,12 +1855,12 @@ map.serialize_entry("fields", fields)?; map.end() } - Schema::Enum { + Schema::Enum(EnumSchema { ref name, ref symbols, ref aliases, .. - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "enum")?; if let Some(ref n) = name.namespace { @@ -1574,13 +1874,13 @@ } map.end() } - Schema::Fixed { + Schema::Fixed(FixedSchema { ref name, ref doc, ref size, ref aliases, .. - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "fixed")?; if let Some(ref n) = name.namespace { @@ -1597,11 +1897,11 @@ } map.end() } - Schema::Decimal { + Schema::Decimal(DecimalSchema { ref scale, ref precision, ref inner, - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", &*inner.clone())?; map.serialize_entry("logicalType", "decimal")?; @@ -1645,18 +1945,30 @@ map.serialize_entry("logicalType", "timestamp-micros")?; map.end() } + Schema::LocalTimestampMillis => { + let mut map = serializer.serialize_map(None)?; + map.serialize_entry("type", "long")?; + map.serialize_entry("logicalType", "local-timestamp-millis")?; + map.end() + } + Schema::LocalTimestampMicros => { + let mut map = serializer.serialize_map(None)?; + map.serialize_entry("type", "long")?; + map.serialize_entry("logicalType", "local-timestamp-micros")?; + map.end() + } Schema::Duration => { let mut map = serializer.serialize_map(None)?; // the Avro specification doesn't indicate what the name of the underlying fixed type of a // duration should be or typically is. - let inner = Schema::Fixed { + let inner = Schema::Fixed(FixedSchema { name: Name::new("duration").unwrap(), aliases: None, doc: None, size: 12, attributes: Default::default(), - }; + }); map.serialize_entry("type", &inner)?; map.serialize_entry("logicalType", "duration")?; map.end() @@ -1678,6 +1990,10 @@ impl Serialize for RecordField { map.serialize_entry("default", default)?; } + if let Some(ref aliases) = self.aliases { + map.serialize_entry("aliases", aliases)?; + } + map.end() } } @@ -1689,10 +2005,7 @@ fn parsing_canonical_form(schema: &Value) -> String { Value::Object(map) => pcf_map(map), Value::String(s) => pcf_string(s), Value::Array(v) => pcf_array(v), - json => panic!( - "got invalid JSON value for canonical form of schema: {0}", - json - ), + json => panic!("got invalid JSON value for canonical form of schema: {json}"), } }
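Serialization keeps the new metadata round-trippable, while Parsing Canonical Form still strips everything but the normative attributes and fully qualifies names. A small sketch of both, assuming `serde_json` is available as it is in the tests below:

```rust
use apache_avro::schema::Schema;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema =
        Schema::parse_str(r#"{"type": "fixed", "name": "md5", "namespace": "ns", "size": 16}"#)?;

    // Serializing and re-parsing yields an equal schema...
    let json = serde_json::to_string(&schema)?;
    assert_eq!(schema, Schema::parse_str(&json)?);

    // ...while the canonical form inlines the namespace into the name.
    assert_eq!(
        schema.canonical_form(),
        r#"{"name":"ns.md5","type":"fixed","size":16}"#
    );
    Ok(())
}
```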
@@ -1719,9 +2032,7 @@ fn pcf_map(schema: &Map<String, Value>) -> String { // Invariant: Only valid schemas. Must be a string. let name = v.as_str().unwrap(); let n = match ns { - Some(namespace) if !name.contains('.') => { - Cow::Owned(format!("{}.{}", namespace, name)) - } + Some(namespace) if !name.contains('.') => Cow::Owned(format!("{namespace}.{name}")), _ => Cow::Borrowed(name), }; @@ -1753,7 +2064,7 @@ .map(|(_, v)| v) .collect::<Vec<_>>() .join(","); - format!("{{{}}}", inter) + format!("{{{inter}}}") } fn pcf_array(arr: &[Value]) -> String { @@ -1762,11 +2073,11 @@ .map(parsing_canonical_form) .collect::<Vec<_>>() .join(","); - format!("[{}]", inter) + format!("[{inter}]") } fn pcf_string(s: &str) -> String { - format!("\"{}\"", s) + format!("\"{s}\"") } const RESERVED_FIELDS: &[&str] = &[ @@ -1997,6 +2308,7 @@ pub mod derive { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde_json::json; @@ -2006,31 +2318,35 @@ mod tests { } #[test] - fn test_primitive_schema() { - assert_eq!(Schema::Null, Schema::parse_str("\"null\"").unwrap()); - assert_eq!(Schema::Int, Schema::parse_str("\"int\"").unwrap()); - assert_eq!(Schema::Double, Schema::parse_str("\"double\"").unwrap()); + fn test_primitive_schema() -> TestResult { + assert_eq!(Schema::Null, Schema::parse_str("\"null\"")?); + assert_eq!(Schema::Int, Schema::parse_str("\"int\"")?); + assert_eq!(Schema::Double, Schema::parse_str("\"double\"")?); + Ok(()) } #[test] - fn test_array_schema() { - let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#).unwrap(); + fn test_array_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#)?; assert_eq!(Schema::Array(Box::new(Schema::String)), schema); + Ok(()) } #[test] - fn test_map_schema() { - let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#).unwrap(); + fn test_map_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#)?; assert_eq!(Schema::Map(Box::new(Schema::Double)), schema); + Ok(()) } #[test] - fn test_union_schema() { - let schema = Schema::parse_str(r#"["null", "int"]"#).unwrap(); + fn test_union_schema() -> TestResult { + let schema = Schema::parse_str(r#"["null", "int"]"#)?; assert_eq!( - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), schema ); + Ok(()) } #[test] @@ -2040,10 +2356,10 @@ mod tests { } #[test] - fn test_multi_union_schema() { + fn test_multi_union_schema() -> TestResult { let schema = Schema::parse_str(r#"["null", "int", "float", "string", "bytes"]"#); assert!(schema.is_ok()); - let schema = schema.unwrap(); + let schema = schema?; assert_eq!(SchemaKind::from(&schema), SchemaKind::Union); let union_schema = match schema { Schema::Union(u) => u, _ => unreachable!(), }; @@ -2066,27 +2382,26 @@ SchemaKind::Bytes ); assert_eq!(variants.next(), None); + + Ok(()) } - // AVRO-3621 #[test] - fn test_avro_3621_nullable_record_field() { + fn test_avro_3621_nullable_record_field() -> TestResult { let nullable_record_field = RecordField { name: "next".to_string(), doc: None, default: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + aliases: None, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name { + name: "LongList".to_owned(), + namespace: None, }, - ]) - .unwrap(), - ), + }, + ])?), order:
RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2098,6 +2413,7 @@ mod tests { name: "next".to_string(), doc: None, default: Some(json!(2)), + aliases: None, schema: Schema::Long, order: RecordFieldOrder::Ascending, position: 1, @@ -2105,11 +2421,12 @@ mod tests { }; assert!(!non_nullable_record_field.is_nullable()); + Ok(()) } // AVRO-3248 #[test] - fn test_union_of_records() { + fn test_union_of_records() -> TestResult { use std::iter::FromIterator; // A and B are the same except the name. @@ -2138,45 +2455,42 @@ mod tests { ] }"#; - let schema_c = Schema::parse_list(&[schema_str_a, schema_str_b, schema_str_c]) - .unwrap() + let schema_c = Schema::parse_list(&[schema_str_a, schema_str_b, schema_str_c])? .last() .unwrap() .clone(); - let schema_c_expected = Schema::Record { - name: Name::new("C").unwrap(), + let schema_c_expected = Schema::Record(RecordSchema { + name: Name::new("C")?, aliases: None, doc: None, fields: vec![RecordField { name: "field_one".to_string(), doc: None, default: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Ref { - name: Name::new("A").unwrap(), - }, - Schema::Ref { - name: Name::new("B").unwrap(), - }, - ]) - .unwrap(), - ), + aliases: None, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Ref { + name: Name::new("A")?, + }, + Schema::Ref { + name: Name::new("B")?, + }, + ])?), order: RecordFieldOrder::Ignore, position: 0, custom_attributes: Default::default(), }], lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), attributes: Default::default(), - }; + }); assert_eq!(schema_c, schema_c_expected); + Ok(()) } - // AVRO-3584 : recursion in type definitions #[test] - fn avro_3584_test_recursion_records() { + fn avro_3584_test_recursion_records() -> TestResult { // A and B are the same except the name. let schema_str_a = r#"{ "name": "A", @@ -2190,26 +2504,27 @@ mod tests { "fields": [ {"name": "field_one", "type": "A"} ] }"#; - let list = Schema::parse_list(&[schema_str_a, schema_str_b]).unwrap(); + let list = Schema::parse_list(&[schema_str_a, schema_str_b])?; let schema_a = list.first().unwrap().clone(); match schema_a { - Schema::Record { fields, .. } => { + Schema::Record(RecordSchema { fields, .. }) => { let f1 = fields.get(0); let ref_schema = Schema::Ref { - name: Name::new("B").unwrap(), + name: Name::new("B")?, }; assert_eq!(ref_schema, f1.unwrap().schema); } _ => panic!("Expected a record schema!"), } + + Ok(()) } - // AVRO-3248 #[test] - fn test_nullable_record() { + fn test_avro_3248_nullable_record() -> TestResult { use std::iter::FromIterator; let schema_str_a = r#"{ @@ -2229,42 +2544,41 @@ mod tests { ] }"#; - let schema_option_a = Schema::parse_list(&[schema_str_a, schema_str_option_a]) - .unwrap() + let schema_option_a = Schema::parse_list(&[schema_str_a, schema_str_option_a])? 
.last() .unwrap() .clone(); - let schema_option_a_expected = Schema::Record { - name: Name::new("OptionA").unwrap(), + let schema_option_a_expected = Schema::Record(RecordSchema { + name: Name::new("OptionA")?, aliases: None, doc: None, fields: vec![RecordField { name: "field_one".to_string(), doc: None, default: Some(Value::Null), - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name::new("A").unwrap(), - }, - ]) - .unwrap(), - ), + aliases: None, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name::new("A")?, + }, + ])?), order: RecordFieldOrder::Ignore, position: 0, custom_attributes: Default::default(), }], lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), attributes: Default::default(), - }; + }); assert_eq!(schema_option_a, schema_option_a_expected); + + Ok(()) } #[test] - fn test_record_schema() { + fn test_record_schema() -> TestResult { let parsed = Schema::parse_str( r#" { @@ -2276,15 +2590,14 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("a".to_owned(), 0); lookup.insert("b".to_owned(), 1); - let expected = Schema::Record { - name: Name::new("test").unwrap(), + let expected = Schema::Record(RecordSchema { + name: Name::new("test")?, aliases: None, doc: None, fields: vec![ @@ -2292,6 +2605,7 @@ mod tests { name: "a".to_string(), doc: None, default: Some(Value::Number(42i64.into())), + aliases: None, schema: Schema::Long, order: RecordFieldOrder::Ascending, position: 0, @@ -2301,6 +2615,7 @@ mod tests { name: "b".to_string(), doc: None, default: None, + aliases: None, schema: Schema::String, order: RecordFieldOrder::Ascending, position: 1, @@ -2309,14 +2624,15 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(parsed, expected); + + Ok(()) } - // AVRO-3302 #[test] - fn test_record_schema_with_currently_parsing_schema() { + fn test_avro_3302_record_schema_with_currently_parsing_schema() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2335,8 +2651,7 @@ mod tests { }] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("recordField".to_owned(), 0); @@ -2345,16 +2660,17 @@ mod tests { node_lookup.insert("children".to_owned(), 1); node_lookup.insert("label".to_owned(), 0); - let expected = Schema::Record { - name: Name::new("test").unwrap(), + let expected = Schema::Record(RecordSchema { + name: Name::new("test")?, aliases: None, doc: None, fields: vec![RecordField { name: "recordField".to_string(), doc: None, default: None, - schema: Schema::Record { - name: Name::new("Node").unwrap(), + aliases: None, + schema: Schema::Record(RecordSchema { + name: Name::new("Node")?, aliases: None, doc: None, fields: vec![ @@ -2362,6 +2678,7 @@ mod tests { name: "label".to_string(), doc: None, default: None, + aliases: None, schema: Schema::String, order: RecordFieldOrder::Ascending, position: 0, @@ -2371,8 +2688,9 @@ mod tests { name: "children".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Array(Box::new(Schema::Ref { - name: Name::new("Node").unwrap(), + name: Name::new("Node")?, })), order: RecordFieldOrder::Ascending, position: 1, @@ -2381,24 +2699,26 @@ mod tests { ], lookup: node_lookup, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), }], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = 
r#"{"name":"test","type":"record","fields":[{"name":"recordField","type":{"name":"Node","type":"record","fields":[{"name":"label","type":"string"},{"name":"children","type":{"type":"array","items":"Node"}}]}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } // https://github.com/flavray/avro-rs/pull/99#issuecomment-1016948451 #[test] - fn test_parsing_of_recursive_type_enum() { + fn test_parsing_of_recursive_type_enum() -> TestResult { let schema = r#" { "type": "record", @@ -2442,14 +2762,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"Employee","type":"record","fields":[{"name":"gender","type":{"name":"Gender","type":"enum","symbols":["male","female"]}}]},{"name":"Manager","type":"record","fields":[{"name":"gender","type":"Gender"}]}]}]}"#; + let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"gender","type":{"name":"office.Gender","type":"enum","symbols":["male","female"]}}]},{"name":"office.Manager","type":"record","fields":[{"name":"gender","type":"office.Gender"}]}]}]}"#; assert_eq!(schema_str, expected); + + Ok(()) } #[test] - fn test_parsing_of_recursive_type_fixed() { + fn test_parsing_of_recursive_type_fixed() -> TestResult { let schema = r#" { "type": "record", @@ -2490,15 +2812,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"Employee","type":"record","fields":[{"name":"id","type":{"name":"EmployeeId","type":"fixed","size":16}}]},{"name":"Manager","type":"record","fields":[{"name":"id","type":"EmployeeId"}]}]}]}"#; + let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"id","type":{"name":"office.EmployeeId","type":"fixed","size":16}}]},{"name":"office.Manager","type":"record","fields":[{"name":"id","type":"office.EmployeeId"}]}]}]}"#; assert_eq!(schema_str, expected); + + Ok(()) } - // AVRO-3302 #[test] - fn test_record_schema_with_currently_parsing_schema_aliases() { + fn test_avro_3302_record_schema_with_currently_parsing_schema_aliases() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2511,14 +2834,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("value".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "LongList".to_owned(), namespace: None, @@ -2530,6 +2852,7 @@ mod tests { name: "value".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Long, order: RecordFieldOrder::Ascending, position: 0, @@ -2539,18 +2862,16 @@ mod tests { name: "next".to_string(), doc: None, default: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, - }, - ]) - .unwrap(), - ), + aliases: None, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name { + name: "LongList".to_owned(), + namespace: None, + }, + }, + ])?), order: RecordFieldOrder::Ascending, 
position: 1, custom_attributes: Default::default(), @@ -2558,17 +2879,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"LongList","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":["null","LongList"]}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_record() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2580,14 +2902,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("value".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2599,6 +2920,7 @@ mod tests { name: "value".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Long, order: RecordFieldOrder::Ascending, position: 0, @@ -2608,6 +2930,7 @@ mod tests { name: "next".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Ref { name: Name { name: "record".to_owned(), @@ -2621,17 +2944,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"record","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":"record"}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_enum() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2647,14 +2971,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("enum".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2666,7 +2989,8 @@ mod tests { name: "enum".to_string(), doc: None, default: None, - schema: Schema::Enum { + aliases: None, + schema: Schema::Enum(EnumSchema { name: Name { name: "enum".to_owned(), namespace: None, @@ -2674,8 +2998,9 @@ mod tests { aliases: None, doc: None, symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], + default: None, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), @@ -2684,7 +3009,8 @@ mod tests { name: "next".to_string(), doc: None, default: None, - schema: Schema::Enum { + aliases: None, + schema: Schema::Enum(EnumSchema { name: Name { name: "enum".to_owned(), namespace: None, @@ -2692,8 +3018,9 @@ mod tests { aliases: None, doc: None, symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], + default: None, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2701,17 +3028,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = 
r#"{"name":"record","type":"record","fields":[{"name":"enum","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}},{"name":"next","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_fixed() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2727,14 +3055,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("fixed".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2746,7 +3073,8 @@ mod tests { name: "fixed".to_string(), doc: None, default: None, - schema: Schema::Fixed { + aliases: None, + schema: Schema::Fixed(FixedSchema { name: Name { name: "fixed".to_owned(), namespace: None, @@ -2755,7 +3083,7 @@ mod tests { doc: None, size: 456, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), @@ -2764,7 +3092,8 @@ mod tests { name: "next".to_string(), doc: None, default: None, - schema: Schema::Fixed { + aliases: None, + schema: Schema::Fixed(FixedSchema { name: Name { name: "fixed".to_owned(), namespace: None, @@ -2773,7 +3102,7 @@ mod tests { doc: None, size: 456, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2781,22 +3110,24 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"record","type":"record","fields":[{"name":"fixed","type":{"name":"fixed","type":"fixed","size":456}},{"name":"next","type":{"name":"fixed","type":"fixed","size":456}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } #[test] - fn test_enum_schema() { + fn test_enum_schema() -> TestResult { let schema = Schema::parse_str( r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "hearts"]}"#, - ).unwrap(); + )?; - let expected = Schema::Enum { - name: Name::new("Suit").unwrap(), + let expected = Schema::Enum(EnumSchema { + name: Name::new("Suit")?, aliases: None, doc: None, symbols: vec![ @@ -2805,89 +3136,103 @@ mod tests { "clubs".to_owned(), "hearts".to_owned(), ], + default: None, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_enum_schema_duplicate() { + fn test_enum_schema_duplicate() -> TestResult { // Duplicate "diamonds" let schema = Schema::parse_str( r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "diamonds"]}"#, ); assert!(schema.is_err()); + + Ok(()) } #[test] - fn test_enum_schema_name() { + fn test_enum_schema_name() -> TestResult { // Invalid name "0000" does not match [A-Za-z_][A-Za-z0-9_]* let schema = Schema::parse_str( r#"{"type": "enum", "name": "Enum", "symbols": ["0000", "variant"]}"#, ); assert!(schema.is_err()); + + Ok(()) } #[test] - fn test_fixed_schema() { - let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#).unwrap(); + fn test_fixed_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#)?; - let expected = Schema::Fixed { - name: Name::new("test").unwrap(), + let expected = 
Schema::Fixed(FixedSchema { + name: Name::new("test")?, aliases: None, doc: None, size: 16usize, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_fixed_schema_with_documentation() { + fn test_fixed_schema_with_documentation() -> TestResult { let schema = Schema::parse_str( r#"{"type": "fixed", "name": "test", "size": 16, "doc": "FixedSchema documentation"}"#, - ) - .unwrap(); + )?; - let expected = Schema::Fixed { - name: Name::new("test").unwrap(), + let expected = Schema::Fixed(FixedSchema { + name: Name::new("test")?, aliases: None, doc: Some(String::from("FixedSchema documentation")), size: 16usize, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_no_documentation() { - let schema = - Schema::parse_str(r#"{"type": "enum", "name": "Coin", "symbols": ["heads", "tails"]}"#) - .unwrap(); + fn test_no_documentation() -> TestResult { + let schema = Schema::parse_str( + r#"{"type": "enum", "name": "Coin", "symbols": ["heads", "tails"]}"#, + )?; let doc = match schema { - Schema::Enum { doc, .. } => doc, - _ => return, + Schema::Enum(EnumSchema { doc, .. }) => doc, + _ => unreachable!(), }; assert!(doc.is_none()); + + Ok(()) } #[test] - fn test_documentation() { + fn test_documentation() -> TestResult { let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Coin", "doc": "Some documentation", "symbols": ["heads", "tails"]}"# - ).unwrap(); + r#"{"type": "enum", "name": "Coin", "doc": "Some documentation", "symbols": ["heads", "tails"]}"#, + )?; let doc = match schema { - Schema::Enum { doc, .. } => doc, + Schema::Enum(EnumSchema { doc, .. }) => doc, _ => None, }; assert_eq!("Some documentation".to_owned(), doc.unwrap()); + + Ok(()) } // Tests to ensure Schema is Send + Sync. These tests don't need to _do_ anything, if they can @@ -2910,8 +3255,7 @@ } #[test] - #[cfg_attr(miri, ignore)] // Sha256 uses an inline assembly instructions which is not supported by miri - fn test_schema_fingerprint() { + fn test_schema_fingerprint() -> TestResult { use crate::rabin::Rabin; use md5::Md5; use sha2::Sha256; @@ -2928,7 +3272,7 @@ } "#; - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; assert_eq!( "abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4", format!("{}", schema.fingerprint::<Sha256>()) @@ -2941,33 +3285,40 @@ assert_eq!( "28cf0a67d9937bb3", format!("{}", schema.fingerprint::<Rabin>()) - ) + ); + + Ok(()) } #[test] - fn test_logical_types() { - let schema = Schema::parse_str(r#"{"type": "int", "logicalType": "date"}"#).unwrap(); + fn test_logical_types() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "int", "logicalType": "date"}"#)?; assert_eq!(schema, Schema::Date); - let schema = - Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-micros"}"#).unwrap(); + let schema = Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-micros"}"#)?; assert_eq!(schema, Schema::TimestampMicros); + + Ok(()) } #[test] - fn test_nullable_logical_type() { + fn test_nullable_logical_type() -> TestResult { let schema = Schema::parse_str( r#"{"type": ["null", {"type": "long", "logicalType": "timestamp-micros"}]}"#, - ) - .unwrap(); + )?; assert_eq!( schema, - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::TimestampMicros]).unwrap()) + Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::TimestampMicros + ])?)
); + + Ok(()) } #[test] - fn record_field_order_from_str() { + fn record_field_order_from_str() -> TestResult { use std::str::FromStr; assert_eq!( @@ -2983,11 +3334,12 @@ mod tests { RecordFieldOrder::Ignore ); assert!(RecordFieldOrder::from_str("not an ordering").is_err()); + + Ok(()) } - /// AVRO-3374 #[test] - fn test_avro_3374_preserve_namespace_for_primitive() { + fn test_avro_3374_preserve_namespace_for_primitive() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2999,18 +3351,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let json = schema.canonical_form(); assert_eq!( json, r#"{"name":"ns.int","type":"record","fields":[{"name":"value","type":"int"},{"name":"next","type":["null","ns.int"]}]}"# ); + + Ok(()) } #[test] - fn test_avro_3433_preserve_schema_refs_in_json() { + fn test_avro_3433_preserve_schema_refs_in_json() -> TestResult { let schema = r#" { "name": "test.test", @@ -3025,14 +3378,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let expected = r#"{"name":"test.test","type":"record","fields":[{"name":"bar","type":{"name":"test.foo","type":"record","fields":[{"name":"id","type":"long"}]}},{"name":"baz","type":"test.foo"}]}"#; assert_eq!(schema.canonical_form(), expected); + + Ok(()) } #[test] - fn test_read_namespace_from_name() { + fn test_read_namespace_from_name() -> TestResult { let schema = r#" { "name": "space.name", @@ -3046,17 +3401,19 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.name, "name"); assert_eq!(name.namespace, Some("space".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] - fn test_namespace_from_name_has_priority_over_from_field() { + fn test_namespace_from_name_has_priority_over_from_field() -> TestResult { let schema = r#" { "name": "space1.name", @@ -3071,16 +3428,18 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.namespace, Some("space1".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] - fn test_namespace_from_field() { + fn test_namespace_from_field() -> TestResult { let schema = r#" { "name": "name", @@ -3095,20 +3454,24 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.namespace, Some("space2".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] /// Zero-length namespace is considered as no-namespace. 
- fn test_namespace_from_name_with_empty_value() { - let name = Name::new(".name").unwrap(); + fn test_namespace_from_name_with_empty_value() -> TestResult { + let name = Name::new(".name")?; assert_eq!(name.name, "name"); assert_eq!(name.namespace, None); + + Ok(()) } #[test] @@ -3130,7 +3493,7 @@ mod tests { } #[test] - fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3160,16 +3523,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_record_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_record_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3199,16 +3564,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3233,16 +3600,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3267,16 +3636,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3301,16 +3672,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { - 
assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3335,16 +3708,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_record_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_record_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3375,16 +3750,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3410,16 +3787,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3445,16 +3824,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_outer_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_outer_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3496,7 +3877,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3504,12 +3885,14 @@ mod tests { "space.middle_record_name", "space.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn 
avro_3448_test_proper_multi_level_resolution_inner_record_middle_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_middle_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3552,7 +3935,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3560,12 +3943,14 @@ mod tests { "middle_namespace.middle_record_name", "middle_namespace.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_inner_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3609,7 +3994,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3617,12 +4002,14 @@ mod tests { "middle_namespace.middle_record_name", "inner_namespace.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_in_array_resolution_inherited_namespace() { + fn avro_3448_test_proper_in_array_resolution_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3652,16 +4039,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_array_record"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_in_map_resolution_inherited_namespace() { + fn avro_3448_test_proper_in_map_resolution_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3691,16 +4080,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_map_record"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3466_test_to_json_inner_enum_inner_namespace() { + fn avro_3466_test_to_json_inner_enum_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3726,23 +4117,25 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } // convert Schema back to JSON string let schema_str = 
serde_json::to_string(&schema).expect("test failed"); let _schema = Schema::parse_str(&schema_str).expect("test failed"); assert_eq!(schema, _schema); + + Ok(()) } #[test] - fn avro_3466_test_to_json_inner_fixed_inner_namespace() { + fn avro_3466_test_to_json_inner_fixed_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3768,19 +4161,21 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } // convert Schema back to JSON string let schema_str = serde_json::to_string(&schema).expect("test failed"); let _schema = Schema::parse_str(&schema_str).expect("test failed"); assert_eq!(schema, _schema); + + Ok(()) } fn assert_avro_3512_aliases(aliases: &Aliases) { @@ -3798,7 +4193,7 @@ mod tests { } #[test] - fn avro_3512_alias_with_null_namespace_record() { + fn avro_3512_alias_with_null_namespace_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3811,18 +4206,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - if let Schema::Record { ref aliases, .. } = schema { + if let Schema::Record(RecordSchema { ref aliases, .. }) = schema { assert_avro_3512_aliases(aliases); } else { - panic!("The Schema should be a record: {:?}", schema); + panic!("The Schema should be a record: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3512_alias_with_null_namespace_enum() { + fn avro_3512_alias_with_null_namespace_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3835,18 +4231,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - if let Schema::Enum { ref aliases, .. } = schema { + if let Schema::Enum(EnumSchema { ref aliases, .. }) = schema { assert_avro_3512_aliases(aliases); } else { - panic!("The Schema should be an enum: {:?}", schema); + panic!("The Schema should be an enum: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3512_alias_with_null_namespace_fixed() { + fn avro_3512_alias_with_null_namespace_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3857,18 +4254,19 @@ mod tests { "size" : 12 } "#, - ) - .unwrap(); + )?; - if let Schema::Fixed { ref aliases, .. } = schema { + if let Schema::Fixed(FixedSchema { ref aliases, .. 
}) = schema { assert_avro_3512_aliases(aliases); } else { - panic!("The Schema should be a fixed: {:?}", schema); + panic!("The Schema should be a fixed: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_record() { + fn avro_3518_serialize_aliases_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3877,24 +4275,31 @@ mod tests { "namespace": "space", "aliases": ["b", "x.y", ".c"], "fields" : [ - {"name": "time", "type": "long"} + { + "name": "time", + "type": "long", + "doc": "The documentation is not serialized", + "default": 123, + "aliases": ["time1", "ns.time2"] + } ] } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( - r#"{"aliases":["space.b","x.y","c"],"fields":[{"name":"time","type":"long"}],"name":"a","namespace":"space","type":"record"}"#, + r#"{"aliases":["space.b","x.y","c"],"fields":[{"aliases":["time1","ns.time2"],"default":123,"name":"time","type":"long"}],"name":"a","namespace":"space","type":"record"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_enum() { + fn avro_3518_serialize_aliases_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3907,20 +4312,21 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","symbols":["symbol1","symbol2"],"type":"enum"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_fixed() { + fn avro_3518_serialize_aliases_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3931,20 +4337,21 @@ mod tests { "size" : 12 } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","size":12,"type":"fixed"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3130_parse_anonymous_union_type() { + fn avro_3130_parse_anonymous_union_type() -> TestResult { let schema_str = r#" { "type": "record", @@ -3964,10 +4371,10 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; - if let Schema::Record { name, fields, .. } = schema { - assert_eq!(name, Name::new("AccountEvent").unwrap()); + if let Schema::Record(RecordSchema { name, fields, .. 
}) = schema { + assert_eq!(name, Name::new("AccountEvent")?); let field = &fields[0]; assert_eq!(&field.name, "NullableLongArray"); @@ -3990,10 +4397,12 @@ mod tests { } else { panic!("Expected Schema::Record"); } + + Ok(()) } #[test] - fn avro_custom_attributes_schema_without_attributes() { + fn avro_custom_attributes_schema_without_attributes() -> TestResult { let schemata_str = [ r#" { @@ -4021,9 +4430,11 @@ mod tests { "#, ]; for schema_str in schemata_str.iter() { - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; assert_eq!(schema.custom_attributes(), Some(&Default::default())); } + + Ok(()) } const CUSTOM_ATTRS_SUFFIX: &str = r#" @@ -4037,7 +4448,7 @@ mod tests { "#; #[test] - fn avro_3609_custom_attributes_schema_with_attributes() { + fn avro_3609_custom_attributes_schema_with_attributes() -> TestResult { let schemata_str = [ r#" { @@ -4077,14 +4488,15 @@ mod tests { .to_owned() .replace("{{{}}}", CUSTOM_ATTRS_SUFFIX) .as_str(), - ) - .unwrap(); + )?; assert_eq!( schema.custom_attributes(), Some(&expected_custom_attibutes()) ); } + + Ok(()) } fn expected_custom_attibutes() -> BTreeMap { @@ -4103,7 +4515,7 @@ mod tests { } #[test] - fn avro_3609_custom_attributes_record_field_without_attributes() { + fn avro_3609_custom_attributes_record_field_without_attributes() -> TestResult { let schema_str = String::from( r#" { @@ -4121,12 +4533,11 @@ mod tests { "#, ); - let schema = - Schema::parse_str(schema_str.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX).as_str()).unwrap(); + let schema = Schema::parse_str(schema_str.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX).as_str())?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("Rec").unwrap()); + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("Rec")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "field_one"); @@ -4134,10 +4545,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3625_null_is_first() { + fn avro_3625_null_is_first() -> TestResult { let schema_str = String::from( r#" { @@ -4150,11 +4563,11 @@ mod tests { "#, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("union_schema_test").unwrap()); + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4171,10 +4584,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3625_null_is_last() { + fn avro_3625_null_is_last() -> TestResult { let schema_str = String::from( r#" { @@ -4187,11 +4602,11 @@ mod tests { "#, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("union_schema_test").unwrap()); + Schema::Record(RecordSchema { name, fields, .. 
}) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4207,10 +4622,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3625_null_is_the_middle() { + fn avro_3625_null_is_the_middle() -> TestResult { let schema_str = String::from( r#" { @@ -4223,11 +4640,11 @@ mod tests { "#, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("union_schema_test").unwrap()); + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4244,5 +4661,1428 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) + } + + #[test] + fn avro_3709_parsing_of_record_field_aliases() -> TestResult { + let schema = r#" + { + "name": "rec", + "type": "record", + "fields": [ + { + "name": "num", + "type": "int", + "aliases": ["num1", "num2"] + } + ] + } + "#; + + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { fields, .. }) = schema { + let num_field = &fields[0]; + assert_eq!(num_field.name, "num"); + assert_eq!(num_field.aliases, Some(vec!("num1".into(), "num2".into()))); + } else { + panic!("Expected a record schema!"); + } + + Ok(()) + } + + #[test] + fn avro_3735_parse_enum_namespace() -> TestResult { + let schema = r#" + { + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + } + "#; + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + let schema = Schema::parse_str(schema)?; + + let foo = Foo { + bar_init: Bar::Bar0, + bar_use: Bar::Bar1, + }; + + let avro_value = crate::to_value(foo)?; + assert!(avro_value.validate(&schema)); + + let mut writer = crate::Writer::new(&schema, Vec::new()); + + // schema validation happens here + writer.append(avro_value)?; + + Ok(()) + } + + #[test] + fn avro_3755_deserialize() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ 
+ "bar0", + "bar1", + "bar2" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo = Foo { + bar_init: Bar::Bar0, + bar_use: Bar::Bar1, + }; + let avro_value = crate::to_value(foo)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + assert_eq!(fields[1].0, "barUse"); + assert_eq!(fields[1].1, types::Value::Enum(1, "bar1".to_string())); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) + } + + #[test] + fn test_avro_3780_decimal_schema_type_with_fixed() -> TestResult { + let schema = json!( + { + "type": "record", + "name": "recordWithDecimal", + "fields": [ + { + "name": "decimal", + "type": "fixed", + "name": "nestedFixed", + "size": 8, + "logicalType": "decimal", + "precision": 4 + } + ] + }); + + let parse_result = Schema::parse(&schema); + assert!( + parse_result.is_ok(), + "parse result must be ok, got: {:?}", + parse_result + ); + + Ok(()) + } + + #[test] + fn test_avro_3772_enum_default_wrong_type() -> TestResult { + let schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "clubs", "hearts"], + "default": 123 + } + } + ] + } + "#; + + match Schema::parse_str(schema) { + Err(err) => { + assert_eq!( + err.to_string(), + "Default value for enum must be a string! Got: 123" + ); + } + _ => panic!("Expected an error"), + } + Ok(()) + } + + #[test] + fn test_avro_3812_handle_null_namespace_properly() -> TestResult { + let schema_str = r#" + { + "namespace": "", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "a", + "type": { + "type": "enum", + "name": "my_enum", + "namespace": "", + "symbols": ["a", "b"] + } + }, { + "name": "b", + "type": { + "type": "fixed", + "name": "my_fixed", + "namespace": "", + "size": 10 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"a","type":{"name":"my_enum","type":"enum","symbols":["a","b"]}},{"name":"b","type":{"name":"my_fixed","type":"fixed","size":10}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + let name = Name::new("my_name")?; + let fullname = name.fullname(Some("".to_string())); + assert_eq!(fullname, "my_name"); + let qname = name.fully_qualified_name(&Some("".to_string())).to_string(); + assert_eq!(qname, "my_name"); + + Ok(()) + } + + #[test] + fn test_avro_3818_inherit_enclosing_namespace() -> TestResult { + // Enclosing namespace is specified but inner namespaces are not. 
+ let schema_str = r#" + { + "namespace": "my_ns", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"my_ns.fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Enclosing namespace and inner namespaces are specified + // but inner namespaces are "" + let schema_str = r#" + { + "namespace": "my_ns", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "namespace": "", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "namespace": "", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Enclosing namespace is "" and inner non-empty namespaces are specified. + let schema_str = r#" + { + "namespace": "", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "namespace": "f1.ns", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "f2.ns.fixed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"f1.ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"f2.ns.fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Nested complex types with non-empty enclosing namespace. 
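+ // A compact sketch of the nearest-enclosing rule checked below (again only
+ // `Schema` is assumed): a nested named type inherits the namespace of the
+ // closest enclosing named type, not the outermost one.
+ let sketch = Schema::parse_str(
+     r#"{"name":"a.top","type":"record","fields":[
+         {"name":"f","type":{"name":"mid","namespace":"b","type":"record","fields":[
+             {"name":"g","type":{"name":"leaf","type":"enum","symbols":["x"]}}]}}]}"#,
+ )?;
+ assert!(sketch.canonical_form().contains(r#""name":"b.leaf""#));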
+ let schema_str = r#" + { + "type": "record", + "name": "my_ns.my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "inner_record1", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + } + ] + } + }, { + "name": "f2", + "type": { + "name": "inner_record2", + "type": "record", + "namespace": "inner_ns", + "fields": [ + { + "name": "f2_1", + "type": { + "name": "enum2", + "type": "enum", + "symbols": ["a"] + } + } + ] + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.inner_record1","type":"record","fields":[{"name":"f1_1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}}]}},{"name":"f2","type":{"name":"inner_ns.inner_record2","type":"record","fields":[{"name":"f2_1","type":{"name":"inner_ns.enum2","type":"enum","symbols":["a"]}}]}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + Ok(()) + } + + #[test] + fn test_avro_3820_deny_invalid_field_names() -> TestResult { + let schema_str = r#" + { + "name": "my_record", + "type": "record", + "fields": [ + { + "name": "f1.x", + "type": { + "name": "my_enum", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "my_fixed", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::FieldName(x)) if x == "f1.x" => Ok(()), + other => Err(format!("Expected Error::FieldName, got {other:?}").into()), + } + } + + #[test] + fn test_avro_3827_disallow_duplicate_field_names() -> TestResult { + let schema_str = r#" + { + "name": "my_schema", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "a", + "type": "record", + "fields": [] + } + }, { + "name": "f1", + "type": { + "name": "b", + "type": "record", + "fields": [] + } + } + ] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::FieldNameDuplicate(_)) => (), + other => { + return Err(format!("Expected Error::FieldNameDuplicate, got {other:?}").into()) + } + }; + + let schema_str = r#" + { + "name": "my_schema", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "a", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "b", + "type": "record", + "fields": [] + } + } + ] + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"a","type":"record","fields":[{"name":"f1","type":{"name":"b","type":"record","fields":[]}}]}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + Ok(()) + } + + #[test] + fn test_avro_3830_null_namespace_in_fully_qualified_names() -> TestResult { + // Check whether all the named types don't refer to the namespace field + // if their name starts with a dot. 
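+ // In short (a sketch mirroring the expectations below): a leading dot in a
+ // full name selects the null namespace, so a sibling "namespace" attribute
+ // is ignored.
+ let sketch =
+     Schema::parse_str(r#"{"name":".foo","namespace":"ignored","type":"fixed","size":1}"#)?;
+ assert_eq!(sketch.canonical_form(), r#"{"name":"foo","type":"fixed","size":1}"#);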
+ let schema_str = r#" + { + "name": ".record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": ".enum1", + "namespace": "ns2", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": ".fxed1", + "namespace": "ns3", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Check whether inner types don't inherit ns1. + let schema_str = r#" + { + "name": ".record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fxed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + let name = Name::new(".my_name")?; + let fullname = name.fullname(None); + assert_eq!(fullname, "my_name"); + let qname = name.fully_qualified_name(&None).to_string(); + assert_eq!(qname, "my_name"); + + Ok(()) + } + + #[test] + fn test_avro_3814_schema_resolution_failure() -> TestResult { + // Define a reader schema: a nested record with an optional field. + let reader_schema = json!( + { + "type": "record", + "name": "MyOuterRecord", + "fields": [ + { + "name": "inner_record", + "type": [ + "null", + { + "type": "record", + "name": "MyRecord", + "fields": [ + {"name": "a", "type": "string"} + ] + } + ], + "default": null + } + ] + } + ); + + // Define a writer schema: a nested record with an optional field, which + // may optionally contain an enum. + let writer_schema = json!( + { + "type": "record", + "name": "MyOuterRecord", + "fields": [ + { + "name": "inner_record", + "type": [ + "null", + { + "type": "record", + "name": "MyRecord", + "fields": [ + {"name": "a", "type": "string"}, + { + "name": "b", + "type": [ + "null", + { + "type": "enum", + "name": "MyEnum", + "symbols": ["A", "B", "C"], + "default": "C" + } + ], + "default": null + }, + ] + } + ] + } + ], + "default": null + } + ); + + // Use different structs to represent the "Reader" and the "Writer" + // to mimic two different versions of a producer & consumer application. + #[derive(Serialize, Deserialize, Debug)] + struct MyInnerRecordReader { + a: String, + } + + #[derive(Serialize, Deserialize, Debug)] + struct MyRecordReader { + inner_record: Option, + } + + #[derive(Serialize, Deserialize, Debug)] + enum MyEnum { + A, + B, + C, + } + + #[derive(Serialize, Deserialize, Debug)] + struct MyInnerRecordWriter { + a: String, + b: Option, + } + + #[derive(Serialize, Deserialize, Debug)] + struct MyRecordWriter { + inner_record: Option, + } + + let s = MyRecordWriter { + inner_record: Some(MyInnerRecordWriter { + a: "foo".to_string(), + b: None, + }), + }; + + // Serialize using the writer schema. 
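+ // The round trip below is: `to_value` -> `validate` -> `to_avro_datum` with
+ // the writer schema, then `from_avro_datum` with `Some(&reader_schema)`,
+ // which is where schema resolution must drop the writer-only optional enum
+ // field `b`.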
+ let writer_schema = Schema::parse(&writer_schema)?; + let avro_value = crate::to_value(s)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + + // Now, attempt to deserialize using the reader schema. + let reader_schema = Schema::parse(&reader_schema)?; + let mut x = &datum[..]; + + // Deserialization should succeed and we should be able to resolve the schema. + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + assert!(deser_value.validate(&reader_schema)); + + // Verify that we can read a field from the record. + let d: MyRecordReader = crate::from_value(&deser_value)?; + assert_eq!(d.inner_record.unwrap().a, "foo".to_string()); + Ok(()) + } + + #[test] + fn test_avro_3837_disallow_invalid_namespace() -> TestResult { + // Valid namespace #1 (Single name portion) + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1", + "type": "record", + "fields": [] + } + "#; + + let expected = r#"{"name":"ns1.record1","type":"record","fields":[]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Valid namespace #2 (multiple name portions). + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1.foo.bar", + "type": "enum", + "symbols": ["a"] + } + "#; + + let expected = r#"{"name":"ns1.foo.bar.enum1","type":"enum","symbols":["a"]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Invalid namespace #1 (a name portion starts with dot) + let schema_str = r#" + { + "name": "fixed1", + "namespace": ".ns1.a.b", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #2 (invalid character in a name portion) + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1.a*b.c", + "type": "record", + "fields": [] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #3 (a name portion starts with a digit) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1.1a.b", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #4 (a name portion is missing - two dots in a row) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1..a", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #5 (a name portion is missing - ends with a dot) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1.a.", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + Ok(()) + } + + #[test] + fn avro_3649_default_notintfirst() { + let schema_str = 
String::from( + r#" + { + "type": "record", + "name": "union_schema_test", + "fields": [ + {"name": "a", "type": ["string", "int"], "default": 123} + ] + } + "#, + ); + + let schema = Schema::parse_str(&schema_str).unwrap(); + + match schema { + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("union_schema_test").unwrap()); + assert_eq!(fields.len(), 1); + let field = &fields[0]; + assert_eq!(&field.name, "a"); + assert_eq!(&field.default, &Some(json!(123))); + match &field.schema { + Schema::Union(union) => { + assert_eq!(union.variants().len(), 2); + assert_eq!(union.variants()[0], Schema::String); + assert_eq!(union.variants()[1], Schema::Int); + } + _ => panic!("Expected Schema::Union"), + } + } + _ => panic!("Expected Schema::Record"), + } + } + + #[test] + fn test_avro_3851_validate_default_value_of_simple_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#""int""#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_nested_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + }, + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.record2","type":"record","fields":[{"name":"f1_1","type":"int"}]}"# + .to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_enum_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", "c"] + }, + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.enum1","type":"enum","symbols":["a","b","c"]}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_fixed_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + }, + "default": 100 + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.fixed1","type":"fixed","size":3}"#.to_string(), + ) + .to_string(); + let result = 
Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_array_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "array", + "items": "int", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"type":"array","items":"int"}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_map_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "map", + "values": "string", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"type":"map","values":"string"}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_ref_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": "ns.record2", + "default": { "f1_1": true } + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f2".to_string(), + "ns.record1".to_string(), + r#""ns.record2""#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_enum() -> TestResult { + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"], + "default": 100 + } + "#; + let expected = Error::EnumDefaultWrongType(100.into()).to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"], + "default": "d" + } + "#; + let expected = Error::GetEnumDefault { + symbol: "d".to_string(), + symbols: vec!["a".to_string(), "b".to_string(), "c".to_string()], + } + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3862_get_aliases() -> TestResult { + // Test for Record + let schema_str = r#" + { + "name": "record1", + "namespace": 
"ns1", + "type": "record", + "aliases": ["r1", "ns2.r2"], + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.r1")?, Alias::new("ns2.r2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Enum + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1", + "type": "enum", + "aliases": ["en1", "ns2.en2"], + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.en1")?, Alias::new("ns2.en2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1", + "type": "enum", + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Fixed + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1", + "type": "fixed", + "aliases": ["fx1", "ns2.fx2"], + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.fx1")?, Alias::new("ns2.fx2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1", + "type": "fixed", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for non-named type + let schema = Schema::Int; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + Ok(()) + } + + #[test] + fn test_avro_3862_get_doc() -> TestResult { + // Test for Record + let schema_str = r#" + { + "name": "record1", + "type": "record", + "doc": "Record Document", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Record Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Enum + let schema_str = r#" + { + "name": "enum1", + "type": "enum", + "doc": "Enum Document", + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Enum Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", 
"c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Fixed + let schema_str = r#" + { + "name": "fixed1", + "type": "fixed", + "doc": "Fixed Document", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Fixed Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "fixed1", + "type": "fixed", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for non-named type + let schema = Schema::Int; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + Ok(()) } } diff --git a/lang/rust/avro/src/schema_compatibility.rs b/lang/rust/avro/src/schema_compatibility.rs index 843a139df42..a15c18407ef 100644 --- a/lang/rust/avro/src/schema_compatibility.rs +++ b/lang/rust/avro/src/schema_compatibility.rs @@ -16,7 +16,7 @@ // under the License. //! Logic for checking schema compatibility -use crate::schema::{Schema, SchemaKind}; +use crate::schema::{EnumSchema, FixedSchema, RecordSchema, Schema, SchemaKind}; use std::{ collections::{hash_map::DefaultHasher, HashSet}, hash::Hasher, @@ -88,13 +88,13 @@ impl Checker { SchemaKind::Union => self.match_union_schemas(writers_schema, readers_schema), SchemaKind::Enum => { // reader's symbols must contain all writer's symbols - if let Schema::Enum { + if let Schema::Enum(EnumSchema { symbols: w_symbols, .. - } = writers_schema + }) = writers_schema { - if let Schema::Enum { + if let Schema::Enum(EnumSchema { symbols: r_symbols, .. - } = readers_schema + }) = readers_schema { return !w_symbols.iter().any(|e| !r_symbols.contains(e)); } @@ -121,15 +121,15 @@ impl Checker { return false; } - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: w_fields, lookup: w_lookup, .. - } = writers_schema + }) = writers_schema { - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: r_fields, .. - } = readers_schema + }) = readers_schema { for field in r_fields.iter() { if let Some(pos) = w_lookup.get(&field.name) { @@ -219,8 +219,8 @@ impl SchemaCompatibility { match r_type { SchemaKind::Record => { - if let Schema::Record { name: w_name, .. } = writers_schema { - if let Schema::Record { name: r_name, .. } = readers_schema { + if let Schema::Record(RecordSchema { name: w_name, .. }) = writers_schema { + if let Schema::Record(RecordSchema { name: r_name, .. }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None); } else { unreachable!("readers_schema should have been Schema::Record") @@ -230,21 +230,21 @@ impl SchemaCompatibility { } } SchemaKind::Fixed => { - if let Schema::Fixed { + if let Schema::Fixed(FixedSchema { name: w_name, aliases: _, doc: _w_doc, size: w_size, attributes: _, - } = writers_schema + }) = writers_schema { - if let Schema::Fixed { + if let Schema::Fixed(FixedSchema { name: r_name, aliases: _, doc: _r_doc, size: r_size, attributes: _, - } = readers_schema + }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None) && w_size == r_size; @@ -256,8 +256,8 @@ impl SchemaCompatibility { } } SchemaKind::Enum => { - if let Schema::Enum { name: w_name, .. } = writers_schema { - if let Schema::Enum { name: r_name, .. 
} = readers_schema { + if let Schema::Enum(EnumSchema { name: w_name, .. }) = writers_schema { + if let Schema::Enum(EnumSchema { name: r_name, .. }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None); } else { unreachable!("readers_schema should have been Schema::Enum") @@ -293,7 +293,7 @@ impl SchemaCompatibility { } if w_type == SchemaKind::Int - && vec![SchemaKind::Long, SchemaKind::Float, SchemaKind::Double] + && [SchemaKind::Long, SchemaKind::Float, SchemaKind::Double] .iter() .any(|&t| t == r_type) { @@ -301,7 +301,7 @@ impl SchemaCompatibility { } if w_type == SchemaKind::Long - && vec![SchemaKind::Float, SchemaKind::Double] + && [SchemaKind::Float, SchemaKind::Double] .iter() .any(|&t| t == r_type) { @@ -327,6 +327,11 @@ impl SchemaCompatibility { #[cfg(test)] mod tests { use super::*; + use crate::{ + types::{Record, Value}, + Codec, Reader, Writer, + }; + use apache_avro_test_helper::TestResult; fn int_array_schema() -> Schema { Schema::parse_str(r#"{"type":"array", "items":"int"}"#).unwrap() @@ -437,7 +442,7 @@ mod tests { .map(|s| s.canonical_form()) .collect::>() .join(","); - Schema::parse_str(&format!("[{}]", schema_string)).unwrap() + Schema::parse_str(&format!("[{schema_string}]")).unwrap() } fn empty_union_schema() -> Schema { @@ -589,15 +594,14 @@ mod tests { } #[test] - fn test_missing_field() { + fn test_missing_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ {"name":"oldfield1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema, @@ -606,18 +610,19 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_missing_second_field() { + fn test_missing_second_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ {"name":"oldfield2", "type":"string"} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -626,10 +631,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_all_fields() { + fn test_all_fields() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -637,8 +644,7 @@ mod tests { {"name":"oldfield2", "type":"string"} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -647,10 +653,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_new_field_with_default() { + fn test_new_field_with_default() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -658,8 +666,7 @@ mod tests { {"name":"newfield1", "type":"int", "default":42} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -668,10 +675,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_new_field() { + fn test_new_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -679,8 +688,7 @@ mod tests { {"name":"newfield1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(!SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -689,6 +697,8 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] @@ -720,7 +730,7 @@ mod tests { } #[test] - fn test_union_reader_writer_subset_incompatiblity() { + fn 
test_union_reader_writer_subset_incompatibility() { // reader union schema must contain all writer union branches let union_writer = union_schema(vec![Schema::Int, Schema::String]); let union_reader = union_schema(vec![Schema::String]); @@ -730,15 +740,14 @@ mod tests { } #[test] - fn test_incompatible_record_field() { + fn test_incompatible_record_field() -> TestResult { let string_schema = Schema::parse_str( r#" {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ {"name":"field1", "type":"string"} ]} "#, - ) - .unwrap(); + )?; let int_schema = Schema::parse_str( r#" @@ -746,25 +755,26 @@ mod tests { {"name":"field1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(!SchemaCompatibility::can_read(&string_schema, &int_schema)); + + Ok(()) } #[test] - fn test_enum_symbols() { + fn test_enum_symbols() -> TestResult { let enum_schema1 = Schema::parse_str( r#" {"type":"enum", "name":"MyEnum", "symbols":["A","B"]} "#, - ) - .unwrap(); + )?; let enum_schema2 = - Schema::parse_str(r#"{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}"#)?; assert!(!SchemaCompatibility::can_read(&enum_schema2, &enum_schema1)); assert!(SchemaCompatibility::can_read(&enum_schema1, &enum_schema2)); + + Ok(()) } fn point_2d_schema() -> Schema { @@ -900,4 +910,132 @@ mod tests { &read_schema )); } + + #[test] + fn test_avro_3772_enum_default() -> TestResult { + let writer_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "clubs", "hearts"], + "default": "spades" + } + } + ] + } + "#; + + let reader_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "ninja", "hearts"], + "default": "spades" + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_raw_schema)?; + let reader_schema = Schema::parse_str(reader_raw_schema)?; + let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); + let mut record = Record::new(writer.schema()).unwrap(); + record.put("a", 27i64); + record.put("b", "foo"); + record.put("c", "clubs"); + writer.append(record).unwrap(); + let input = writer.into_inner()?; + let mut reader = Reader::with_schema(&reader_schema, &input[..])?; + assert_eq!( + reader.next().unwrap().unwrap(), + Value::Record(vec![ + ("a".to_string(), Value::Long(27)), + ("b".to_string(), Value::String("foo".to_string())), + ("c".to_string(), Value::Enum(1, "spades".to_string())), + ]) + ); + assert!(reader.next().is_none()); + + Ok(()) + } + + #[test] + fn test_avro_3772_enum_default_less_symbols() -> TestResult { + let writer_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "clubs", "hearts"], + "default": "spades" + } + } + ] + } + "#; + + let reader_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": 
"suit", + "symbols": ["hearts", "spades"], + "default": "spades" + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_raw_schema)?; + let reader_schema = Schema::parse_str(reader_raw_schema)?; + let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); + let mut record = Record::new(writer.schema()).unwrap(); + record.put("a", 27i64); + record.put("b", "foo"); + record.put("c", "hearts"); + writer.append(record).unwrap(); + let input = writer.into_inner()?; + let mut reader = Reader::with_schema(&reader_schema, &input[..])?; + assert_eq!( + reader.next().unwrap().unwrap(), + Value::Record(vec![ + ("a".to_string(), Value::Long(27)), + ("b".to_string(), Value::String("foo".to_string())), + ("c".to_string(), Value::Enum(0, "hearts".to_string())), + ]) + ); + assert!(reader.next().is_none()); + + Ok(()) + } } diff --git a/lang/rust/avro/src/ser.rs b/lang/rust/avro/src/ser.rs index c85c8c4d3ee..ce779b946c2 100644 --- a/lang/rust/avro/src/ser.rs +++ b/lang/rust/avro/src/ser.rs @@ -200,10 +200,10 @@ impl<'b> ser::Serializer for &'b mut Serializer { fn serialize_unit_variant( self, _: &'static str, - index: u32, + _variant_index: u32, variant: &'static str, ) -> Result { - Ok(Value::Enum(index, variant.to_string())) + Ok(Value::String(variant.to_string())) } fn serialize_newtype_struct( @@ -283,6 +283,10 @@ impl<'b> ser::Serializer for &'b mut Serializer { ) -> Result { Ok(StructVariantSerializer::new(index, variant, len)) } + + fn is_human_readable(&self) -> bool { + crate::util::is_human_readable() + } } impl ser::SerializeSeq for SeqSerializer { @@ -485,8 +489,11 @@ pub fn to_value(value: S) -> Result { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; + use serial_test::serial; + use std::sync::atomic::Ordering; #[derive(Debug, Deserialize, Serialize, Clone)] struct Test { @@ -678,7 +685,7 @@ mod tests { } #[test] - fn test_to_value() { + fn test_to_value() -> TestResult { let test = Test { a: 27, b: "foo".to_owned(), @@ -688,7 +695,7 @@ mod tests { ("b".to_owned(), Value::String("foo".to_owned())), ]); - assert_eq!(to_value(test.clone()).unwrap(), expected); + assert_eq!(to_value(test.clone())?, expected); let test_inner = TestInner { a: test, b: 35 }; @@ -703,19 +710,21 @@ mod tests { ("b".to_owned(), Value::Int(35)), ]); - assert_eq!(to_value(test_inner).unwrap(), expected_inner); + assert_eq!(to_value(test_inner)?, expected_inner); + + Ok(()) } #[test] - fn test_to_value_unit_enum() { + fn test_to_value_unit_enum() -> TestResult { let test = TestUnitExternalEnum { a: UnitExternalEnum::Val1, }; - let expected = Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]); + let expected = Value::Record(vec![("a".to_owned(), Value::String("Val1".to_owned()))]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit external enum" ); @@ -730,7 +739,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit internal enum" ); @@ -745,7 +754,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit adjacent enum" ); @@ -757,14 +766,16 @@ mod tests { let expected = Value::Record(vec![("a".to_owned(), Value::Null)]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit untagged enum" ); + + Ok(()) } #[test] - fn test_to_value_single_value_enum() { + fn 
test_to_value_single_value_enum() -> TestResult { let test = TestSingleValueExternalEnum { a: SingleValueExternalEnum::Double(64.0), }; @@ -781,7 +792,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value external enum" ); @@ -806,7 +817,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value adjacent enum" ); @@ -818,14 +829,16 @@ mod tests { let expected = Value::Record(vec![("a".to_owned(), Value::Double(64.0))]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value untagged enum" ); + + Ok(()) } #[test] - fn test_to_value_struct_enum() { + fn test_to_value_struct_enum() -> TestResult { let test = TestStructExternalEnum { a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, }; @@ -847,7 +860,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct external enum" ); @@ -867,7 +880,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct internal enum" ); @@ -890,7 +903,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct adjacent enum" ); @@ -907,7 +920,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct untagged enum" ); @@ -929,14 +942,16 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct untagged enum variant" ); + + Ok(()) } #[test] - fn test_to_value_tuple_enum() { + fn test_to_value_tuple_enum() -> TestResult { let test = TestTupleExternalEnum { a: TupleExternalEnum::Val2(1.0, 2.0, 3.0), }; @@ -957,7 +972,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple external enum" ); @@ -978,7 +993,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple adjacent enum" ); @@ -993,9 +1008,35 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple untagged enum" ); + + Ok(()) + } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_false() { + use serde::ser::Serializer as SerdeSerializer; + + crate::util::SERDE_HUMAN_READABLE.store(false, Ordering::Release); + + let ser = &mut Serializer {}; + + assert_eq!(ser.is_human_readable(), false); + } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_true() { + use serde::ser::Serializer as SerdeSerializer; + + crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release); + + let ser = &mut Serializer {}; + + assert!(ser.is_human_readable()); } } diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs index 940ca17aa43..9bb60770562 100644 --- a/lang/rust/avro/src/types.rs +++ b/lang/rust/avro/src/types.rs @@ -20,15 +20,17 @@ use crate::{ decimal::Decimal, duration::Duration, schema::{ - Name, NamesRef, Namespace, Precision, RecordField, ResolvedSchema, Scale, Schema, - SchemaKind, UnionSchema, + DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision, RecordField, + RecordSchema, ResolvedSchema, Scale, Schema, SchemaKind, UnionSchema, }, AvroResult, Error, }; use serde_json::{Number, Value as JsonValue}; use std::{ + borrow::Borrow, collections::{BTreeMap, HashMap}, convert::TryFrom, + fmt::Debug, hash::BuildHasher, str::FromStr, }; @@ -106,6 
+108,10 @@ pub enum Value { TimestampMillis(i64), /// Timestamp in microseconds. TimestampMicros(i64), + /// Local timestamp in milliseconds. + LocalTimestampMillis(i64), + /// Local timestamp in microseconds. + LocalTimestampMicros(i64), /// Avro Duration. An amount of time defined by months, days and milliseconds. Duration(Duration), /// Universally unique identifier. @@ -221,11 +227,11 @@ impl<'a> Record<'a> { /// If the `Schema` is not a `Schema::Record` variant, `None` will be returned. pub fn new(schema: &Schema) -> Option { match *schema { - Schema::Record { + Schema::Record(RecordSchema { fields: ref schema_fields, lookup: ref schema_lookup, .. - } => { + }) => { let mut fields = Vec::with_capacity(schema_fields.len()); for schema_field in schema_fields.iter() { fields.push((schema_field.name.clone(), Value::Null)); @@ -282,7 +288,7 @@ impl From for Value { } /// Convert Avro values to Json values -impl std::convert::TryFrom for JsonValue { +impl TryFrom for JsonValue { type Error = crate::error::Error; fn try_from(value: Value) -> AvroResult { match value { @@ -325,6 +331,8 @@ impl std::convert::TryFrom for JsonValue { Value::TimeMicros(t) => Ok(Self::Number(t.into())), Value::TimestampMillis(t) => Ok(Self::Number(t.into())), Value::TimestampMicros(t) => Ok(Self::Number(t.into())), + Value::LocalTimestampMillis(t) => Ok(Self::Number(t.into())), + Value::LocalTimestampMicros(t) => Ok(Self::Number(t.into())), Value::Duration(d) => Ok(Self::Array( <[u8; 12]>::from(d).iter().map(|&v| v.into()).collect(), )), @@ -339,19 +347,32 @@ impl Value { /// See the [Avro specification](https://avro.apache.org/docs/current/spec.html) /// for the full set of rules of schema validation. pub fn validate(&self, schema: &Schema) -> bool { - let rs = ResolvedSchema::try_from(schema).expect("Schema didn't successfully parse"); - let enclosing_namespace = schema.namespace(); - - match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) { - Some(error_msg) => { - error!( - "Invalid value: {:?} for schema: {:?}. Reason: {}", - self, schema, error_msg - ); - false + self.validate_schemata(vec![schema]) + } + + pub fn validate_schemata(&self, schemata: Vec<&Schema>) -> bool { + let rs = ResolvedSchema::try_from(schemata.clone()) + .expect("Schemata didn't successfully resolve"); + let schemata_len = schemata.len(); + schemata.iter().any(|schema| { + let enclosing_namespace = schema.namespace(); + + match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) { + Some(reason) => { + let log_message = format!( + "Invalid value: {:?} for schema: {:?}. Reason: {}", + self, schema, reason + ); + if schemata_len == 1 { + error!("{}", log_message); + } else { + debug!("{}", log_message); + }; + false + } + None => true, } - None => true, - } + }) } fn accumulate(accumulator: Option, other: Option) -> Option { @@ -359,18 +380,19 @@ impl Value { (None, None) => None, (None, s @ Some(_)) => s, (s @ Some(_), None) => s, - (Some(reason1), Some(reason2)) => Some(format!("{}\n{}", reason1, reason2)), + (Some(reason1), Some(reason2)) => Some(format!("{reason1}\n{reason2}")), } } - pub(crate) fn validate_internal>( + /// Validates the value against the provided schema. 
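+ /// Returns `None` when the value matches the schema, or `Some(reason)`
+ /// describing the failure. `names` maps fully qualified names to schemas so
+ /// that `Schema::Ref` nodes can be followed during the walk.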
+ pub(crate) fn validate_internal + Debug>( &self, schema: &Schema, names: &HashMap, enclosing_namespace: &Namespace, ) -> Option { match (self, schema) { - (_, &Schema::Ref { ref name }) => { + (_, Schema::Ref { name }) => { let name = name.fully_qualified_name(enclosing_namespace); names.get(&name).map_or_else( || { @@ -393,8 +415,12 @@ impl Value { (&Value::Long(_), &Schema::TimeMicros) => None, (&Value::Long(_), &Schema::TimestampMillis) => None, (&Value::Long(_), &Schema::TimestampMicros) => None, + (&Value::Long(_), &Schema::LocalTimestampMillis) => None, + (&Value::Long(_), &Schema::LocalTimestampMicros) => None, (&Value::TimestampMicros(_), &Schema::TimestampMicros) => None, (&Value::TimestampMillis(_), &Schema::TimestampMillis) => None, + (&Value::LocalTimestampMicros(_), &Schema::LocalTimestampMicros) => None, + (&Value::LocalTimestampMillis(_), &Schema::LocalTimestampMillis) => None, (&Value::TimeMicros(_), &Schema::TimeMicros) => None, (&Value::TimeMillis(_), &Schema::TimeMillis) => None, (&Value::Date(_), &Schema::Date) => None, @@ -408,17 +434,16 @@ impl Value { (&Value::Bytes(_), &Schema::Decimal { .. }) => None, (&Value::String(_), &Schema::String) => None, (&Value::String(_), &Schema::Uuid) => None, - (&Value::Fixed(n, _), &Schema::Fixed { size, .. }) => { + (&Value::Fixed(n, _), &Schema::Fixed(FixedSchema { size, .. })) => { if n != size { Some(format!( - "The value's size ({}) is different than the schema's size ({})", - n, size + "The value's size ({n}) is different than the schema's size ({size})" )) } else { None } } - (&Value::Bytes(ref b), &Schema::Fixed { size, .. }) => { + (Value::Bytes(b), &Schema::Fixed(FixedSchema { size, .. })) => { if b.len() != size { Some(format!( "The bytes' length ({}) is different than the schema's size ({})", @@ -432,8 +457,7 @@ impl Value { (&Value::Fixed(n, _), &Schema::Duration) => { if n != 12 { Some(format!( - "The value's size ('{}') must be exactly 12 to be a Duration", - n + "The value's size ('{n}') must be exactly 12 to be a Duration" )) } else { None @@ -441,42 +465,50 @@ impl Value { } // TODO: check precision against n (&Value::Fixed(_n, _), &Schema::Decimal { .. }) => None, - (&Value::String(ref s), &Schema::Enum { ref symbols, .. }) => { + (Value::String(s), Schema::Enum(EnumSchema { symbols, .. })) => { if !symbols.contains(s) { - Some(format!("'{}' is not a member of the possible symbols", s)) + Some(format!("'{s}' is not a member of the possible symbols")) } else { None } } - (&Value::Enum(i, ref s), &Schema::Enum { ref symbols, .. }) => symbols + ( + &Value::Enum(i, ref s), + Schema::Enum(EnumSchema { + symbols, default, .. 
+                }),
+            ) => symbols
                 .get(i as usize)
                 .map(|ref symbol| {
                     if symbol != &s {
-                        Some(format!("Symbol '{}' is not at position '{}'", s, i))
+                        Some(format!("Symbol '{s}' is not at position '{i}'"))
                     } else {
                         None
                     }
                 })
-                .unwrap_or_else(|| Some(format!("No symbol at position '{}'", i))),
+                .unwrap_or_else(|| match default {
+                    Some(_) => None,
+                    None => Some(format!("No symbol at position '{i}'")),
+                }),
             // (&Value::Union(None), &Schema::Union(_)) => None,
-            (&Value::Union(i, ref value), &Schema::Union(ref inner)) => inner
+            (&Value::Union(i, ref value), Schema::Union(inner)) => inner
                 .variants()
                 .get(i as usize)
                 .map(|schema| value.validate_internal(schema, names, enclosing_namespace))
-                .unwrap_or_else(|| Some(format!("No schema in the union at position '{}'", i))),
-            (v, &Schema::Union(ref inner)) => match inner.find_schema(v) {
-                Some(_) => None,
-                None => Some("Could not find matching type in union".to_string()),
-            },
-            (&Value::Array(ref items), &Schema::Array(ref inner)) => {
-                items.iter().fold(None, |acc, item| {
-                    Value::accumulate(
-                        acc,
-                        item.validate_internal(inner, names, enclosing_namespace),
-                    )
-                })
+                .unwrap_or_else(|| Some(format!("No schema in the union at position '{i}'"))),
+            (v, Schema::Union(inner)) => {
+                match inner.find_schema_with_known_schemata(v, Some(names), enclosing_namespace) {
+                    Some(_) => None,
+                    None => Some("Could not find matching type in union".to_string()),
+                }
             }
-            (&Value::Map(ref items), &Schema::Map(ref inner)) => {
+            (Value::Array(items), Schema::Array(inner)) => items.iter().fold(None, |acc, item| {
+                Value::accumulate(
+                    acc,
+                    item.validate_internal(inner, names, enclosing_namespace),
+                )
+            }),
+            (Value::Map(items), Schema::Map(inner)) => {
                 items.iter().fold(None, |acc, (_, value)| {
                     Value::accumulate(
                         acc,
@@ -485,16 +517,18 @@ impl Value {
                     })
             }
             (
-                &Value::Record(ref record_fields),
-                &Schema::Record {
-                    ref fields,
-                    ref lookup,
+                Value::Record(record_fields),
+                Schema::Record(RecordSchema {
+                    fields,
+                    lookup,
+                    name,
                     ..
-                },
+                }),
             ) => {
                 let non_nullable_fields_count =
                     fields.iter().filter(|&rf| !rf.is_nullable()).count();
+                // If the record contains fewer fields than the schema's non-nullable fields require, it is invalid.
                 if record_fields.len() < non_nullable_fields_count {
                     return Some(format!(
                         "The value's records length ({}) doesn't match the schema ({} non-nullable fields)",
@@ -512,6 +546,11 @@ impl Value {
                 record_fields
                     .iter()
                     .fold(None, |acc, (field_name, record_field)| {
+                        let record_namespace = if name.namespace.is_none() {
+                            enclosing_namespace
+                        } else {
+                            &name.namespace
+                        };
                         match lookup.get(field_name) {
                             Some(idx) => {
                                 let field = &fields[*idx];
@@ -520,21 +559,18 @@ impl Value {
                                     record_field.validate_internal(
                                         &field.schema,
                                         names,
-                                        enclosing_namespace,
+                                        record_namespace,
                                     ),
                                 )
                             }
                             None => Value::accumulate(
                                 acc,
-                                Some(format!(
-                                    "There is no schema field for field '{}'",
-                                    field_name
-                                )),
+                                Some(format!("There is no schema field for field '{field_name}'")),
                             ),
                         }
                     })
             }
-            (&Value::Map(ref items), &Schema::Record { ref fields, .. }) => {
+            (Value::Map(items), Schema::Record(RecordSchema { fields, ..
+            })) => {
                 fields.iter().fold(None, |acc, field| {
                     if let Some(item) = items.get(&field.name) {
                         let res = item.validate_internal(&field.schema, names, enclosing_namespace);
@@ -565,14 +601,27 @@ impl Value {
     pub fn resolve(self, schema: &Schema) -> AvroResult<Self> {
         let enclosing_namespace = schema.namespace();
         let rs = ResolvedSchema::try_from(schema)?;
-        self.resolve_internal(schema, rs.get_names(), &enclosing_namespace)
+        self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None)
     }

-    fn resolve_internal(
+    /// Attempt to perform schema resolution on the value, with the given
+    /// [Schema](../schema/enum.Schema.html) and set of schemas to use for Refs resolution.
+    ///
+    /// See [Schema Resolution](https://avro.apache.org/docs/current/spec.html#Schema+Resolution)
+    /// in the Avro specification for the full set of rules of schema
+    /// resolution.
+    pub fn resolve_schemata(self, schema: &Schema, schemata: Vec<&Schema>) -> AvroResult<Self> {
+        let enclosing_namespace = schema.namespace();
+        let rs = ResolvedSchema::try_from(schemata)?;
+        self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None)
+    }
+
+    pub(crate) fn resolve_internal<S: Borrow<Schema> + Debug>(
         mut self,
         schema: &Schema,
-        names: &NamesRef,
+        names: &HashMap<Name, S>,
         enclosing_namespace: &Namespace,
+        field_default: &Option<JsonValue>,
     ) -> AvroResult<Self> {
         // Check if this schema is a union, and if the reader schema is not.
         if SchemaKind::from(&self) == SchemaKind::Union
@@ -585,13 +634,14 @@ impl Value {
             };
             self = v;
         }
+
         match *schema {
             Schema::Ref { ref name } => {
                 let name = name.fully_qualified_name(enclosing_namespace);

                 if let Some(resolved) = names.get(&name) {
                     debug!("Resolved {:?}", name);
-                    self.resolve_internal(resolved, names, &name.namespace)
+                    self.resolve_internal(resolved.borrow(), names, &name.namespace, field_default)
                 } else {
                     error!("Failed to resolve schema {:?}", name);
                     Err(Error::SchemaResolutionError(name.clone()))
@@ -605,24 +655,32 @@ impl Value {
             Schema::Double => self.resolve_double(),
             Schema::Bytes => self.resolve_bytes(),
             Schema::String => self.resolve_string(),
-            Schema::Fixed { size, .. } => self.resolve_fixed(size),
-            Schema::Union(ref inner) => self.resolve_union(inner, names, enclosing_namespace),
-            Schema::Enum { ref symbols, .. } => self.resolve_enum(symbols),
+            Schema::Fixed(FixedSchema { size, .. }) => self.resolve_fixed(size),
+            Schema::Union(ref inner) => {
+                self.resolve_union(inner, names, enclosing_namespace, field_default)
+            }
+            Schema::Enum(EnumSchema {
+                ref symbols,
+                ref default,
+                ..
+            }) => self.resolve_enum(symbols, default, field_default),
             Schema::Array(ref inner) => self.resolve_array(inner, names, enclosing_namespace),
             Schema::Map(ref inner) => self.resolve_map(inner, names, enclosing_namespace),
-            Schema::Record { ref fields, .. } => {
+            Schema::Record(RecordSchema { ref fields, ..
}) => { self.resolve_record(fields, names, enclosing_namespace) } - Schema::Decimal { + Schema::Decimal(DecimalSchema { scale, precision, ref inner, - } => self.resolve_decimal(precision, scale, inner), + }) => self.resolve_decimal(precision, scale, inner), Schema::Date => self.resolve_date(), Schema::TimeMillis => self.resolve_time_millis(), Schema::TimeMicros => self.resolve_time_micros(), Schema::TimestampMillis => self.resolve_timestamp_millis(), Schema::TimestampMicros => self.resolve_timestamp_micros(), + Schema::LocalTimestampMillis => self.resolve_local_timestamp_millis(), + Schema::LocalTimestampMicros => self.resolve_local_timestamp_micros(), Schema::Duration => self.resolve_duration(), Schema::Uuid => self.resolve_uuid(), } @@ -664,7 +722,7 @@ impl Value { return Err(Error::GetScaleAndPrecision { scale, precision }); } match inner { - &Schema::Fixed { size, .. } => { + &Schema::Fixed(FixedSchema { size, .. }) => { if max_prec_for_len(size)? < precision { return Err(Error::GetScaleWithFixedSize { size, precision }); } @@ -675,7 +733,7 @@ impl Value { match self { Value::Decimal(num) => { let num_bytes = num.len(); - if max_prec_for_len(num_bytes)? > precision { + if max_prec_for_len(num_bytes)? < precision { Err(Error::ComparePrecisionAndSize { precision, num_bytes, @@ -686,7 +744,7 @@ impl Value { // check num.bits() here } Value::Fixed(_, bytes) | Value::Bytes(bytes) => { - if max_prec_for_len(bytes.len())? > precision { + if max_prec_for_len(bytes.len())? < precision { Err(Error::ComparePrecisionAndSize { precision, num_bytes: bytes.len(), @@ -738,6 +796,26 @@ impl Value { } } + fn resolve_local_timestamp_millis(self) -> Result { + match self { + Value::LocalTimestampMillis(ts) | Value::Long(ts) => { + Ok(Value::LocalTimestampMillis(ts)) + } + Value::Int(ts) => Ok(Value::LocalTimestampMillis(i64::from(ts))), + other => Err(Error::GetLocalTimestampMillis(other.into())), + } + } + + fn resolve_local_timestamp_micros(self) -> Result { + match self { + Value::LocalTimestampMicros(ts) | Value::Long(ts) => { + Ok(Value::LocalTimestampMicros(ts)) + } + Value::Int(ts) => Ok(Value::LocalTimestampMicros(i64::from(ts))), + other => Err(Error::GetLocalTimestampMicros(other.into())), + } + } + fn resolve_null(self) -> Result { match self { Value::Null => Ok(Value::Null), @@ -826,41 +904,48 @@ impl Value { } } - fn resolve_enum(self, symbols: &[String]) -> Result { + pub(crate) fn resolve_enum( + self, + symbols: &[String], + enum_default: &Option, + _field_default: &Option, + ) -> Result { let validate_symbol = |symbol: String, symbols: &[String]| { if let Some(index) = symbols.iter().position(|item| item == &symbol) { Ok(Value::Enum(index as u32, symbol)) } else { - Err(Error::GetEnumDefault { - symbol, - symbols: symbols.into(), - }) + match enum_default { + Some(default) => { + if let Some(index) = symbols.iter().position(|item| item == default) { + Ok(Value::Enum(index as u32, default.clone())) + } else { + Err(Error::GetEnumDefault { + symbol, + symbols: symbols.into(), + }) + } + } + _ => Err(Error::GetEnumDefault { + symbol, + symbols: symbols.into(), + }), + } } }; match self { - Value::Enum(raw_index, s) => { - let index = usize::try_from(raw_index) - .map_err(|e| Error::ConvertU32ToUsize(e, raw_index))?; - if (0..=symbols.len()).contains(&index) { - validate_symbol(s, symbols) - } else { - Err(Error::GetEnumValue { - index, - nsymbols: symbols.len(), - }) - } - } + Value::Enum(_raw_index, s) => validate_symbol(s, symbols), Value::String(s) => validate_symbol(s, symbols), other => 
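+            // Any other value kind cannot name a symbol, so there is no enum
+            // default to fall back to here.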
Err(Error::GetEnum(other.into())), } } - fn resolve_union( + fn resolve_union + Debug>( self, schema: &UnionSchema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, + field_default: &Option, ) -> Result { let v = match self { // Both are unions case. @@ -868,47 +953,27 @@ impl Value { // Reader is a union, but writer is not. v => v, }; - - // A union might contain references to another schema in the form of a Schema::Ref, - // resolve these prior to finding the schema. - let resolved_schemas: Vec = schema - .schemas - .iter() - .cloned() - .map(|schema| match schema { - Schema::Ref { name } => { - let name = name.fully_qualified_name(enclosing_namespace); - names - .get(&name) - .map(|s| (**s).clone()) - .ok_or_else(|| Error::SchemaResolutionError(name.clone())) - } - schema => Ok(schema), - }) - .collect::, Error>>()?; - - let resolved_union_schema = UnionSchema::new(resolved_schemas).unwrap(); - let (i, inner) = resolved_union_schema - .find_schema(&v) + let (i, inner) = schema + .find_schema_with_known_schemata(&v, Some(names), enclosing_namespace) .ok_or(Error::FindUnionVariant)?; Ok(Value::Union( i as u32, - Box::new(v.resolve_internal(inner, names, enclosing_namespace)?), + Box::new(v.resolve_internal(inner, names, enclosing_namespace, field_default)?), )) } - fn resolve_array( + fn resolve_array + Debug>( self, schema: &Schema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { match self { Value::Array(items) => Ok(Value::Array( items .into_iter() - .map(|item| item.resolve_internal(schema, names, enclosing_namespace)) + .map(|item| item.resolve_internal(schema, names, enclosing_namespace, &None)) .collect::>()?, )), other => Err(Error::GetArray { @@ -918,10 +983,10 @@ impl Value { } } - fn resolve_map( + fn resolve_map + Debug>( self, schema: &Schema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { match self { @@ -930,7 +995,7 @@ impl Value { .into_iter() .map(|(key, value)| { value - .resolve_internal(schema, names, enclosing_namespace) + .resolve_internal(schema, names, enclosing_namespace, &None) .map(|value| (key, value)) }) .collect::>()?, @@ -942,10 +1007,10 @@ impl Value { } } - fn resolve_record( + fn resolve_record + Debug>( self, fields: &[RecordField], - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { let mut items = match self { @@ -967,9 +1032,15 @@ impl Value { Some(value) => value, None => match field.default { Some(ref value) => match field.schema { - Schema::Enum { ref symbols, .. } => { - Value::from(value.clone()).resolve_enum(symbols)? - } + Schema::Enum(EnumSchema { + ref symbols, + ref default, + .. 
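+                            // Both the enum's own default and the field's
+                            // default are passed along to `resolve_enum`.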
+ }) => Value::from(value.clone()).resolve_enum( + symbols, + default, + &field.default.clone(), + )?, Schema::Union(ref union_schema) => { let first = &union_schema.variants()[0]; // NOTE: this match exists only to optimize null defaults for large @@ -982,6 +1053,7 @@ impl Value { first, names, enclosing_namespace, + &field.default, )?), ), } @@ -994,7 +1066,7 @@ impl Value { }, }; value - .resolve_internal(&field.schema, names, enclosing_namespace) + .resolve_internal(&field.schema, names, enclosing_namespace, &field.default) .map(|value| (field.name.clone(), value)) }) .collect::, _>>()?; @@ -1023,12 +1095,66 @@ mod tests { schema::{Name, RecordField, RecordFieldOrder, Schema, UnionSchema}, types::Value, }; - use apache_avro_test_helper::logger::{assert_logged, assert_not_logged}; + use apache_avro_test_helper::{ + logger::{assert_logged, assert_not_logged}, + TestResult, + }; + use num_bigint::BigInt; use pretty_assertions::assert_eq; use uuid::Uuid; #[test] - fn validate() { + fn avro_3809_validate_nested_records_with_implicit_namespace() -> TestResult { + let schema = Schema::parse_str( + r#"{ + "name": "record_name", + "namespace": "space", + "type": "record", + "fields": [ + { + "name": "outer_field_1", + "type": { + "type": "record", + "name": "middle_record_name", + "namespace": "middle_namespace", + "fields": [ + { + "name": "middle_field_1", + "type": { + "type": "record", + "name": "inner_record_name", + "fields": [ + { "name": "inner_field_1", "type": "double" } + ] + } + }, + { "name": "middle_field_2", "type": "inner_record_name" } + ] + } + } + ] + }"#, + )?; + let value = Value::Record(vec![( + "outer_field_1".into(), + Value::Record(vec![ + ( + "middle_field_1".into(), + Value::Record(vec![("inner_field_1".into(), Value::Double(1.2f64))]), + ), + ( + "middle_field_2".into(), + Value::Record(vec![("inner_field_1".into(), Value::Double(1.6f64))]), + ), + ]), + )]); + + assert!(value.validate(&schema)); + Ok(()) + } + + #[test] + fn validate() -> TestResult { let value_schema_valid = vec![ (Value::Int(42), Schema::Int, true, ""), (Value::Int(43), Schema::Long, true, ""), @@ -1042,19 +1168,19 @@ mod tests { ), ( Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), true, "", ), ( Value::Union(1, Box::new(Value::Int(42))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), true, "", ), ( Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int])?), false, "Invalid value: Union(0, Null) for schema: Union(UnionSchema { schemas: [Double, Int], variant_index: {Int: 1, Double: 0} }). 
Reason: Unsupported value-schema combination", ), @@ -1067,7 +1193,7 @@ mod tests { Schema::String, Schema::Int, ]) - .unwrap(), + ?, ), true, "", @@ -1075,14 +1201,14 @@ mod tests { ( Value::Union(1, Box::new(Value::Long(42i64))), Schema::Union( - UnionSchema::new(vec![Schema::Null, Schema::TimestampMillis]).unwrap(), + UnionSchema::new(vec![Schema::Null, Schema::TimestampMillis])?, ), true, "", ), ( Value::Union(2, Box::new(Value::Long(1_i64))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), false, "Invalid value: Union(2, Long(1)) for schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }). Reason: No schema in the union at position '2'", ), @@ -1113,7 +1239,7 @@ mod tests { ), ( Value::Record(vec![("unknown_field_name".to_string(), Value::Null)]), - Schema::Record { + Schema::Record(RecordSchema { name: Name::new("record_name").unwrap(), aliases: None, doc: None, @@ -1121,6 +1247,7 @@ mod tests { name: "field_name".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Int, order: RecordFieldOrder::Ignore, position: 0, @@ -1128,13 +1255,13 @@ mod tests { }], lookup: Default::default(), attributes: Default::default(), - }, + }), false, - r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, default: None, schema: Int, order: Ignore, position: 0, custom_attributes: {} }], lookup: {}, attributes: {} }. Reason: There is no schema field for field 'unknown_field_name'"#, + r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Int, order: Ignore, position: 0, custom_attributes: {} }], lookup: {}, attributes: {} }). Reason: There is no schema field for field 'unknown_field_name'"#, ), ( Value::Record(vec![("field_name".to_string(), Value::Null)]), - Schema::Record { + Schema::Record(RecordSchema { name: Name::new("record_name").unwrap(), aliases: None, doc: None, @@ -1142,6 +1269,7 @@ mod tests { name: "field_name".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Ref { name: Name::new("missing").unwrap(), }, @@ -1151,9 +1279,9 @@ mod tests { }], lookup: [("field_name".to_string(), 0)].iter().cloned().collect(), attributes: Default::default(), - }, + }), false, - r#"Invalid value: Record([("field_name", Null)]) for schema: Record { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, default: None, schema: Ref { name: Name { name: "missing", namespace: None } }, order: Ignore, position: 0, custom_attributes: {} }], lookup: {"field_name": 0}, attributes: {} }. Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. Parsed names: []"#, + r#"Invalid value: Record([("field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Ref { name: Name { name: "missing", namespace: None } }, order: Ignore, position: 0, custom_attributes: {} }], lookup: {"field_name": 0}, attributes: {} }). 
Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. Parsed names: []"#, ), ]; @@ -1171,17 +1299,19 @@ mod tests { assert_eq!(expected_err_message, full_err_message); } } + + Ok(()) } #[test] - fn validate_fixed() { - let schema = Schema::Fixed { + fn validate_fixed() -> TestResult { + let schema = Schema::Fixed(FixedSchema { size: 4, name: Name::new("some_fixed").unwrap(), aliases: None, doc: None, attributes: Default::default(), - }; + }); assert!(Value::Fixed(4, vec![0, 0, 0, 0]).validate(&schema)); let value = Value::Fixed(5, vec![0, 0, 0, 0, 0]); @@ -1204,11 +1334,13 @@ mod tests { ) .as_str(), ); + + Ok(()) } #[test] - fn validate_enum() { - let schema = Schema::Enum { + fn validate_enum() -> TestResult { + let schema = Schema::Enum(EnumSchema { name: Name::new("some_enum").unwrap(), aliases: None, doc: None, @@ -1218,8 +1350,9 @@ mod tests { "diamonds".to_string(), "clubs".to_string(), ], + default: None, attributes: Default::default(), - }; + }); assert!(Value::Enum(0, "spades".to_string()).validate(&schema)); assert!(Value::String("spades".to_string()).validate(&schema)); @@ -1254,7 +1387,7 @@ mod tests { .as_str(), ); - let other_schema = Schema::Enum { + let other_schema = Schema::Enum(EnumSchema { name: Name::new("some_other_enum").unwrap(), aliases: None, doc: None, @@ -1264,8 +1397,9 @@ mod tests { "clubs".to_string(), "spades".to_string(), ], + default: None, attributes: Default::default(), - }; + }); let value = Value::Enum(0, "spades".to_string()); assert!(!value.validate(&other_schema)); @@ -1276,10 +1410,12 @@ mod tests { ) .as_str(), ); + + Ok(()) } #[test] - fn validate_record() { + fn validate_record() -> TestResult { // { // "type": "record", // "fields": [ @@ -1292,7 +1428,7 @@ mod tests { // } // ] // } - let schema = Schema::Record { + let schema = Schema::Record(RecordSchema { name: Name::new("some_record").unwrap(), aliases: None, doc: None, @@ -1301,6 +1437,7 @@ mod tests { name: "a".to_string(), doc: None, default: None, + aliases: None, schema: Schema::Long, order: RecordFieldOrder::Ascending, position: 0, @@ -1310,6 +1447,7 @@ mod tests { name: "b".to_string(), doc: None, default: None, + aliases: None, schema: Schema::String, order: RecordFieldOrder::Ascending, position: 1, @@ -1319,9 +1457,8 @@ mod tests { name: "c".to_string(), doc: None, default: Some(JsonValue::Null), - schema: Schema::Union( - UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap(), - ), + aliases: None, + schema: Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), order: RecordFieldOrder::Ascending, position: 2, custom_attributes: Default::default(), @@ -1336,7 +1473,7 @@ mod tests { .cloned() .collect(), attributes: Default::default(), - }; + }); assert!(Value::Record(vec![ ("a".to_string(), Value::Long(42i64)), @@ -1356,7 +1493,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 
1, "c": 2}, attributes: {} }. Reason: Unsupported value-schema combination"#, + r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Unsupported value-schema combination"#, ); let value = Value::Record(vec![ @@ -1365,7 +1502,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. Reason: Could not find matching type in union"#, + r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Could not find matching type in union"#, ); assert_not_logged( r#"Invalid value: String("foo") for schema: Int. Reason: Unsupported value-schema combination"#, @@ -1377,7 +1514,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. 
Reason: There is no schema field for field 'd'"#, + r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: There is no schema field for field 'd'"#, ); let value = Value::Record(vec![ @@ -1388,7 +1525,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. Reason: The value's records length (4) is greater than the schema's (3 fields)"#, + r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: The value's records length (4) is greater than the schema's (3 fields)"#, ); assert!(Value::Map( @@ -1408,11 +1545,11 @@ mod tests { ) .validate(&schema)); assert_logged( - r#"Invalid value: Map({"d": Long(123)}) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. 
Reason: Field with name '"a"' is not a member of the map items + r#"Invalid value: Map({"d": Long(123)}) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Field with name '"a"' is not a member of the map items Field with name '"b"' is not a member of the map items"#, ); - let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema]).unwrap()); + let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema])?); assert!(Value::Union( 1, @@ -1435,33 +1572,41 @@ Field with name '"b"' is not a member of the map items"#, )) ) .validate(&union_schema)); + + Ok(()) } #[test] - fn resolve_bytes_ok() { + fn resolve_bytes_ok() -> TestResult { let value = Value::Array(vec![Value::Int(0), Value::Int(42)]); assert_eq!( - value.resolve(&Schema::Bytes).unwrap(), + value.resolve(&Schema::Bytes)?, Value::Bytes(vec![0u8, 42u8]) ); + + Ok(()) } #[test] - fn resolve_string_from_bytes() { + fn resolve_string_from_bytes() -> TestResult { let value = Value::Bytes(vec![97, 98, 99]); assert_eq!( - value.resolve(&Schema::String).unwrap(), + value.resolve(&Schema::String)?, Value::String("abc".to_string()) ); + + Ok(()) } #[test] - fn resolve_string_from_fixed() { + fn resolve_string_from_fixed() -> TestResult { let value = Value::Fixed(3, vec![97, 98, 99]); assert_eq!( - value.resolve(&Schema::String).unwrap(), + value.resolve(&Schema::String)?, Value::String("abc".to_string()) ); + + Ok(()) } #[test] @@ -1471,28 +1616,27 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn resolve_decimal_bytes() { - let value = Value::Decimal(Decimal::from(vec![1, 2])); - value - .clone() - .resolve(&Schema::Decimal { - precision: 10, - scale: 4, - inner: Box::new(Schema::Bytes), - }) - .unwrap(); + fn resolve_decimal_bytes() -> TestResult { + let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); + value.clone().resolve(&Schema::Decimal(DecimalSchema { + precision: 10, + scale: 4, + inner: Box::new(Schema::Bytes), + }))?; assert!(value.resolve(&Schema::String).is_err()); + + Ok(()) } #[test] fn resolve_decimal_invalid_scale() { - let value = Value::Decimal(Decimal::from(vec![1])); + let value = Value::Decimal(Decimal::from(vec![1, 2])); assert!(value - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 2, scale: 3, inner: Box::new(Schema::Bytes), - }) + })) .is_err()); } @@ -1500,30 +1644,30 @@ Field with name '"b"' is not a member of the map items"#, fn resolve_decimal_invalid_precision_for_length() { let value = Value::Decimal(Decimal::from((1u8..=8u8).rev().collect::>())); assert!(value - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 1, scale: 0, inner: Box::new(Schema::Bytes), - }) - .is_err()); + })) + .is_ok()); } #[test] fn resolve_decimal_fixed() { - let value = Value::Decimal(Decimal::from(vec![1, 2])); + let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); 
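+        // Five big-endian bytes hold a signed value of up to 2^39 - 1, i.e.
+        // up to 11 decimal digits, which satisfies the declared precision of
+        // 10 below; the old two-byte value (at most 4 digits) would now fail
+        // the corrected `max_prec_for_len(..) < precision` check in
+        // `resolve_decimal`.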
assert!(value .clone() - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 10, scale: 1, - inner: Box::new(Schema::Fixed { + inner: Box::new(Schema::Fixed(FixedSchema { name: Name::new("decimal").unwrap(), aliases: None, size: 20, doc: None, attributes: Default::default(), - }) - }) + })) + })) .is_ok()); assert!(value.resolve(&Schema::String).is_err()); } @@ -1569,6 +1713,26 @@ Field with name '"b"' is not a member of the map items"#, assert!(value.resolve(&Schema::TimestampMicros).is_err()); } + #[test] + fn test_avro_3853_resolve_timestamp_millis() { + let value = Value::LocalTimestampMillis(10); + assert!(value.clone().resolve(&Schema::LocalTimestampMillis).is_ok()); + assert!(value.resolve(&Schema::Float).is_err()); + + let value = Value::Float(10.0f32); + assert!(value.resolve(&Schema::LocalTimestampMillis).is_err()); + } + + #[test] + fn test_avro_3853_resolve_timestamp_micros() { + let value = Value::LocalTimestampMicros(10); + assert!(value.clone().resolve(&Schema::LocalTimestampMicros).is_ok()); + assert!(value.resolve(&Schema::Int).is_err()); + + let value = Value::Double(10.0); + assert!(value.resolve(&Schema::LocalTimestampMicros).is_err()); + } + #[test] fn resolve_duration() { let value = Value::Duration(Duration::new( @@ -1582,10 +1746,12 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn resolve_uuid() { - let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537").unwrap()); + fn resolve_uuid() -> TestResult { + let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537")?); assert!(value.clone().resolve(&Schema::Uuid).is_ok()); assert!(value.resolve(&Schema::TimestampMicros).is_err()); + + Ok(()) } #[test] @@ -1595,7 +1761,7 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn test_avro_3621_resolve_to_nullable_union() { + fn test_avro_3621_resolve_to_nullable_union() -> TestResult { let schema = Schema::parse_str( r#"{ "type": "record", @@ -1628,8 +1794,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let value = Value::Record(vec![( "event".to_string(), @@ -1642,33 +1807,35 @@ Field with name '"b"' is not a member of the map items"#, Value::Record(vec![("size".to_string(), Value::Int(1))]), )]); assert!(value.resolve(&schema).is_err()); + + Ok(()) } #[test] - fn json_from_avro() { - assert_eq!(JsonValue::try_from(Value::Null).unwrap(), JsonValue::Null); + fn json_from_avro() -> TestResult { + assert_eq!(JsonValue::try_from(Value::Null)?, JsonValue::Null); assert_eq!( - JsonValue::try_from(Value::Boolean(true)).unwrap(), + JsonValue::try_from(Value::Boolean(true))?, JsonValue::Bool(true) ); assert_eq!( - JsonValue::try_from(Value::Int(1)).unwrap(), + JsonValue::try_from(Value::Int(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Long(1)).unwrap(), + JsonValue::try_from(Value::Long(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Float(1.0)).unwrap(), + JsonValue::try_from(Value::Float(1.0))?, JsonValue::Number(Number::from_f64(1.0).unwrap()) ); assert_eq!( - JsonValue::try_from(Value::Double(1.0)).unwrap(), + JsonValue::try_from(Value::Double(1.0))?, JsonValue::Number(Number::from_f64(1.0).unwrap()) ); assert_eq!( - JsonValue::try_from(Value::Bytes(vec![1, 2, 3])).unwrap(), + JsonValue::try_from(Value::Bytes(vec![1, 2, 3]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1676,11 +1843,11 @@ Field with name '"b"' 
is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::String("test".into())).unwrap(), + JsonValue::try_from(Value::String("test".into()))?, JsonValue::String("test".into()) ); assert_eq!( - JsonValue::try_from(Value::Fixed(3, vec![1, 2, 3])).unwrap(), + JsonValue::try_from(Value::Fixed(3, vec![1, 2, 3]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1688,12 +1855,11 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::Enum(1, "test_enum".into())).unwrap(), + JsonValue::try_from(Value::Enum(1, "test_enum".into()))?, JsonValue::String("test_enum".into()) ); assert_eq!( - JsonValue::try_from(Value::Union(1, Box::new(Value::String("test_enum".into())))) - .unwrap(), + JsonValue::try_from(Value::Union(1, Box::new(Value::String("test_enum".into()))))?, JsonValue::String("test_enum".into()) ); assert_eq!( @@ -1701,8 +1867,7 @@ Field with name '"b"' is not a member of the map items"#, Value::Int(1), Value::Int(2), Value::Int(3) - ])) - .unwrap(), + ]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1718,8 +1883,7 @@ Field with name '"b"' is not a member of the map items"#, ] .into_iter() .collect() - )) - .unwrap(), + ))?, JsonValue::Object( vec![ ("v1".to_string(), JsonValue::Number(1.into())), @@ -1735,8 +1899,7 @@ Field with name '"b"' is not a member of the map items"#, ("v1".to_string(), Value::Int(1)), ("v2".to_string(), Value::Int(2)), ("v3".to_string(), Value::Int(3)) - ])) - .unwrap(), + ]))?, JsonValue::Object( vec![ ("v1".to_string(), JsonValue::Number(1.into())), @@ -1748,11 +1911,11 @@ Field with name '"b"' is not a member of the map items"#, ) ); assert_eq!( - JsonValue::try_from(Value::Date(1)).unwrap(), + JsonValue::try_from(Value::Date(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Decimal(vec![1, 2, 3].into())).unwrap(), + JsonValue::try_from(Value::Decimal(vec![1, 2, 3].into()))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1760,26 +1923,33 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::TimeMillis(1)).unwrap(), + JsonValue::try_from(Value::TimeMillis(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimeMicros(1)).unwrap(), + JsonValue::try_from(Value::TimeMicros(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimestampMillis(1)).unwrap(), + JsonValue::try_from(Value::TimestampMillis(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimestampMicros(1)).unwrap(), + JsonValue::try_from(Value::TimestampMicros(1))?, + JsonValue::Number(1.into()) + ); + assert_eq!( + JsonValue::try_from(Value::LocalTimestampMillis(1))?, + JsonValue::Number(1.into()) + ); + assert_eq!( + JsonValue::try_from(Value::LocalTimestampMicros(1))?, JsonValue::Number(1.into()) ); assert_eq!( JsonValue::try_from(Value::Duration( [1u8, 2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8, 10u8, 11u8, 12u8].into() - )) - .unwrap(), + ))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1796,16 +1966,17 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::Uuid( - Uuid::parse_str("936DA01F-9ABD-4D9D-80C7-02AF85C822A8").unwrap() - )) - .unwrap(), + JsonValue::try_from(Value::Uuid(Uuid::parse_str( + "936DA01F-9ABD-4D9D-80C7-02AF85C822A8" + )?))?, 
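+            // `Uuid` renders as lowercase hyphenated text, hence the case
+            // change relative to the input parsed above.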
JsonValue::String("936da01f-9abd-4d9d-80c7-02af85c822a8".into()) ); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_record() { + fn test_avro_3433_recursive_resolves_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1829,8 +2000,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1838,10 +2008,12 @@ Field with name '"b"' is not a member of the map items"#, outer .resolve(&schema) .expect("Record definition defined in one field must be available in other field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_array() { + fn test_avro_3433_recursive_resolves_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1871,8 +2043,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1886,10 +2057,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in array definition must be resolvable from map"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_map() { + fn test_avro_3433_recursive_resolves_map() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1916,8 +2089,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1931,10 +2103,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in record field must be resolvable from map field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_record_wrapper() { + fn test_avro_3433_recursive_resolves_record_wrapper() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1965,8 +2139,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![( @@ -1976,10 +2149,12 @@ Field with name '"b"' is not a member of the map items"#, let outer_value = Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); outer_value.resolve(&schema).expect("Record schema defined in field must be resolvable in Record schema defined in other field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_map_and_array() { + fn test_avro_3433_recursive_resolves_map_and_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2009,8 +2184,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -2024,10 +2198,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in map definition must be resolvable from array"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_union() { + fn test_avro_3433_recursive_resolves_union() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2051,8 +2227,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = 
Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -2067,10 +2242,12 @@ Field with name '"b"' is not a member of the map items"#, outer2 .resolve(&schema) .expect("Record definition defined in union must be resolved in other field"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_outer_namespace() { + fn test_avro_3461_test_multi_level_resolve_outer_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2112,7 +2289,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -2153,10 +2330,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_middle_namespace() { + fn test_avro_3461_test_multi_level_resolve_middle_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2199,7 +2378,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -2240,10 +2419,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_inner_namespace() { + fn test_avro_3461_test_multi_level_resolve_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2287,7 +2468,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( @@ -2329,10 +2510,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3460_validation_with_refs() { + fn test_avro_3460_validation_with_refs() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2356,8 +2539,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value_right = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value_wrong1 = Value::Record(vec![("z".into(), Value::Null)]); @@ -2380,10 +2562,12 @@ Field with name '"b"' is not a member of the map items"#, !outer2.validate(&schema), "field b record is invalid against the schema" ); // this should pass, but doesn't + + Ok(()) } #[test] - fn test_avro_3460_validation_with_refs_real_struct() { + fn test_avro_3460_validation_with_refs_real_struct() -> TestResult { use crate::ser::Serializer; use serde::Serialize; @@ -2433,8 +2617,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let test_inner = TestInner { z: 3 }; let test_outer1 = 
TestRefSchemaStruct1 { @@ -2451,11 +2634,11 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_outer1: Value = test_outer1.serialize(&mut ser).unwrap(); + let test_outer1: Value = test_outer1.serialize(&mut ser)?; let mut ser = Serializer::default(); - let test_outer2: Value = test_outer2.serialize(&mut ser).unwrap(); + let test_outer2: Value = test_outer2.serialize(&mut ser)?; let mut ser = Serializer::default(); - let test_outer3: Value = test_outer3.serialize(&mut ser).unwrap(); + let test_outer3: Value = test_outer3.serialize(&mut ser)?; assert!( !test_outer1.validate(&schema), @@ -2469,9 +2652,11 @@ Field with name '"b"' is not a member of the map items"#, !test_outer3.validate(&schema), "field b record is invalid against the schema" ); + + Ok(()) } - fn avro_3674_with_or_without_namespace(with_namespace: bool) { + fn avro_3674_with_or_without_namespace(with_namespace: bool) -> TestResult { use crate::ser::Serializer; use serde::Serialize; @@ -2513,7 +2698,7 @@ Field with name '"b"' is not a member of the map items"#, }, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; #[derive(Serialize)] enum EnumType { @@ -2542,25 +2727,27 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser).unwrap(); + let test_value: Value = msg.serialize(&mut ser)?; assert!(test_value.validate(&schema), "test_value should validate"); assert!( test_value.resolve(&schema).is_ok(), "test_value should resolve" ); + + Ok(()) } #[test] - fn test_avro_3674_validate_no_namespace_resolution() { - avro_3674_with_or_without_namespace(false); + fn test_avro_3674_validate_no_namespace_resolution() -> TestResult { + avro_3674_with_or_without_namespace(false) } #[test] - fn test_avro_3674_validate_with_namespace_resolution() { - avro_3674_with_or_without_namespace(true); + fn test_avro_3674_validate_with_namespace_resolution() -> TestResult { + avro_3674_with_or_without_namespace(true) } - fn avro_3688_schema_resolution_panic(set_field_b: bool) { + fn avro_3688_schema_resolution_panic(set_field_b: bool) -> TestResult { use crate::ser::Serializer; use serde::{Deserialize, Serialize}; @@ -2607,7 +2794,7 @@ Field with name '"b"' is not a member of the map items"#, field_b: Option, } - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; let msg = Message { field_a: Some(Inner { @@ -2623,21 +2810,119 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser).unwrap(); + let test_value: Value = msg.serialize(&mut ser)?; assert!(test_value.validate(&schema), "test_value should validate"); assert!( test_value.resolve(&schema).is_ok(), "test_value should resolve" ); + + Ok(()) + } + + #[test] + fn test_avro_3688_field_b_not_set() -> TestResult { + avro_3688_schema_resolution_panic(false) + } + + #[test] + fn test_avro_3688_field_b_set() -> TestResult { + avro_3688_schema_resolution_panic(true) + } + + #[test] + fn test_avro_3764_use_resolve_schemata() -> TestResult { + let referenced_schema = + r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; + let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": [{"name": "reference", "type": "enumForReference"}]}"#; + + let value: serde_json::Value = serde_json::from_str( + r#" + { + "reference": 
"A" + } + "#, + )?; + + let avro_value = Value::from(value); + + let schemas = Schema::parse_list(&[main_schema, referenced_schema])?; + + let main_schema = schemas.get(0).unwrap(); + let schemata: Vec<_> = schemas.iter().skip(1).collect(); + + let resolve_result = avro_value.clone().resolve_schemata(main_schema, schemata); + + assert!( + resolve_result.is_ok(), + "result of resolving with schemata should be ok, got: {:?}", + resolve_result + ); + + let resolve_result = avro_value.resolve(main_schema); + assert!( + resolve_result.is_err(), + "result of resolving without schemata should be err, got: {:?}", + resolve_result + ); + + Ok(()) } #[test] - fn test_avro_3688_field_b_not_set() { - avro_3688_schema_resolution_panic(false); + fn test_avro_3767_union_resolve_complex_refs() -> TestResult { + let referenced_enum = + r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; + let referenced_record = r#"{"name": "recordForReference", "type": "record", "fields": [{"name": "refInRecord", "type": "enumForReference"}]}"#; + let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": [{"name": "reference", "type": ["null", "recordForReference"]}]}"#; + + let value: serde_json::Value = serde_json::from_str( + r#" + { + "reference": { + "refInRecord": "A" + } + } + "#, + )?; + + let avro_value = Value::from(value); + + let schemata = Schema::parse_list(&[referenced_enum, referenced_record, main_schema])?; + + let main_schema = schemata.last().unwrap(); + let other_schemata: Vec<&Schema> = schemata.iter().take(2).collect(); + + let resolve_result = avro_value.resolve_schemata(main_schema, other_schemata); + + assert!( + resolve_result.is_ok(), + "result of resolving with schemata should be ok, got: {:?}", + resolve_result + ); + + assert!( + resolve_result?.validate_schemata(schemata.iter().collect()), + "result of validation with schemata should be true" + ); + + Ok(()) } #[test] - fn test_avro_3688_field_b_set() { - avro_3688_schema_resolution_panic(true); + fn test_avro_3782_incorrect_decimal_resolving() -> TestResult { + let schema = r#"{"name": "decimalSchema", "logicalType": "decimal", "type": "fixed", "precision": 8, "scale": 0, "size": 8}"#; + + let avro_value = Value::Decimal(Decimal::from( + BigInt::from(12345678u32).to_signed_bytes_be(), + )); + let schema = Schema::parse_str(schema)?; + let resolve_result = avro_value.resolve(&schema); + assert!( + resolve_result.is_ok(), + "resolve result must be ok, got: {resolve_result:?}" + ); + + Ok(()) } } diff --git a/lang/rust/avro/src/util.rs b/lang/rust/avro/src/util.rs index e18b5641fce..2ea134c77a6 100644 --- a/lang/rust/avro/src/util.rs +++ b/lang/rust/avro/src/util.rs @@ -17,16 +17,31 @@ use crate::{schema::Documentation, AvroResult, Error}; use serde_json::{Map, Value}; -use std::{convert::TryFrom, i64, io::Read, sync::Once}; +use std::{ + convert::TryFrom, + i64, + io::Read, + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Once, + }, +}; /// Maximum number of bytes that can be allocated when decoding /// Avro-encoded values. This is a protection against ill-formed /// data, whose length field might be interpreted as enormous. /// See max_allocation_bytes to change this limit. 
pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024;
-static mut MAX_ALLOCATION_BYTES: usize = DEFAULT_MAX_ALLOCATION_BYTES;
+static MAX_ALLOCATION_BYTES: AtomicUsize = AtomicUsize::new(DEFAULT_MAX_ALLOCATION_BYTES);
 static MAX_ALLOCATION_BYTES_ONCE: Once = Once::new();

+/// Whether to set serialization & deserialization traits
+/// as `human_readable` or not.
+/// See [set_serde_human_readable] to change this value.
+// crate-visible for testing
+pub(crate) static SERDE_HUMAN_READABLE: AtomicBool = AtomicBool::new(true);
+static SERDE_HUMAN_READABLE_ONCE: Once = Once::new();
+
 pub trait MapHelper {
     fn string(&self, key: &str) -> Option<String>;

@@ -132,12 +147,10 @@ fn decode_variable<R: Read>(reader: &mut R) -> AvroResult<u64> {
 /// to set the limit either when calling this method, or when decoding for
 /// the first time.
 pub fn max_allocation_bytes(num_bytes: usize) -> usize {
-    unsafe {
-        MAX_ALLOCATION_BYTES_ONCE.call_once(|| {
-            MAX_ALLOCATION_BYTES = num_bytes;
-        });
-        MAX_ALLOCATION_BYTES
-    }
+    MAX_ALLOCATION_BYTES_ONCE.call_once(|| {
+        MAX_ALLOCATION_BYTES.store(num_bytes, Ordering::Release);
+    });
+    MAX_ALLOCATION_BYTES.load(Ordering::Acquire)
 }

 pub fn safe_len(len: usize) -> AvroResult<usize> {
@@ -153,9 +166,28 @@ pub fn safe_len(len: usize) -> AvroResult<usize> {
     }
 }

+/// Set whether serializing/deserializing is marked as human readable in serde traits.
+/// This will adjust the return value of `is_human_readable()` for both.
+/// Once called, the value cannot be changed.
+///
+/// **NOTE** This function must be called before serializing/deserializing **any** data. The
+/// library leverages [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html),
+/// so the value is fixed by the first call to this method and later calls
+/// have no effect.
+pub fn set_serde_human_readable(human_readable: bool) {
+    SERDE_HUMAN_READABLE_ONCE.call_once(|| {
+        SERDE_HUMAN_READABLE.store(human_readable, Ordering::Release);
+    });
+}
+
+pub(crate) fn is_human_readable() -> bool {
+    SERDE_HUMAN_READABLE.load(Ordering::Acquire)
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
+    use apache_avro_test_helper::TestResult;
     use pretty_assertions::assert_eq;

     #[test]
@@ -249,8 +281,10 @@ mod tests {
     }

     #[test]
-    fn test_safe_len() {
-        assert_eq!(42usize, safe_len(42usize).unwrap());
+    fn test_safe_len() -> TestResult {
+        assert_eq!(42usize, safe_len(42usize)?);
         assert!(safe_len(1024 * 1024 * 1024).is_err());
+
+        Ok(())
     }
 }
diff --git a/lang/rust/avro/src/writer.rs b/lang/rust/avro/src/writer.rs
index 82a5b138723..83b7d8b00a0 100644
--- a/lang/rust/avro/src/writer.rs
+++ b/lang/rust/avro/src/writer.rs
@@ -75,6 +75,25 @@ impl<'a, W: Write> Writer<'a, W> {
         w
     }

+    /// Creates a `Writer` with a specific `Codec` given a `Schema` and something implementing the
+    /// `io::Write` trait to write to.
+    /// If the `schema` is incomplete, i.e. contains `Schema::Ref`s, then all dependencies must
+    /// be provided in `schemata`.
+    pub fn with_schemata(
+        schema: &'a Schema,
+        schemata: Vec<&'a Schema>,
+        writer: W,
+        codec: Codec,
+    ) -> Self {
+        let mut w = Self::builder()
+            .schema(schema)
+            .writer(writer)
+            .codec(codec)
+            .build();
+        w.resolved_schema = ResolvedSchema::try_from(schemata).ok();
+        w
+    }
+
     /// Creates a `Writer` that will append values to already populated
     /// `std::io::Write` using the provided `marker`
     /// No compression `Codec` will be used.
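A usage sketch for the new `with_schemata` constructor (the `card`/`suit` schemas and the `main` wrapper are hypothetical, not part of this patch): the root schema keeps a `Schema::Ref` to the separately defined enum, so the writer needs the full schema list to resolve it.

```rust
use apache_avro::{
    types::{Record, Value},
    Codec, Schema, Writer,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical schemas: the record references an enum defined separately.
    let suit = r#"{"name": "suit", "type": "enum", "symbols": ["SPADES", "HEARTS"]}"#;
    let card = r#"{"name": "card", "type": "record",
                   "fields": [{"name": "s", "type": "suit"}]}"#;
    let schemata = Schema::parse_list(&[card, suit])?;
    let refs: Vec<&Schema> = schemata.iter().collect();

    // `schemata[0]` is the root record; its `s` field is a `Schema::Ref`
    // that only resolves because the full list is supplied here.
    let mut writer = Writer::with_schemata(&schemata[0], refs, Vec::new(), Codec::Null);

    let mut record = Record::new(writer.schema()).unwrap();
    record.put("s", Value::Enum(0, "SPADES".to_owned()));
    writer.append(record)?;

    let _encoded: Vec<u8> = writer.into_inner()?;
    Ok(())
}
```

Without the schemata, building the same writer via `Writer::with_codec` would fail to validate or encode the record, since the `suit` reference could not be resolved.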
@@ -101,6 +120,26 @@ impl<'a, W: Write> Writer<'a, W> { w } + /// Creates a `Writer` that will append values to already populated + /// `std::io::Write` using the provided `marker` + pub fn append_to_with_codec_schemata( + schema: &'a Schema, + schemata: Vec<&'a Schema>, + writer: W, + codec: Codec, + marker: [u8; 16], + ) -> Self { + let mut w = Self::builder() + .schema(schema) + .writer(writer) + .codec(codec) + .marker(marker) + .build(); + w.has_header = true; + w.resolved_schema = ResolvedSchema::try_from(schemata).ok(); + w + } + /// Get a reference to the `Schema` associated to a `Writer`. pub fn schema(&self) -> &'a Schema { self.schema @@ -134,7 +173,7 @@ impl<'a, W: Write> Writer<'a, W> { // Lazy init for users using the builder pattern with error throwing match self.resolved_schema { Some(ref rs) => { - write_value_ref_resolved(rs, value, &mut self.buffer)?; + write_value_ref_resolved(self.schema, rs, value, &mut self.buffer)?; self.num_values += 1; if self.buffer.len() >= self.block_size { @@ -376,6 +415,22 @@ fn write_avro_datum>( Ok(()) } +fn write_avro_datum_schemata>( + schema: &Schema, + schemata: Vec<&Schema>, + value: T, + buffer: &mut Vec, +) -> AvroResult<()> { + let avro = value.into(); + let rs = ResolvedSchema::try_from(schemata)?; + let names = rs.get_names(); + let enclosing_namespace = schema.namespace(); + if let Some(_err) = avro.validate_internal(schema, names, &enclosing_namespace) { + return Err(Error::Validation); + } + encode_internal(&avro, schema, names, &enclosing_namespace, buffer) +} + /// Writer that encodes messages according to the single object encoding v1 spec /// Uses an API similar to the current File Writer /// Writes all object bytes at once, and drains internal buffer @@ -484,26 +539,21 @@ where } fn write_value_ref_resolved( + schema: &Schema, resolved_schema: &ResolvedSchema, value: &Value, buffer: &mut Vec, ) -> AvroResult<()> { - let root_schema = resolved_schema.get_root_schema(); - if let Some(err) = value.validate_internal( - root_schema, - resolved_schema.get_names(), - &root_schema.namespace(), - ) { - return Err(Error::ValidationWithReason(err)); + match value.validate_internal(schema, resolved_schema.get_names(), &schema.namespace()) { + Some(err) => Err(Error::ValidationWithReason(err)), + None => encode_internal( + value, + schema, + resolved_schema.get_names(), + &schema.namespace(), + buffer, + ), } - encode_internal( - value, - root_schema, - resolved_schema.get_names(), - &root_schema.namespace(), - buffer, - )?; - Ok(()) } fn write_value_ref_owned_resolved( @@ -541,6 +591,20 @@ pub fn to_avro_datum>(schema: &Schema, value: T) -> AvroResult>( + schema: &Schema, + schemata: Vec<&Schema>, + value: T, +) -> AvroResult> { + let mut buffer = Vec::new(); + write_avro_datum_schemata(schema, schemata, value, &mut buffer)?; + Ok(buffer) +} + #[cfg(not(target_arch = "wasm32"))] fn generate_sync_marker() -> [u8; 16] { let mut marker = [0_u8; 16]; @@ -568,13 +632,15 @@ mod tests { use crate::{ decimal::Decimal, duration::{Days, Duration, Millis, Months}, - schema::Name, + schema::{DecimalSchema, FixedSchema, Name}, types::Record, util::zig_i64, }; use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; + use apache_avro_test_helper::TestResult; + const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len(); const SCHEMA: &str = r#" @@ -597,8 +663,8 @@ mod tests { const UNION_SCHEMA: &str = r#"["null", "long"]"#; #[test] - fn test_to_avro_datum() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn 
test_to_avro_datum() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); @@ -606,35 +672,39 @@ mod tests { let mut expected = Vec::new(); zig_i64(27, &mut expected); zig_i64(3, &mut expected); - expected.extend(vec![b'f', b'o', b'o'].into_iter()); + expected.extend([b'f', b'o', b'o']); + + assert_eq!(to_avro_datum(&schema, record)?, expected); - assert_eq!(to_avro_datum(&schema, record).unwrap(), expected); + Ok(()) } #[test] - fn test_union_not_null() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_union_not_null() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let union = Value::Union(1, Box::new(Value::Long(3))); let mut expected = Vec::new(); zig_i64(1, &mut expected); zig_i64(3, &mut expected); - assert_eq!(to_avro_datum(&schema, union).unwrap(), expected); + assert_eq!(to_avro_datum(&schema, union)?, expected); + + Ok(()) } #[test] - fn test_union_null() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_union_null() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let union = Value::Union(0, Box::new(Value::Null)); let mut expected = Vec::new(); zig_i64(0, &mut expected); - assert_eq!(to_avro_datum(&schema, union).unwrap(), expected); - } + assert_eq!(to_avro_datum(&schema, union)?, expected); - type TestResult = Result>; + Ok(()) + } fn logical_type_test + Clone>( schema_str: &'static str, @@ -644,7 +714,7 @@ mod tests { raw_schema: &Schema, raw_value: T, - ) -> TestResult<()> { + ) -> TestResult { let schema = Schema::parse_str(schema_str)?; assert_eq!(&schema, expected_schema); // The serialized format should be the same as the schema. @@ -654,13 +724,13 @@ mod tests { // Should deserialize from the schema into the logical type. 
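+ // (With a logical-type schema, `from_avro_datum` returns the logical `Value`
+ // variant, e.g. `Value::Date` for an int/date schema, which is what the
+ // equality assert below relies on.)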
let mut r = ser.as_slice(); - let de = crate::from_avro_datum(&schema, &mut r, None).unwrap(); + let de = crate::from_avro_datum(&schema, &mut r, None)?; assert_eq!(de, value); Ok(()) } #[test] - fn date() -> TestResult<()> { + fn date() -> TestResult { logical_type_test( r#"{"type": "int", "logicalType": "date"}"#, &Schema::Date, @@ -671,7 +741,7 @@ mod tests { } #[test] - fn time_millis() -> TestResult<()> { + fn time_millis() -> TestResult { logical_type_test( r#"{"type": "int", "logicalType": "time-millis"}"#, &Schema::TimeMillis, @@ -682,7 +752,7 @@ mod tests { } #[test] - fn time_micros() -> TestResult<()> { + fn time_micros() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "time-micros"}"#, &Schema::TimeMicros, @@ -693,7 +763,7 @@ mod tests { } #[test] - fn timestamp_millis() -> TestResult<()> { + fn timestamp_millis() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "timestamp-millis"}"#, &Schema::TimestampMillis, @@ -704,7 +774,7 @@ mod tests { } #[test] - fn timestamp_micros() -> TestResult<()> { + fn timestamp_micros() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "timestamp-micros"}"#, &Schema::TimestampMicros, @@ -715,23 +785,23 @@ mod tests { } #[test] - fn decimal_fixed() -> TestResult<()> { + fn decimal_fixed() -> TestResult { let size = 30; - let inner = Schema::Fixed { - name: Name::new("decimal").unwrap(), + let inner = Schema::Fixed(FixedSchema { + name: Name::new("decimal")?, aliases: None, doc: None, size, attributes: Default::default(), - }; + }); let value = vec![0u8; size]; logical_type_test( r#"{"type": {"type": "fixed", "size": 30, "name": "decimal"}, "logicalType": "decimal", "precision": 20, "scale": 5}"#, - &Schema::Decimal { + &Schema::Decimal(DecimalSchema { precision: 20, scale: 5, inner: Box::new(inner.clone()), - }, + }), Value::Decimal(Decimal::from(value.clone())), &inner, Value::Fixed(size, value), @@ -739,16 +809,16 @@ mod tests { } #[test] - fn decimal_bytes() -> TestResult<()> { + fn decimal_bytes() -> TestResult { let inner = Schema::Bytes; let value = vec![0u8; 10]; logical_type_test( r#"{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 3}"#, - &Schema::Decimal { + &Schema::Decimal(DecimalSchema { precision: 4, scale: 3, inner: Box::new(inner.clone()), - }, + }), Value::Decimal(Decimal::from(value.clone())), &inner, value, @@ -756,14 +826,14 @@ mod tests { } #[test] - fn duration() -> TestResult<()> { - let inner = Schema::Fixed { - name: Name::new("duration").unwrap(), + fn duration() -> TestResult { + let inner = Schema::Fixed(FixedSchema { + name: Name::new("duration")?, aliases: None, doc: None, size: 12, attributes: Default::default(), - }; + }); let value = Value::Duration(Duration::new( Months::new(256), Days::new(512), @@ -779,18 +849,18 @@ mod tests { } #[test] - fn test_writer_append() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_append() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - let n1 = writer.append(record.clone()).unwrap(); - let n2 = writer.append(record.clone()).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record.clone())?; + let n2 = writer.append(record.clone())?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -808,11 +878,13 @@ 
mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_extend() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_extend() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); @@ -821,9 +893,9 @@ mod tests { let record_copy = record.clone(); let records = vec![record, record_copy]; - let n1 = writer.extend(records.into_iter()).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.extend(records.into_iter())?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -841,6 +913,8 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[derive(Debug, Clone, Deserialize, Serialize)] @@ -850,8 +924,8 @@ mod tests { } #[test] - fn test_writer_append_ser() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_append_ser() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let record = TestSerdeSerialize { @@ -859,9 +933,9 @@ mod tests { b: "foo".to_owned(), }; - let n1 = writer.append_ser(record).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append_ser(record)?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -878,11 +952,13 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_extend_ser() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_extend_ser() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let record = TestSerdeSerialize { @@ -892,9 +968,9 @@ mod tests { let record_copy = record.clone(); let records = vec![record, record_copy]; - let n1 = writer.extend_ser(records.into_iter()).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.extend_ser(records.into_iter())?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -912,6 +988,8 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec> { @@ -927,15 +1005,15 @@ mod tests { .build() } - fn check_writer(mut writer: Writer<'_, Vec>, schema: &Schema) { + fn check_writer(mut writer: Writer<'_, Vec>, schema: &Schema) -> TestResult { let mut record = Record::new(schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - let n1 = writer.append(record.clone()).unwrap(); - let n2 = writer.append(record.clone()).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record.clone())?; + let n2 = writer.append(record.clone())?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -944,7 +1022,7 @@ mod tests { zig_i64(3, &mut data); data.extend(b"foo"); data.extend(data.clone()); - Codec::Deflate.compress(&mut data).unwrap(); + Codec::Deflate.compress(&mut data)?; // starts with magic assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); @@ -954,24 +1032,26 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], 
data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_with_codec() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_with_codec() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let writer = make_writer_with_codec(&schema); - check_writer(writer, &schema); + check_writer(writer, &schema) } #[test] - fn test_writer_with_builder() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_with_builder() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let writer = make_writer_with_builder(&schema); - check_writer(writer, &schema); + check_writer(writer, &schema) } #[test] - fn test_logical_writer() { + fn test_logical_writer() -> TestResult { const LOGICAL_TYPE_SCHEMA: &str = r#" { "type": "record", @@ -991,7 +1071,7 @@ mod tests { } "#; let codec = Codec::Deflate; - let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA).unwrap(); + let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?; let mut writer = Writer::builder() .schema(&schema) .codec(codec) @@ -1007,10 +1087,10 @@ mod tests { let mut record2 = Record::new(&schema).unwrap(); record2.put("a", Value::Union(0, Box::new(Value::Null))); - let n1 = writer.append(record1).unwrap(); - let n2 = writer.append(record2).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record1)?; + let n2 = writer.append(record2)?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -1021,7 +1101,7 @@ mod tests { // byte indicating null zig_i64(0, &mut data); - codec.compress(&mut data).unwrap(); + codec.compress(&mut data)?; // starts with magic assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); @@ -1031,81 +1111,77 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_success() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_success() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); - writer - .add_user_metadata("stringKey".to_string(), String::from("stringValue")) - .unwrap(); - writer - .add_user_metadata("strKey".to_string(), "strValue") - .unwrap(); - writer - .add_user_metadata("bytesKey".to_string(), b"bytesValue") - .unwrap(); - writer - .add_user_metadata("vecKey".to_string(), vec![1, 2, 3]) - .unwrap(); + writer.add_user_metadata("stringKey".to_string(), String::from("stringValue"))?; + writer.add_user_metadata("strKey".to_string(), "strValue")?; + writer.add_user_metadata("bytesKey".to_string(), b"bytesValue")?; + writer.add_user_metadata("vecKey".to_string(), vec![1, 2, 3])?; let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - writer.append(record.clone()).unwrap(); - writer.append(record.clone()).unwrap(); - writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + writer.append(record.clone())?; + writer.append(record.clone())?; + writer.flush()?; + let result = writer.into_inner()?; assert_eq!(result.len(), 260); + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_failure() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_failure() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - 
writer.append(record.clone()).unwrap(); + writer.append(record.clone())?; match writer.add_user_metadata("stringKey".to_string(), String::from("value2")) { Err(e @ Error::FileHeaderAlreadyWritten) => { assert_eq!(e.to_string(), "The file metadata is already flushed.") } - Err(e) => panic!( - "Unexpected error occurred while writing user metadata: {:?}", - e - ), + Err(e) => panic!("Unexpected error occurred while writing user metadata: {e:?}"), Ok(_) => panic!("Expected an error that metadata cannot be added after adding data"), } + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_reserved_prefix_failure() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_reserved_prefix_failure() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let key = "avro.stringKey".to_string(); match writer.add_user_metadata(key.clone(), "value") { Err(ref e @ Error::InvalidMetadataKey(_)) => { - assert_eq!(e.to_string(), format!("Metadata keys starting with 'avro.' are reserved for internal usage: {}.", key)) + assert_eq!(e.to_string(), format!("Metadata keys starting with 'avro.' are reserved for internal usage: {key}.")) } Err(e) => panic!( - "Unexpected error occurred while writing user metadata with reserved prefix ('avro.'): {:?}", - e + "Unexpected error occurred while writing user metadata with reserved prefix ('avro.'): {e:?}" ), Ok(_) => panic!("Expected an error that the metadata key cannot be prefixed with 'avro.'"), } + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_with_builder_api_success() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_with_builder_api_success() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut user_meta_data: HashMap = HashMap::new(); user_meta_data.insert( @@ -1122,6 +1198,8 @@ mod tests { .build(); assert_eq!(writer.user_metadata, user_meta_data); + + Ok(()) } #[derive(Serialize, Clone)] @@ -1174,7 +1252,7 @@ mod tests { } #[test] - fn test_single_object_writer() { + fn test_single_object_writer() -> TestResult { let mut buf: Vec = Vec::new(); let obj = TestSingleObjectWriter { a: 300, @@ -1208,11 +1286,13 @@ mod tests { &mut msg_binary, ) .expect("encode should have failed by here as a dependency of any writing"); - assert_eq!(&buf[10..], &msg_binary[..]) + assert_eq!(&buf[10..], &msg_binary[..]); + + Ok(()) } #[test] - fn test_writer_parity() { + fn test_writer_parity() -> TestResult { let obj1 = TestSingleObjectWriter { a: 300, b: 34.555, @@ -1242,5 +1322,7 @@ mod tests { .expect("Serialization expected"); assert_eq!(buf1, buf2); assert_eq!(buf1, buf3); + + Ok(()) } } diff --git a/lang/rust/avro/tests/append_to_existing.rs b/lang/rust/avro/tests/append_to_existing.rs index 4f92f45e9ce..2ea59d95c56 100644 --- a/lang/rust/avro/tests/append_to_existing.rs +++ b/lang/rust/avro/tests/append_to_existing.rs @@ -20,9 +20,10 @@ use apache_avro::{ types::{Record, Value}, AvroResult, Reader, Schema, Writer, }; +use apache_avro_test_helper::TestResult; #[test] -fn avro_3630_append_to_an_existing_file() { +fn avro_3630_append_to_an_existing_file() -> TestResult { let schema_str = r#" { "type": "record", @@ -53,6 +54,8 @@ fn avro_3630_append_to_an_existing_file() { check(value, i); i += 1 } + + Ok(()) } /// Simulates reading from a pre-existing .avro file and returns its bytes @@ -79,8 +82,8 @@ fn check(value: AvroResult, expected: i32) { (_, Value::Int(actual)) => assert_eq!(&expected, actual), _ => 
panic!("The field value type must be an Int: {:?}!", &fields[0]), }, - _ => panic!("The value type must be a Record: {:?}!", value), + _ => panic!("The value type must be a Record: {value:?}!"), }, - Err(e) => panic!("Error while reading the data: {:?}", e), + Err(e) => panic!("Error while reading the data: {e:?}"), } } diff --git a/lang/rust/avro/tests/avro-3786.rs b/lang/rust/avro/tests/avro-3786.rs new file mode 100644 index 00000000000..d27e0c4e53f --- /dev/null +++ b/lang/rust/avro/tests/avro-3786.rs @@ -0,0 +1,886 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; +use apache_avro_test_helper::TestResult; + +#[test] +fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar1" + ], + "default": "bar1" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar1, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { 
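+ // Schema resolution re-indexes enum symbols against the reader schema:
+ // "bar1" is index 1 in the writer's [bar0, bar1, bar2] but index 0 in the
+ // reader's [bar1], which the asserts below verify.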
+ types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar1".to_string())); + assert_eq!(fields[1].0, "barUseParent"); + assert_eq!( + fields[1].1, + types::Value::Union( + 1, + Box::new(types::Value::Record(vec![( + "barUse".to_string(), + types::Value::Enum(0, "bar1".to_string()) + )])) + ) + ); + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar0 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v1() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + 
{ + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v2() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar2" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar1", + "bar2" + ], + "default": "bar2" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn 
deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar2 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_indices( +) -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum DefinedInRecord { + #[serde(rename = "val0")] + Val0, + #[serde(rename = "val1")] + Val1, + #[serde(rename = "val2")] + Val2, + #[serde(rename = "UNKNOWN")] + Unknown, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Parent { + pub date: i64, + #[serde(rename = "barUse")] + pub bar_use: Bar, + #[serde(rename = "bazUse")] + pub baz_use: Option>, + #[serde(rename = "definedInRecord")] + pub defined_in_record: DefinedInRecord, + #[serde(rename = "optionalString")] + pub optional_string: Option, + } + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Baz { + #[serde(rename = "baz0")] + Baz0, + #[serde(rename = "baz1")] + Baz1, + #[serde(rename = "baz2")] + Baz2, + } + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = 
"barInit")] + pub bar_init: Bar, + pub baz: Baz, + pub parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "fake", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "baz", + "type": + { + "type": "enum", + "name": "Baz", + "symbols": + [ + "baz0", + "baz1", + "baz2" + ], + "default": "baz0" + } + }, + { + "name": "parent", + "type": [ + "null", + { + "type": "record", + "name": "Parent", + "fields": [ + { + "name": "date", + "type": { + "type": "long", + "avro.java.long": "Long" + } + }, + { + "name": "barUse", + "type": "Bar" + }, + { + "name": "bazUse", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "Baz" + } + } + ] + }, + { + "name": "definedInRecord", + "type": { + "name": "DefinedInRecord", + "type": "enum", + "symbols": [ + "val0", + "val1", + "val2", + "UNKNOWN" + ], + "default": "UNKNOWN" + } + }, + { + "name": "optionalString", + "type": [ + "null", + "string" + ] + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "fake", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "baz", + "type": + { + "type": "enum", + "name": "Baz", + "symbols": + [ + "baz0", + "baz2" + ], + "default": "baz0" + } + }, + { + "name": "parent", + "type": [ + "null", + { + "type": "record", + "name": "Parent", + "fields": [ + { + "name": "date", + "type": { + "type": "long", + "avro.java.long": "Long" + } + }, + { + "name": "barUse", + "type": "Bar" + }, + { + "name": "bazUse", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "Baz" + } + } + ] + }, + { + "name": "definedInRecord", + "type": { + "name": "DefinedInRecord", + "type": "enum", + "symbols": [ + "val1", + "val2", + "UNKNOWN" + ], + "default": "UNKNOWN" + } + }, + { + "name": "optionalString", + "type": [ + "null", + "string" + ] + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar0, + baz: Baz::Baz0, + parent: Some(Parent { + bar_use: Bar::Bar0, + baz_use: Some(vec![Baz::Baz0]), + optional_string: Some("test".to_string()), + date: 1689197893, + defined_in_record: DefinedInRecord::Val1, + }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 3); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} diff --git a/lang/rust/avro/tests/avro-3787.rs b/lang/rust/avro/tests/avro-3787.rs new file mode 100644 index 00000000000..c08c3c6cce8 --- /dev/null +++ b/lang/rust/avro/tests/avro-3787.rs @@ -0,0 +1,279 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; +use apache_avro_test_helper::TestResult; + +#[test] +fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar1, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar2 }), + }; + let avro_value = to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(1, "bar1".to_string())); + assert_eq!(fields[1].0, "barUseParent"); + // TODO: test value + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} + +#[test] +fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + #[serde(default)] + pub struct BarParent { + 
#[serde(rename = "Bar")] + pub bar: Bar, + } + + #[inline(always)] + fn default_barparent_bar() -> Bar { + Bar::Bar0 + } + impl Default for BarParent { + fn default() -> BarParent { + BarParent { + bar: default_barparent_bar(), + } + } + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ], + "default": "bar0" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo2 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar2 }), + }; + let avro_value = to_value(foo2)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + // assert_eq!(fields[0].0, "barInit"); + // assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + assert_eq!(fields[0].0, "barParent"); + // assert_eq!(fields[1].1, types::Value::Enum(1, "bar1".to_string())); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs index fc316b0609b..ab3712893bc 100644 --- a/lang/rust/avro/tests/io.rs +++ b/lang/rust/avro/tests/io.rs @@ -17,6 +17,7 @@ //! Port of https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py use apache_avro::{from_avro_datum, to_avro_datum, types::Value, Error, Schema}; +use apache_avro_test_helper::TestResult; use lazy_static::lazy_static; use pretty_assertions::assert_eq; use std::io::Cursor; @@ -99,49 +100,55 @@ lazy_static! 
{ } #[test] -fn test_validate() { +fn test_validate() -> TestResult { for (raw_schema, value) in SCHEMAS_TO_VALIDATE.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; assert!( value.validate(&schema), - "value {:?} does not validate schema: {}", - value, - raw_schema + "value {value:?} does not validate schema: {raw_schema}" ); } + + Ok(()) } #[test] -fn test_round_trip() { +fn test_round_trip() -> TestResult { for (raw_schema, value) in SCHEMAS_TO_VALIDATE.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; let encoded = to_avro_datum(&schema, value.clone()).unwrap(); let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap(); assert_eq!(value, &decoded); } + + Ok(()) } #[test] -fn test_binary_int_encoding() { +fn test_binary_int_encoding() -> TestResult { for (number, hex_encoding) in BINARY_ENCODINGS.iter() { - let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32)).unwrap(); + let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32))?; assert_eq!(&encoded, hex_encoding); } + + Ok(()) } #[test] -fn test_binary_long_encoding() { +fn test_binary_long_encoding() -> TestResult { for (number, hex_encoding) in BINARY_ENCODINGS.iter() { - let encoded = to_avro_datum(&Schema::Long, Value::Long(*number)).unwrap(); + let encoded = to_avro_datum(&Schema::Long, Value::Long(*number))?; assert_eq!(&encoded, hex_encoding); } + + Ok(()) } #[test] -fn test_schema_promotion() { +fn test_schema_promotion() -> TestResult { // Each schema is present in order of promotion (int -> long, long -> float, float -> double) // Each value represents the expected decoded value when promoting a value previously encoded with a promotable schema - let promotable_schemas = vec![r#""int""#, r#""long""#, r#""float""#, r#""double""#]; + let promotable_schemas = [r#""int""#, r#""long""#, r#""float""#, r#""double""#]; let promotable_values = vec![ Value::Int(219), Value::Long(219), @@ -149,77 +156,75 @@ fn test_schema_promotion() { Value::Double(219.0), ]; for (i, writer_raw_schema) in promotable_schemas.iter().enumerate() { - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); + let writer_schema = Schema::parse_str(writer_raw_schema)?; let original_value = &promotable_values[i]; for (j, reader_raw_schema) in promotable_schemas.iter().enumerate().skip(i + 1) { - let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - let encoded = to_avro_datum(&writer_schema, original_value.clone()).unwrap(); + let reader_schema = Schema::parse_str(reader_raw_schema)?; + let encoded = to_avro_datum(&writer_schema, original_value.clone())?; let decoded = from_avro_datum( &writer_schema, &mut Cursor::new(encoded), Some(&reader_schema), ) .unwrap_or_else(|_| { - panic!( - "failed to decode {:?} with schema: {:?}", - original_value, reader_raw_schema, - ) + panic!("failed to decode {original_value:?} with schema: {reader_raw_schema:?}",) }); assert_eq!(decoded, promotable_values[j]); } } + + Ok(()) } #[test] -fn test_unknown_symbol() { +fn test_unknown_symbol() -> TestResult { let writer_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}"#)?; let reader_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type": "enum", "name": "Test", 
"symbols": ["BAR", "BAZ"]}"#)?; let original_value = Value::Enum(0, "FOO".to_string()); - let encoded = to_avro_datum(&writer_schema, original_value).unwrap(); + let encoded = to_avro_datum(&writer_schema, original_value)?; let decoded = from_avro_datum( &writer_schema, &mut Cursor::new(encoded), Some(&reader_schema), ); assert!(decoded.is_err()); + + Ok(()) } #[test] -fn test_default_value() { +fn test_default_value() -> TestResult { for (field_type, default_json, default_datum) in DEFAULT_VALUE_EXAMPLES.iter() { let reader_schema = Schema::parse_str(&format!( r#"{{ "type": "record", "name": "Test", "fields": [ - {{"name": "H", "type": {}, "default": {}}} + {{"name": "H", "type": {field_type}, "default": {default_json}}} ] - }}"#, - field_type, default_json - )) - .unwrap(); + }}"# + ))?; let datum_to_read = Value::Record(vec![("H".to_string(), default_datum.clone())]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!( datum_read, datum_to_read, "{} -> {}", *field_type, *default_json ); } + + Ok(()) } #[test] -fn test_no_default_value() { +fn test_no_default_value() -> TestResult { let reader_schema = Schema::parse_str( r#"{ "type": "record", @@ -228,19 +233,20 @@ fn test_no_default_value() { {"name": "H", "type": "int"} ] }"#, - ) - .unwrap(); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + )?; + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let result = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), ); assert!(result.is_err()); + + Ok(()) } #[test] -fn test_projection() { +fn test_projection() -> TestResult { let reader_schema = Schema::parse_str( r#" { @@ -252,24 +258,24 @@ fn test_projection() { ] } "#, - ) - .unwrap(); + )?; let datum_to_read = Value::Record(vec![ ("E".to_string(), Value::Int(5)), ("F".to_string(), Value::Int(6)), ]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!(datum_to_read, datum_read); + + Ok(()) } #[test] -fn test_field_order() { +fn test_field_order() -> TestResult { let reader_schema = Schema::parse_str( r#" { @@ -281,20 +287,20 @@ fn test_field_order() { ] } "#, - ) - .unwrap(); + )?; let datum_to_read = Value::Record(vec![ ("F".to_string(), Value::Int(6)), ("E".to_string(), Value::Int(5)), ]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!(datum_to_read, datum_read); + + Ok(()) } #[test] @@ -320,6 +326,6 @@ fn test_type_exception() -> Result<(), String> { match encoded { Ok(_) => Err(String::from("Expected ValidationError, got Ok")), Err(Error::Validation) => Ok(()), - Err(ref e) => Err(format!("Expected ValidationError, got {:?}", e)), + Err(ref e) => Err(format!("Expected ValidationError, got {e:?}")), } } diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs 
index 0fc18921fba..63b73056084 100644 --- a/lang/rust/avro/tests/schema.rs +++ b/lang/rust/avro/tests/schema.rs @@ -15,13 +15,19 @@ // specific language governing permissions and limitations // under the License. +use std::{ + collections::HashMap, + io::{Cursor, Read}, +}; + use apache_avro::{ - schema::{Name, RecordField}, + from_avro_datum, from_value, + schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, to_avro_datum, to_value, types::{Record, Value}, Codec, Error, Reader, Schema, Writer, }; -use apache_avro_test_helper::init; +use apache_avro_test_helper::{init, TestResult}; use lazy_static::lazy_static; const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ @@ -148,19 +154,19 @@ const UNION_EXAMPLES: &[(&str, bool)] = &[ ), ( r#"{"name": "foo", "type": ["string", "long"], "default": 1}"#, - false, + true, ), ( r#"{"name": "foo", "type": ["string", "null"], "default": null}"#, - false, + true, ), ( r#"{"name": "foo", "type": ["null", "string"], "default": "null"}"#, - false, + true, ), ( r#"{"name": "foo", "type": ["long", "string"], "default": "str"}"#, - false, + true, ), ]; @@ -586,6 +592,42 @@ const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ ), ]; +const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#, + true, + ), +]; + +const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#, + true, + ), +]; + lazy_static! { static ref EXAMPLES: Vec<(&'static str, bool)> = Vec::new() .iter() @@ -606,13 +648,15 @@ lazy_static! { .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied()) .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) + .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) + .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) .collect(); static ref VALID_EXAMPLES: Vec<(&'static str, bool)> = EXAMPLES.iter().copied().filter(|s| s.1).collect(); } #[test] -fn test_correct_recursive_extraction() { +fn test_correct_recursive_extraction() -> TestResult { init(); let raw_outer_schema = r#"{ "type": "record", @@ -633,83 +677,139 @@ fn test_correct_recursive_extraction() { } ] }"#; - let outer_schema = Schema::parse_str(raw_outer_schema).unwrap(); - if let Schema::Record { + let outer_schema = Schema::parse_str(raw_outer_schema)?; + if let Schema::Record(RecordSchema { fields: outer_fields, .. - } = outer_schema + }) = outer_schema { let inner_schema = &outer_fields[0].schema; - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: inner_fields, .. - } = inner_schema + }) = inner_schema { - if let Schema::Record { + if let Schema::Record(RecordSchema { name: recursive_type, .. 
- } = &inner_fields[0].schema + }) = &inner_fields[0].schema { assert_eq!("X", recursive_type.name.as_str()); } } else { - panic!("inner schema {:?} should have been a record", inner_schema) + panic!("inner schema {inner_schema:?} should have been a record") } } else { - panic!("outer schema {:?} should have been a record", outer_schema) + panic!("outer schema {outer_schema:?} should have been a record") } + + Ok(()) } #[test] -fn test_parse() { +fn test_parse() -> TestResult { init(); for (raw_schema, valid) in EXAMPLES.iter() { let schema = Schema::parse_str(raw_schema); if *valid { assert!( schema.is_ok(), - "schema {} was supposed to be valid; error: {:?}", - raw_schema, - schema, + "schema {raw_schema} was supposed to be valid; error: {schema:?}", + ) + } else { + assert!( + schema.is_err(), + "schema {raw_schema} was supposed to be invalid" + ) + } + } + Ok(()) +} + +#[test] +fn test_3799_parse_reader() -> TestResult { + init(); + for (raw_schema, valid) in EXAMPLES.iter() { + let schema = Schema::parse_reader(&mut Cursor::new(raw_schema)); + if *valid { + assert!( + schema.is_ok(), + "schema {raw_schema} was supposed to be valid; error: {schema:?}", + ) + } else { + assert!( + schema.is_err(), + "schema {raw_schema} was supposed to be invalid" + ) + } + } + + // Ensure it works for trait objects too. + for (raw_schema, valid) in EXAMPLES.iter() { + let reader: &mut dyn Read = &mut Cursor::new(raw_schema); + let schema = Schema::parse_reader(reader); + if *valid { + assert!( + schema.is_ok(), + "schema {raw_schema} was supposed to be valid; error: {schema:?}", ) } else { assert!( schema.is_err(), - "schema {} was supposed to be invalid", - raw_schema + "schema {raw_schema} was supposed to be invalid" ) } } + Ok(()) +} + +#[test] +fn test_3799_raise_io_error_from_parse_read() -> Result<(), String> { + // 0xDF is invalid for UTF-8. + let mut invalid_data = Cursor::new([0xDF]); + + let error = Schema::parse_reader(&mut invalid_data).unwrap_err(); + + if let Error::ReadSchemaFromReader(e) = error { + assert!( + e.to_string().contains("stream did not contain valid UTF-8"), + "{e}" + ); + Ok(()) + } else { + Err(format!("Expected std::io::Error, got {error:?}")) + } } #[test] /// Test that the string generated by an Avro Schema object is, in fact, a valid Avro schema. -fn test_valid_cast_to_string_after_parse() { +fn test_valid_cast_to_string_after_parse() -> TestResult { init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); - Schema::parse_str(schema.canonical_form().as_str()).unwrap(); + let schema = Schema::parse_str(raw_schema)?; + Schema::parse_str(schema.canonical_form().as_str())?; } + Ok(()) } #[test] /// 1. Given a string, parse it to get Avro schema "original". /// 2. Serialize "original" to a string and parse that string to generate Avro schema "round trip". /// 3. Ensure "original" and "round trip" schemas are equivalent. 
-fn test_equivalence_after_round_trip() { +fn test_equivalence_after_round_trip() -> TestResult { init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { - let original_schema = Schema::parse_str(raw_schema).unwrap(); - let round_trip_schema = - Schema::parse_str(original_schema.canonical_form().as_str()).unwrap(); + let original_schema = Schema::parse_str(raw_schema)?; + let round_trip_schema = Schema::parse_str(original_schema.canonical_form().as_str())?; assert_eq!(original_schema, round_trip_schema); } + Ok(()) } #[test] /// Test that a list of schemas whose definitions do not depend on each other produces the same /// result as parsing each element of the list individually -fn test_parse_list_without_cross_deps() { +fn test_parse_list_without_cross_deps() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -724,12 +824,13 @@ fn test_parse_list_without_cross_deps() { "size": 16 }"#; let schema_strs = [schema_str_1, schema_str_2]; - let schemas = Schema::parse_list(&schema_strs).expect("Test failed"); + let schemas = Schema::parse_list(&schema_strs)?; for schema_str in &schema_strs { - let parsed = Schema::parse_str(schema_str).expect("Test failed"); + let parsed = Schema::parse_str(schema_str)?; assert!(schemas.contains(&parsed)); } + Ok(()) } #[test] @@ -737,7 +838,7 @@ fn test_parse_list_without_cross_deps() { /// perform the necessary schema composition. This should work regardless of the order in which /// the schemas are input. /// However, the output order is guaranteed to be the same as the input order. -fn test_parse_list_with_cross_deps_basic() { +fn test_parse_list_with_cross_deps_basic() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -756,15 +857,16 @@ fn test_parse_list_with_cross_deps_basic() { let schema_strs_first = [schema_a_str, schema_b_str]; let schema_strs_second = [schema_b_str, schema_a_str]; - let schemas_first = Schema::parse_list(&schema_strs_first).expect("Test failed"); - let schemas_second = Schema::parse_list(&schema_strs_second).expect("Test failed"); + let schemas_first = Schema::parse_list(&schema_strs_first)?; + let schemas_second = Schema::parse_list(&schema_strs_second)?; assert_eq!(schemas_first[0], schemas_second[1]); assert_eq!(schemas_first[1], schemas_second[0]); + Ok(()) } #[test] -fn test_parse_list_recursive_type() { +fn test_parse_list_recursive_type() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -784,13 +886,14 @@ fn test_parse_list_recursive_type() { }"#; let schema_strs_first = [schema_str_1, schema_str_2]; let schema_strs_second = [schema_str_2, schema_str_1]; - let _ = Schema::parse_list(&schema_strs_first).expect("Test failed"); - let _ = Schema::parse_list(&schema_strs_second).expect("Test failed"); + let _ = Schema::parse_list(&schema_strs_first)?; + let _ = Schema::parse_list(&schema_strs_second)?; + Ok(()) } #[test] /// Test that schema composition resolves namespaces. 
-fn test_parse_list_with_cross_deps_and_namespaces() { +fn test_parse_list_with_cross_deps_and_namespaces() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -808,16 +911,18 @@ fn test_parse_list_with_cross_deps_and_namespaces() { ] }"#; - let schemas_first = Schema::parse_list(&[schema_a_str, schema_b_str]).expect("Test failed"); - let schemas_second = Schema::parse_list(&[schema_b_str, schema_a_str]).expect("Test failed"); + let schemas_first = Schema::parse_list(&[schema_a_str, schema_b_str])?; + let schemas_second = Schema::parse_list(&[schema_b_str, schema_a_str])?; assert_eq!(schemas_first[0], schemas_second[1]); assert_eq!(schemas_first[1], schemas_second[0]); + + Ok(()) } #[test] /// Test that schema composition fails on namespace errors. -fn test_parse_list_with_cross_deps_and_namespaces_error() { +fn test_parse_list_with_cross_deps_and_namespaces_error() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -839,12 +944,14 @@ fn test_parse_list_with_cross_deps_and_namespaces_error() { let schema_strs_second = [schema_str_2, schema_str_1]; let _ = Schema::parse_list(&schema_strs_first).expect_err("Test failed"); let _ = Schema::parse_list(&schema_strs_second).expect_err("Test failed"); + + Ok(()) } #[test] // // test that field's RecordSchema could be referenced by a following field by full name -fn test_parse_reused_record_schema_by_fullname() { +fn test_parse_reused_record_schema_by_fullname() -> TestResult { init(); let schema_str = r#" { @@ -882,15 +989,15 @@ fn test_parse_reused_record_schema_by_fullname() { let schema = Schema::parse_str(schema_str); assert!(schema.is_ok()); - match schema.unwrap() { - Schema::Record { + match schema? { + Schema::Record(RecordSchema { ref name, aliases: _, doc: _, ref fields, lookup: _, attributes: _, - } => { + }) => { assert_eq!(name.fullname(None), "test.Weather", "Name does not match!"); assert_eq!(fields.len(), 3, "The number of the fields is not correct!"); @@ -899,6 +1006,7 @@ fn test_parse_reused_record_schema_by_fullname() { ref name, doc: _, default: _, + aliases: _, ref schema, order: _, position: _, @@ -916,6 +1024,8 @@ fn test_parse_reused_record_schema_by_fullname() { } unexpected => unreachable!("Unexpected schema type: {:?}", unexpected), } + + Ok(()) } /// Return all permutations of an input slice @@ -958,7 +1068,7 @@ fn permutation_indices(indices: Vec) -> Vec> { #[test] /// Test that a type that depends on more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list. 
-fn test_parse_list_multiple_dependencies() { +fn test_parse_list_multiple_dependencies() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -980,23 +1090,23 @@ fn test_parse_list_multiple_dependencies() { ] }"#; - let parsed = - Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str]).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; for schema_str_perm in permutations(&schema_strs) { let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm).expect("Test failed"); + let schemas = Schema::parse_list(&schema_str_perm)?; assert_eq!(schemas.len(), 3); for parsed_schema in &parsed { assert!(schemas.contains(parsed_schema)); } } + Ok(()) } #[test] /// Test that a type that is depended on by more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list. -fn test_parse_list_shared_dependency() { +fn test_parse_list_shared_dependency() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -1020,22 +1130,22 @@ fn test_parse_list_shared_dependency() { ] }"#; - let parsed = - Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str]).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; for schema_str_perm in permutations(&schema_strs) { let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm).expect("Test failed"); + let schemas = Schema::parse_list(&schema_str_perm)?; assert_eq!(schemas.len(), 3); for parsed_schema in &parsed { assert!(schemas.contains(parsed_schema)); } } + Ok(()) } #[test] /// Test that trying to parse two schemas with the same fullname returns an Error -fn test_name_collision_error() { +fn test_name_collision_error() -> TestResult { init(); let schema_str_1 = r#"{ "name": "foo.A", @@ -1054,11 +1164,12 @@ fn test_name_collision_error() { }"#; let _ = Schema::parse_list(&[schema_str_1, schema_str_2]).expect_err("Test failed"); + Ok(()) } #[test] /// Test that having the same name but different fullnames does not return an error -fn test_namespace_prevents_collisions() { +fn test_namespace_prevents_collisions() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -1076,10 +1187,11 @@ fn test_namespace_prevents_collisions() { ] }"#; - let parsed = Schema::parse_list(&[schema_str_1, schema_str_2]).expect("Test failed"); - let parsed_1 = Schema::parse_str(schema_str_1).expect("Test failed"); - let parsed_2 = Schema::parse_str(schema_str_2).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_str_1, schema_str_2])?; + let parsed_1 = Schema::parse_str(schema_str_1)?; + let parsed_2 = Schema::parse_str(schema_str_2)?; assert_eq!(parsed, vec!(parsed_1, parsed_2)); + Ok(()) } // The fullname is determined in one of the following ways: @@ -1108,116 +1220,125 @@ fn test_namespace_prevents_collisions() { // equivalent. 
#[test] -fn test_fullname_name_and_namespace_specified() { +fn test_fullname_name_and_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#).unwrap(); + serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#)?; let fullname = name.fullname(None); assert_eq!("o.a.h.a", fullname); + Ok(()) } #[test] -fn test_fullname_fullname_and_namespace_specified() { +fn test_fullname_fullname_and_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_fullname_name_and_default_namespace_specified() { +fn test_fullname_name_and_default_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, None); let fullname = name.fullname(Some("b.c.d".into())); assert_eq!("b.c.d.a", fullname); + Ok(()) } #[test] -fn test_fullname_fullname_and_default_namespace_specified() { +fn test_fullname_fullname_and_default_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(Some("o.a.h".into())); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_avro_3452_parsing_name_without_namespace() { +fn test_avro_3452_parsing_name_without_namespace() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d"}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() { +fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": ".a"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": ".a"}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, None); assert_eq!("a", name.fullname(None)); + Ok(()) } #[test] -fn test_avro_3452_parse_json_without_name_field() { +fn test_avro_3452_parse_json_without_name_field() -> TestResult { init(); let result: serde_json::error::Result<Name> = serde_json::from_str(r#"{"unknown": "a"}"#); assert!(&result.is_err()); assert_eq!(result.unwrap_err().to_string(), "No `name` field"); + Ok(()) } #[test] -fn test_fullname_fullname_namespace_and_default_namespace_specified() { +fn test_fullname_fullname_namespace_and_default_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#) - .unwrap(); + serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let
fullname = name.fullname(Some("o.a.h".into())); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_fullname_name_namespace_and_default_namespace_specified() { +fn test_fullname_name_namespace_and_default_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#).unwrap(); + serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, Some("o.a.a".to_owned())); let fullname = name.fullname(Some("o.a.h".into())); assert_eq!("o.a.a.a", fullname); + Ok(()) } #[test] -fn test_doc_attributes() { +fn test_doc_attributes() -> TestResult { init(); fn assert_doc(schema: &Schema) { match schema { - Schema::Enum { doc, .. } => assert!(doc.is_some()), - Schema::Record { doc, .. } => assert!(doc.is_some()), - Schema::Fixed { doc, .. } => assert!(doc.is_some()), + Schema::Enum(EnumSchema { doc, .. }) => assert!(doc.is_some()), + Schema::Record(RecordSchema { doc, .. }) => assert!(doc.is_some()), + Schema::Fixed(FixedSchema { doc, .. }) => assert!(doc.is_some()), Schema::String => (), _ => unreachable!("Unexpected schema type: {:?}", schema), } } for (raw_schema, _) in DOC_EXAMPLES.iter() { - let original_schema = Schema::parse_str(raw_schema).unwrap(); + let original_schema = Schema::parse_str(raw_schema)?; assert_doc(&original_schema); - if let Schema::Record { fields, .. } = original_schema { + if let Schema::Record(RecordSchema { fields, .. }) = original_schema { for f in fields { assert_doc(&f.schema) } } } + Ok(()) } /* @@ -1235,17 +1356,17 @@ fn test_other_attributes() { } for (raw_schema, _) in OTHER_ATTRIBUTES_EXAMPLES.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; // all inputs have at least some user-defined attributes assert!(schema.other_attributes.is_some()); - for prop in schema.other_attributes.unwrap().iter() { + for prop in schema.other_attributes?.iter() { assert_attribute_type(prop); } if let Schema::Record { fields, .. 
} = schema { for f in fields { // all fields in the record have at least some user-defined attributes assert!(f.schema.other_attributes.is_some()); - for prop in f.schema.other_attributes.unwrap().iter() { + for prop in f.schema.other_attributes?.iter() { assert_attribute_type(prop); } } @@ -1268,16 +1389,13 @@ fn test_root_error_is_not_swallowed_on_parse_error() -> Result<(), String> { ); Ok(()) } else { - Err(format!( - "Expected serde_json::error::Error, got {:?}", - error - )) + Err(format!("Expected serde_json::error::Error, got {error:?}")) } } // AVRO-3302 #[test] -fn test_record_schema_with_cyclic_references() { +fn test_record_schema_with_cyclic_references() -> TestResult { init(); let schema = Schema::parse_str( r#" @@ -1297,8 +1415,7 @@ fn test_record_schema_with_cyclic_references() { }] } "#, - ) - .unwrap(); + )?; let mut datum = Record::new(&schema).unwrap(); datum.put( @@ -1329,18 +1446,19 @@ fn test_record_schema_with_cyclic_references() { let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null); if let Err(err) = writer.append(datum) { - panic!("An error occurred while writing datum: {:?}", err) + panic!("An error occurred while writing datum: {err:?}") } - let bytes = writer.into_inner().unwrap(); + let bytes = writer.into_inner()?; assert_eq!(316, bytes.len()); match Reader::new(&mut bytes.as_slice()) { Ok(mut reader) => match reader.next() { - Some(value) => log::debug!("{:?}", value.unwrap()), + Some(value) => log::debug!("{:?}", value?), None => panic!("No value was read!"), }, - Err(err) => panic!("An error occurred while reading datum: {:?}", err), + Err(err) => panic!("An error occurred while reading datum: {err:?}"), } + Ok(()) } /* @@ -1348,12 +1466,12 @@ fn test_record_schema_with_cyclic_references() { #[test] fn test_decimal_valid_type_attributes() { init(); - let fixed_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[0]).unwrap(); + let fixed_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[0])?; assert_eq!(4, fixed_decimal.get_attribute("precision")); assert_eq!(2, fixed_decimal.get_attribute("scale")); assert_eq!(2, fixed_decimal.get_attribute("size")); - let bytes_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[1]).unwrap(); + let bytes_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[1])?; assert_eq!(4, bytes_decimal.get_attribute("precision")); assert_eq!(0, bytes_decimal.get_attribute("scale")); } @@ -1361,7 +1479,7 @@ fn test_decimal_valid_type_attributes() { // https://github.com/flavray/avro-rs/issues/47 #[test] -fn avro_old_issue_47() { +fn avro_old_issue_47() -> TestResult { init(); let schema_str = r#" { @@ -1372,11 +1490,11 @@ fn avro_old_issue_47() { {"name": "b", "type": "string"} ] }"#; - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; use serde::{Deserialize, Serialize}; - #[derive(Deserialize, Serialize)] + #[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] pub struct MyRecord { b: String, a: i64, @@ -1387,5 +1505,1152 @@ fn avro_old_issue_47() { a: 1, }; - let _ = to_avro_datum(&schema, to_value(record).unwrap()).unwrap(); + let ser_value = to_value(record.clone())?; + let serialized_bytes = to_avro_datum(&schema, ser_value)?; + + let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?; + let deserialized_record = from_value::<MyRecord>(de_value)?; + + assert_eq!(record, deserialized_record); + Ok(()) +} + +#[test] +fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_type() -> TestResult
+{ + use apache_avro::{from_avro_datum, to_avro_datum, types::Value}; + use serde::{Deserialize, Serialize}; + + #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, Deserialize, Serialize)] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option<BarUseParent>, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ] + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar0, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + Value::Record(fields) => { + assert_eq!(fields.len(), 2); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} + +#[test] +fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { + // Test for reference to Record + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": ["record2", "int"], + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(),
Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Union( + 0, + Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), + ), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Enum + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b"] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Enum(1, "b".to_string())); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b"] + } + }, { + "name": "f2", + "type": ["enum1", "int"], + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Enum(1, "b".to_string())), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Fixed + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Fixed(3, vec![0, 1, 2])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + } + }, { + "name": "f2", + "type": ["fixed1", "int"], + "default": "abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> TestResult { + // Test for reference to Record + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": 
"record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": ["ns.record2", "int"], + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(), + Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Union( + 0, + Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), + ), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Enum + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b"] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Enum(1, "b".to_string())); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b"] + } + }, { + "name": "f2", + "type": ["ns.enum1", "int"], + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Enum(1, "b".to_string())), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Fixed + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "namespace": "ns", + "type": "fixed", + "size": 3 + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Fixed(3, vec![0, 1, 2])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "namespace": "ns", + "type": "fixed", + "size": 3 + } + }, { + "name": "f2", + "type": ["ns.fixed1", "int"], + "default": "abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace() -> 
TestResult { + // Test for reference to Record + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": ["ns.record2", "int"], + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(), + Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Union( + 0, + Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), + ), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Enum + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b"] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Enum(1, "b".to_string())); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b"] + } + }, { + "name": "f2", + "type": ["ns.enum1", "int"], + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Enum(1, "b".to_string())), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Fixed + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Fixed(3, vec![0, 1, 2])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", +
"size": 3 + } + }, { + "name": "f2", + "type": ["ns.fixed1", "int"], + "default": "abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +fn write_schema_for_default_value_test() -> apache_avro::AvroResult> { + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()) + .ok_or("Expected Some(Record), but got None") + .unwrap(); + record.put("f1", 10); + writer.append(record)?; + + writer.into_inner() +} + +#[test] +fn test_avro_3851_read_default_value_for_simple_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "int", + "default": 20 + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Int(20)), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_nested_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + }, + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ( + "f2".to_string(), + Value::Record(vec![("f1_1".to_string(), 100.into())]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_enum_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", "c"] + }, + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Enum(0, "a".to_string())), + ]); + + 
assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_fixed_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + }, + "default": "abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Fixed(3, vec![b'a', b'b', b'c'])), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_array_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "array", + "items": "int", + "default": [1, 2, 3] + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ( + "f2".to_string(), + Value::Array(vec![1.into(), 2.into(), 3.into()]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_map_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "map", + "values": "string", + "default": { "a": "A", "b": "B", "c": "C" } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::<Result<Vec<Value>, _>>()?; + + assert_eq!(1, result.len()); + + let map = HashMap::from_iter([ + ("a".to_string(), "A".into()), + ("b".to_string(), "B".into()), + ("c".to_string(), "C".into()), + ]); + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Map(map)), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_ref_record_field() -> TestResult { + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type":
"ns.record2", + "default": { "f1_1": 100 } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(), + Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Record(vec![("f1_1".to_string(), 100.into())]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_enum() -> TestResult { + let writer_schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + writer.append("c")?; + + let reader_schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b"], + "default": "a" + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Enum(0, "a".to_string()); + assert_eq!(expected, result[0]); + + Ok(()) } diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs new file mode 100644 index 00000000000..9790ddfe424 --- /dev/null +++ b/lang/rust/avro/tests/shared.rs @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use apache_avro::{types::Value, Codec, Reader, Schema, Writer}; +use apache_avro_test_helper::TestResult; +use std::{ + fmt, + fs::{DirEntry, File, ReadDir}, + io::BufReader, + path::Path, + slice::Iter, +}; + +const ROOT_DIRECTORY: &str = "../../../share/test/data/schemas"; + +#[test] +fn test_schema() -> TestResult { + let directory: ReadDir = match std::fs::read_dir(ROOT_DIRECTORY) { + Ok(root_folder) => root_folder, + Err(err) => { + log::warn!("Can't read the root folder: {err}"); + return Ok(()); + } + }; + let mut result: Result<(), ErrorsDesc> = Ok(()); + for f in directory { + let entry: DirEntry = match f { + Ok(entry) => entry, + Err(e) => core::panic!("Can't get file {}", e), + }; + log::debug!("{:?}", entry.file_name()); + if let Ok(ft) = entry.file_type() { + if ft.is_dir() { + let sub_folder = + ROOT_DIRECTORY.to_owned() + "/" + entry.file_name().to_str().unwrap(); + + let dir_result = test_folder(sub_folder.as_str()); + if let Result::Err(ed) = dir_result { + result = match result { + Ok(()) => Err(ed), + Err(e) => Err(e.merge(&ed)), + } + } + } + } + } + if let Err(e) = result { + core::panic!("{}", e) + } + Ok(()) +} + +#[derive(Debug)] +struct ErrorsDesc { + details: Vec<String>, +} + +impl ErrorsDesc { + fn new(msg: &str) -> ErrorsDesc { + ErrorsDesc { + details: vec![msg.to_string()], + } + } + + fn add(&self, msg: &str) -> Self { + let mut new_vec = self.details.clone(); + new_vec.push(msg.to_string()); + Self { details: new_vec } + } + + fn merge(&self, err: &ErrorsDesc) -> Self { + let mut new_vec = self.details.clone(); + err.details + .iter() + .for_each(|d: &String| new_vec.push(d.clone())); + Self { details: new_vec } + } +} + +impl fmt::Display for ErrorsDesc { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.details.join("\n").as_str()) + } +} + +fn test_folder(folder: &str) -> Result<(), ErrorsDesc> { + let file_name = folder.to_owned() + "/schema.json"; + let content = std::fs::read_to_string(file_name).expect("Unable to find schema.json file"); + + let schema: Schema = Schema::parse_str(content.as_str()).expect("Can't read schema"); + + let data_file_name = folder.to_owned() + "/data.avro"; + let data_path: &Path = Path::new(data_file_name.as_str()); + let mut result = Result::Ok(()); + if !data_path.exists() { + log::error!("{}", format!("folder {folder} does not exist")); + return Result::Err(ErrorsDesc::new( + format!("folder {folder} does not exist").as_str(), + )); + } else { + let file: File = File::open(data_path).expect("Can't open data.avro"); + let reader = + Reader::with_schema(&schema, BufReader::new(&file)).expect("Can't read data.avro"); + + let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Null); + + let mut records: Vec<Value> = vec![]; + + for r in reader { + let record: Value = r.expect("Error on reading"); + writer.append(record.clone()).expect("Error on write item"); + records.push(record); + } + + writer.flush().expect("Error on flush"); + let bytes: Vec<u8> = writer.into_inner().unwrap(); + let reader_bis = + Reader::with_schema(&schema, &bytes[..]).expect("Can't read flushed vector"); + + let mut records_iter: Iter<Value> = records.iter(); + for r2 in reader_bis { + let record: Value = r2.expect("Error on reading"); + let original = records_iter.next().expect("Error, no next"); + if original != &record { + result = match result { + Ok(_) => Result::Err(ErrorsDesc::new( + format!("Records are not equals for folder : {folder}").as_str(), + )), + Err(e) => { + Err(e.add(format!("Records are not equals for folder :
{folder}").as_str())) + } + } + } + } + } + result +} diff --git a/lang/rust/avro/tests/to_from_avro_datum_schemata.rs b/lang/rust/avro/tests/to_from_avro_datum_schemata.rs new file mode 100644 index 00000000000..e27f1e625ac --- /dev/null +++ b/lang/rust/avro/tests/to_from_avro_datum_schemata.rs @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use apache_avro::{ + from_avro_datum_schemata, to_avro_datum_schemata, types::Value, Codec, Reader, Schema, Writer, +}; +use apache_avro_test_helper::{init, TestResult}; + +static SCHEMA_A_STR: &str = r#"{ + "name": "A", + "type": "record", + "fields": [ + {"name": "field_a", "type": "float"} + ] + }"#; + +static SCHEMA_B_STR: &str = r#"{ + "name": "B", + "type": "record", + "fields": [ + {"name": "field_b", "type": "A"} + ] + }"#; + +#[test] +fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { + init(); + + let record: Value = Value::Record(vec![( + String::from("field_b"), + Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]), + )]); + + let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?; + let schemata: Vec<&Schema> = schemata.iter().collect(); + + // this is the Schema we want to use for write/read + let schema_b = schemata[1]; + let expected: Vec = vec![0, 0, 128, 63]; + let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?; + assert_eq!(actual, expected); + + let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None)?; + assert_eq!(value, record); + + Ok(()) +} + +#[test] +fn test_avro_3683_multiple_schemata_writer_reader() -> TestResult { + init(); + + let record: Value = Value::Record(vec![( + String::from("field_b"), + Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]), + )]); + + let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?; + let schemata: Vec<&Schema> = schemata.iter().collect(); + + // this is the Schema we want to use for write/read + let schema_b = schemata[1]; + let mut output: Vec = Vec::new(); + + let mut writer = Writer::with_schemata(schema_b, schemata.clone(), &mut output, Codec::Null); + writer.append(record.clone())?; + writer.flush()?; + + let reader = Reader::with_schemata(schema_b, schemata, output.as_slice())?; + let value = reader.into_iter().next().unwrap().unwrap(); + assert_eq!(value, record); + + Ok(()) +} diff --git a/lang/rust/avro_derive/Cargo.toml b/lang/rust/avro_derive/Cargo.toml index 98c938bdf89..88c2f720740 100644 --- a/lang/rust/avro_derive/Cargo.toml +++ b/lang/rust/avro_derive/Cargo.toml @@ -17,28 +17,29 @@ [package] name = "apache-avro-derive" -version = "0.15.0" -authors = ["Apache Avro team "] +version.workspace = true +authors.workspace = true description = "A library for deriving Avro 
schemata from Rust structs and enums" -license = "Apache-2.0" -readme = "README.md" -repository = "https://github.com/apache/avro" -edition = "2018" +license.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true keywords = ["avro", "data", "serialization", "derive"] -categories = ["encoding"] +categories.workspace = true documentation = "https://docs.rs/apache-avro-derive" +readme = "README.md" [lib] proc-macro = true [dependencies] -darling = { default-features = false, version = "0.14.2" } -proc-macro2 = { default-features = false, version = "1.0.49" } -quote = { default-features = false, version = "1.0.23" } -serde_json = { default-features = false, version = "1.0.91", features = ["std"] } -syn = { default-features = false, version = "1.0.107", features = ["full", "fold"] } +darling = { default-features = false, version = "0.20.3" } +proc-macro2 = { default-features = false, version = "1.0.67" } +quote = { default-features = false, version = "1.0.33" } +serde_json = { default-features = false, version = "1.0.107", features = ["std"] } +syn = { default-features = false, version = "2.0.37", features = ["full", "fold"] } [dev-dependencies] apache-avro = { default-features = false, path = "../avro", features = ["derive"] } -proptest = { default-features = false, version = "1.0.0", features = ["std"] } -serde = { default-features = false, version = "1.0.151", features = ["derive"] } +proptest = { default-features = false, version = "1.2.0", features = ["std"] } +serde = { default-features = false, version = "1.0.188", features = ["derive"] } diff --git a/lang/rust/avro_derive/src/lib.rs b/lang/rust/avro_derive/src/lib.rs index 8f6a9b70a26..5b36839be4e 100644 --- a/lang/rust/avro_derive/src/lib.rs +++ b/lang/rust/avro_derive/src/lib.rs @@ -30,6 +30,8 @@ struct FieldOptions { doc: Option<String>, #[darling(default)] default: Option<String>, + #[darling(multiple)] + alias: Vec<String>, #[darling(default)] rename: Option<String>, #[darling(default)] @@ -139,7 +141,7 @@ fn get_data_struct_schema_def( .map_err(|e| { vec![syn::Error::new( field.ident.span(), - format!("Invalid avro default json: \n{}", e), + format!("Invalid avro default json: \n{e}"), )] })?; quote! { @@ -148,6 +150,7 @@ fn get_data_struct_schema_def( } None => quote! { None }, }; + let aliases = preserve_vec(field_attrs.alias); let schema_expr = type_to_schema_expr(&field.ty)?; let position = index; record_field_exprs.push(quote! { @@ -155,6 +158,7 @@ fn get_data_struct_schema_def( name: #name.to_string(), doc: #doc, default: #default_value, + aliases: #aliases, schema: #schema_expr, order: apache_avro::schema::RecordFieldOrder::Ascending, position: #position, @@ -186,14 +190,14 @@ fn get_data_struct_schema_def( .iter() .map(|field| (field.name.to_owned(), field.position)) .collect(); - apache_avro::schema::Schema::Record { + apache_avro::schema::Schema::Record(apache_avro::schema::RecordSchema { name, aliases: #record_aliases, doc: #record_doc, fields: schema_fields, lookup, attributes: Default::default(), - } + }) }) } @@ -213,13 +217,14 @@ fn get_data_enum_schema_def( .map(|variant| variant.ident.to_string()) .collect(); Ok(quote!
{ - apache_avro::schema::Schema::Enum { + apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { name: apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse enum name for schema {}", #full_schema_name)[..]), aliases: #enum_aliases, doc: #doc, symbols: vec![#(#symbols.to_owned()),*], + default: None, attributes: Default::default(), - } + }) }) } else { Err(vec![syn::Error::new( @@ -268,7 +273,7 @@ fn type_to_schema_expr(ty: &Type) -> Result<TokenStream, Vec<syn::Error>> { } else { Err(vec![syn::Error::new_spanned( ty, - format!("Unable to generate schema for type: {:?}", ty), + format!("Unable to generate schema for type: {ty:?}"), )]) } } @@ -289,17 +294,19 @@ fn to_compile_errors(errors: Vec<syn::Error>) -> proc_macro2::TokenStream { fn extract_outer_doc(attributes: &[Attribute]) -> Option<String> { let doc = attributes .iter() - .filter(|attr| attr.style == AttrStyle::Outer && attr.path.is_ident("doc")) - .map(|attr| { - let mut tokens = attr.tokens.clone().into_iter(); - tokens.next(); // skip the Punct - let to_trim: &[char] = &['"', ' ']; - tokens - .next() // use the Literal - .unwrap() - .to_string() - .trim_matches(to_trim) - .to_string() + .filter(|attr| attr.style == AttrStyle::Outer && attr.path().is_ident("doc")) + .filter_map(|attr| { + let name_value = attr.meta.require_name_value(); + match name_value { + Ok(name_value) => match &name_value.value { + syn::Expr::Lit(expr_lit) => match expr_lit.lit { + syn::Lit::Str(ref lit_str) => Some(lit_str.value().trim().to_string()), + _ => None, + }, + _ => None, + }, + Err(_) => None, + } }) .collect::<Vec<String>>() .join("\n"); @@ -327,7 +334,7 @@ fn preserve_vec(op: Vec<String>) -> TokenStream { } fn darling_to_syn(e: darling::Error) -> Vec<syn::Error> { - let msg = format!("{}", e); + let msg = format!("{e}"); let token_errors = e.write_errors(); vec![syn::Error::new(token_errors.span(), msg)] } @@ -349,8 +356,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_ok()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -366,8 +372,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_err()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -383,8 +388,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_err()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -401,8 +405,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_ok()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -422,8 +425,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_ok()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -443,8 +445,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_err()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to.
Error: {error:?}" ), }; } @@ -461,15 +462,15 @@ mod tests { match syn::parse2::(test_struct) { Ok(mut input) => { - assert!(derive_avro_schema(&mut input).is_ok()); - assert!(derive_avro_schema(&mut input) + let schema_token_stream = derive_avro_schema(&mut input); + assert!(&schema_token_stream.is_ok()); + assert!(schema_token_stream .unwrap() .to_string() .contains("namespace.testing")) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -488,8 +489,7 @@ mod tests { assert!(derive_avro_schema(&mut input).is_ok()) } Err(error) => panic!( - "Failed to parse as derive input when it should be able to. Error: {:?}", - error + "Failed to parse as derive input when it should be able to. Error: {error:?}" ), }; } @@ -500,4 +500,26 @@ mod tests { assert_eq!(type_path_schema_expr(&syn::parse2::(quote!{Vec}).unwrap()).to_string(), quote!{ as apache_avro::schema::derive::AvroSchemaComponent>::get_schema_in_ctxt(named_schemas, enclosing_namespace)}.to_string()); assert_eq!(type_path_schema_expr(&syn::parse2::(quote!{AnyType}).unwrap()).to_string(), quote!{::get_schema_in_ctxt(named_schemas, enclosing_namespace)}.to_string()); } + + #[test] + fn test_avro_3709_record_field_attributes() { + let test_struct = quote! { + struct A { + #[avro(alias = "a1", alias = "a2", doc = "a doc", default = "123", rename = "a3")] + a: i32 + } + }; + + match syn::parse2::(test_struct) { + Ok(mut input) => { + let schema_res = derive_avro_schema(&mut input); + let expected_token_stream = r#"let schema_fields = vec ! [apache_avro :: schema :: RecordField { name : "a3" . to_string () , doc : Some ("a doc" . into ()) , default : Some (serde_json :: from_str ("123") . expect (format ! ("Invalid JSON: {:?}" , "123") . as_str ())) , aliases : Some (vec ! ["a1" . into () , "a2" . into ()]) , schema : apache_avro :: schema :: Schema :: Int , order : apache_avro :: schema :: RecordFieldOrder :: Ascending , position : 0usize , custom_attributes : Default :: default () , }] ;"#; + let schema_token_stream = schema_res.unwrap().to_string(); + assert!(schema_token_stream.contains(expected_token_stream)); + } + Err(error) => panic!( + "Failed to parse as derive input when it should be able to. Error: {error:?}" + ), + }; + } } diff --git a/lang/rust/avro_derive/tests/derive.rs b/lang/rust/avro_derive/tests/derive.rs index a2ac0b6bfc7..0bfc9a95cf1 100644 --- a/lang/rust/avro_derive/tests/derive.rs +++ b/lang/rust/avro_derive/tests/derive.rs @@ -30,11 +30,8 @@ extern crate serde; #[cfg(test)] mod test_derive { - use apache_avro::schema::Alias; - use std::{ - borrow::{Borrow, Cow}, - sync::Mutex, - }; + use apache_avro::schema::{Alias, EnumSchema, RecordSchema}; + use std::{borrow::Cow, sync::Mutex}; use super::*; @@ -60,7 +57,7 @@ mod test_derive { let schema = T::get_schema(); let mut writer = Writer::new(&schema, Vec::new()); if let Err(e) = writer.append_ser(obj) { - panic!("{:?}", e); + panic!("{e:?}"); } writer.into_inner().unwrap() } @@ -77,7 +74,7 @@ mod test_derive { Ok(value) => { return from_value::(&value).unwrap(); } - Err(e) => panic!("{:?}", e), + Err(e) => panic!("{e:?}"), } } unreachable!() @@ -144,7 +141,7 @@ mod test_derive { "#; let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestBasicNamespace::get_schema()); - if let Schema::Record { name, .. } = TestBasicNamespace::get_schema() { + if let Schema::Record(RecordSchema { name, .. 
}) = TestBasicNamespace::get_schema() { assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()) } else { panic!("TestBasicNamespace schema must be a record schema") @@ -191,7 +188,9 @@ mod test_derive { "#; let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestComplexNamespace::get_schema()); - if let Schema::Record { name, fields, .. } = TestComplexNamespace::get_schema() { + if let Schema::Record(RecordSchema { name, fields, .. }) = + TestComplexNamespace::get_schema() + { assert_eq!( "com.testing.complex.namespace".to_owned(), name.namespace.unwrap() @@ -201,7 +200,7 @@ mod test_derive { .filter(|field| field.name == "a") .map(|field| &field.schema) .next(); - if let Some(Schema::Record { name, .. }) = inner_schema { + if let Some(Schema::Record(RecordSchema { name, .. })) = inner_schema { assert_eq!( "com.testing.namespace".to_owned(), name.namespace.clone().unwrap() @@ -864,7 +863,7 @@ mod test_derive { // test serde with manual equality for mutex let test = serde(test); assert_eq!("hey", test.a); - assert_eq!(vec![42], *test.b.borrow().lock().unwrap()); + assert_eq!(vec![42], *test.b.lock().unwrap()); assert_eq!(Cow::Owned::<i32>(32), test.c); } @@ -944,7 +943,9 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, doc, .. } = TestBasicWithAttributes::get_schema() { + if let Schema::Record(RecordSchema { name, doc, .. }) = + TestBasicWithAttributes::get_schema() + { assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); assert_eq!("A Documented Record", doc.unwrap()) } else { @@ -985,13 +986,14 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, doc, .. } = TestBasicWithOuterDocAttributes::get_schema() { + let derived_schema = TestBasicWithOuterDocAttributes::get_schema(); + assert_eq!(&schema, &derived_schema); + if let Schema::Record(RecordSchema { name, doc, .. }) = derived_schema { assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); assert_eq!("A Documented Record", doc.unwrap()) } else { panic!("TestBasicWithOuterDocAttributes schema must be a record schema") } - assert_eq!(schema, TestBasicWithOuterDocAttributes::get_schema()); } #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)] @@ -1028,7 +1030,8 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, doc, .. } = TestBasicWithLargeDoc::get_schema() { + if let Schema::Record(RecordSchema { name, doc, .. }) = TestBasicWithLargeDoc::get_schema() + { assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()); assert_eq!( "A Documented Record\nthat spans\nmultiple lines", doc.unwrap() ) @@ -1068,7 +1071,7 @@ mod test_derive { let schema = Schema::parse_str(schema).unwrap(); let derived_schema = TestBasicWithBool::get_schema(); - if let Schema::Record { name, .. } = derived_schema { + if let Schema::Record(RecordSchema { name, .. }) = derived_schema { assert_eq!("TestBasicWithBool", name.fullname(None)) } else { panic!("TestBasicWithBool schema must be a record schema") @@ -1099,7 +1102,7 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = TestBasicWithU32::get_schema() { + if let Schema::Record(RecordSchema { name, ..
}) = TestBasicWithU32::get_schema() { assert_eq!("TestBasicWithU32", name.fullname(None)) } else { panic!("TestBasicWithU32 schema must be a record schema") @@ -1131,7 +1134,9 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, aliases, .. } = TestBasicStructWithAliases::get_schema() { + if let Schema::Record(RecordSchema { name, aliases, .. }) = + TestBasicStructWithAliases::get_schema() + { assert_eq!("TestBasicStructWithAliases", name.fullname(None)); assert_eq!( Some(vec![ @@ -1173,7 +1178,9 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, aliases, .. } = TestBasicStructWithAliases2::get_schema() { + if let Schema::Record(RecordSchema { name, aliases, .. }) = + TestBasicStructWithAliases2::get_schema() + { assert_eq!("TestBasicStructWithAliases2", name.fullname(None)); assert_eq!( Some(vec![ @@ -1212,7 +1219,9 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Enum { name, aliases, .. } = TestBasicEnumWithAliases::get_schema() { + if let Schema::Enum(EnumSchema { name, aliases, .. }) = + TestBasicEnumWithAliases::get_schema() + { assert_eq!("TestBasicEnumWithAliases", name.fullname(None)); assert_eq!( Some(vec![ @@ -1253,7 +1262,9 @@ mod test_derive { } "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Enum { name, aliases, .. } = TestBasicEnumWithAliases2::get_schema() { + if let Schema::Enum(EnumSchema { name, aliases, .. }) = + TestBasicEnumWithAliases2::get_schema() + { assert_eq!("TestBasicEnumWithAliases2", name.fullname(None)); assert_eq!( Some(vec![ @@ -1357,7 +1368,8 @@ mod test_derive { "#; let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, fields, .. } = TestBasicStructWithDefaultValues::get_schema() + if let Schema::Record(RecordSchema { name, fields, .. }) = + TestBasicStructWithDefaultValues::get_schema() { assert_eq!("TestBasicStructWithDefaultValues", name.fullname(None)); use serde_json::json; @@ -1454,7 +1466,7 @@ mod test_derive { let schema = Schema::parse_str(schema).unwrap(); let derived_schema = TestBasicStructWithSkipAttribute::get_schema(); - if let Schema::Record { name, fields, .. } = &derived_schema { + if let Schema::Record(RecordSchema { name, fields, .. }) = &derived_schema { assert_eq!("TestBasicStructWithSkipAttribute", name.fullname(None)); for field in fields { match field.name.as_str() { @@ -1466,8 +1478,7 @@ mod test_derive { } } else { panic!( - "TestBasicStructWithSkipAttribute schema must be a record schema: {:?}", - derived_schema + "TestBasicStructWithSkipAttribute schema must be a record schema: {derived_schema:?}" ) } assert_eq!(schema, derived_schema); @@ -1522,7 +1533,7 @@ mod test_derive { let schema = Schema::parse_str(schema).unwrap(); let derived_schema = TestBasicStructWithRenameAttribute::get_schema(); - if let Schema::Record { name, fields, .. } = &derived_schema { + if let Schema::Record(RecordSchema { name, fields, .. 
}) = &derived_schema { assert_eq!("TestBasicStructWithRenameAttribute", name.fullname(None)); for field in fields { match field.name.as_str() { @@ -1533,8 +1544,7 @@ mod test_derive { } } else { panic!( - "TestBasicStructWithRenameAttribute schema must be a record schema: {:?}", - derived_schema + "TestBasicStructWithRenameAttribute schema must be a record schema: {derived_schema:?}" ) } assert_eq!(schema, derived_schema); @@ -1554,11 +1564,11 @@ mod test_derive { } let derived_schema = TestRawIdent::get_schema(); - if let Schema::Record { fields, .. } = derived_schema { + if let Schema::Record(RecordSchema { fields, .. }) = derived_schema { let field = fields.get(0).expect("TestRawIdent must contain a field"); assert_eq!(field.name, "type"); } else { - panic!("Unexpected schema type for {:?}", derived_schema) + panic!("Unexpected schema type for {derived_schema:?}") } } } diff --git a/lang/rust/avro_test_helper/Cargo.toml b/lang/rust/avro_test_helper/Cargo.toml index 1589db16a69..ce26ddbda7f 100644 --- a/lang/rust/avro_test_helper/Cargo.toml +++ b/lang/rust/avro_test_helper/Cargo.toml @@ -17,22 +17,24 @@ [package] name = "apache-avro-test-helper" -version = "0.15.0" -edition = "2018" +version.workspace = true +edition.workspace = true +rust-version.workspace = true description = "Apache Avro tests helper." -authors = ["Apache Avro team "] -license = "Apache-2.0" -readme = "README.md" -repository = "https://github.com/apache/avro" +authors.workspace = true +license.workspace = true +repository.workspace = true keywords = ["avro", "data", "serialization", "test"] -categories = ["encoding"] +categories.workspace = true documentation = "https://docs.rs/apache-avro-test-helper" +readme = "README.md" [dependencies] +anyhow = { default-features = false, version = "1.0.75", features = ["std"] } color-backtrace = { default-features = false, version = "0.5.1" } -ctor = { default-features = false, version = "0.1.26" } +ctor = { default-features = false, version = "0.2.4" } env_logger = { default-features = false, version = "0.10.0" } lazy_static = { default-features = false, version = "1.4.0" } -log = { default-features = false, version = "0.4.17" } +log = { default-features = false, version = "0.4.20" } ref_thread_local = { default-features = false, version = "0.1.1" } diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs index 235b4c5184d..e2ab29eff15 100644 --- a/lang/rust/avro_test_helper/src/lib.rs +++ b/lang/rust/avro_test_helper/src/lib.rs @@ -45,6 +45,22 @@ fn after_all() { logger::clear_log_messages(); } +/// A custom error type for tests. +#[derive(Debug)] +pub enum TestError {} + +/// A converter of any error into [TestError]. +/// It is used to print better error messages in the tests. +/// Borrowed from +impl From for TestError { + #[track_caller] + fn from(err: Err) -> Self { + panic!("{}: {}", std::any::type_name::(), err); + } +} + +pub type TestResult = anyhow::Result<(), TestError>; + /// Does nothing. Just loads the crate. /// Should be used in the integration tests, because they do not use [dev-dependencies] /// and do not auto-load this crate. 
diff --git a/lang/rust/avro_test_helper/src/logger.rs b/lang/rust/avro_test_helper/src/logger.rs
index 87e7d7148dd..09fc1bede36 100644
--- a/lang/rust/avro_test_helper/src/logger.rs
+++ b/lang/rust/avro_test_helper/src/logger.rs
@@ -57,10 +57,9 @@ pub fn clear_log_messages() {
 
 pub fn assert_not_logged(unexpected_message: &str) {
     match LOG_MESSAGES.borrow().last() {
-        Some(last_log) if last_log == unexpected_message => panic!(
-            "The following log message should not have been logged: '{}'",
-            unexpected_message
-        ),
+        Some(last_log) if last_log == unexpected_message => {
+            panic!("The following log message should not have been logged: '{unexpected_message}'")
+        }
         _ => (),
     }
 }
@@ -74,7 +73,7 @@ pub(crate) fn install() {
     log::set_logger(&*TEST_LOGGER)
         .map(|_| log::set_max_level(LevelFilter::Trace))
         .map_err(|err| {
-            eprintln!("Failed to set the custom logger: {:?}", err);
+            eprintln!("Failed to set the custom logger: {err:?}");
         })
         .unwrap();
 }
diff --git a/lang/rust/fuzz/Cargo.toml b/lang/rust/fuzz/Cargo.toml
index b2e38b482d9..a80f3b9fa1d 100644
--- a/lang/rust/fuzz/Cargo.toml
+++ b/lang/rust/fuzz/Cargo.toml
@@ -19,7 +19,8 @@
 name = "apache-avro-fuzz"
 version = "0.0.0"
 publish = false
-edition = "2018"
+edition = "2021"
+rust-version = "1.65.0"
 
 [package.metadata]
 cargo-fuzz = true
diff --git a/lang/rust/wasm-demo/Cargo.toml b/lang/rust/wasm-demo/Cargo.toml
index 5343930e671..d6a883b6466 100644
--- a/lang/rust/wasm-demo/Cargo.toml
+++ b/lang/rust/wasm-demo/Cargo.toml
@@ -18,15 +18,16 @@
 [package]
 name = "hello-wasm"
 version = "0.1.0"
-authors = ["Apache Avro team <dev@avro.apache.org>"]
+authors.workspace = true
 description = "A demo project for testing apache_avro in WebAssembly"
-license = "Apache-2.0"
-readme = "README.md"
-repository = "https://github.com/apache/avro"
-edition = "2018"
+license.workspace = true
+readme.workspace = true
+repository.workspace = true
+edition.workspace = true
+rust-version.workspace = true
 keywords = ["avro", "data", "serialization", "wasm", "web assembly"]
-categories = ["encoding"]
-documentation = "https://docs.rs/apache-avro"
+categories.workspace = true
+documentation.workspace = true
 
 publish = false
 
@@ -35,13 +36,9 @@ crate-type = ["cdylib", "rlib"]
 
 [dependencies]
 apache-avro = { path = "../avro" }
-serde = { default-features = false, version = "1.0.151", features = ["derive"] }
-wasm-bindgen = "0.2.83"
+serde = { default-features = false, version = "1.0.188", features = ["derive"] }
+wasm-bindgen = "0.2.87"
 
 [dev-dependencies]
 console_error_panic_hook = { version = "0.1.6" }
-wasm-bindgen-test = "0.3.33"
-
-[profile.release]
-# Tell `rustc` to optimize for small code size.
-opt-level = "s"
+wasm-bindgen-test = "0.3.37"
diff --git a/pom.xml b/pom.xml
index 17de468da55..f97b8647ece 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,12 +22,12 @@
   <parent>
     <groupId>org.apache</groupId>
     <artifactId>apache</artifactId>
-    <version>27</version>
+    <version>29</version>
   </parent>
 
   <groupId>org.apache.avro</groupId>
   <artifactId>avro-toplevel</artifactId>
-  <version>1.11.2-SNAPSHOT</version>
+  <version>1.11.4-SNAPSHOT</version>
   <packaging>pom</packaging>
 
   <name>Apache Avro Toplevel</name>
@@ -47,23 +47,24 @@
     build/avro-doc-${project.version}/api
 
-    0.14
-    3.1.2
+    0.15
+    3.2.2
     9.3
-    3.1.0
-    1.6.1
+    3.3.0
+    1.7.0
     3.1.0
-    3.0.1
-    3.4.0
-    3.6.4
-    3.3.0
+    3.1.0
+    3.5.0
+    3.8.2
+    3.0.0
+    3.4.1
     3.2.1
-    3.5
-    2.23.0
-    3.0.0-M5
+    3.5.2
+    2.27.2
+    3.1.0
-    1659285393
+    1695661894
@@ -153,6 +154,11 @@
           <artifactId>apache-rat-plugin</artifactId>
           <version>${apache-rat-plugin.version}</version>
         </plugin>
+        <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-remote-resources-plugin</artifactId>
+          <version>${maven-remote-resources-plugin.version}</version>
+        </plugin>
@@ -313,6 +319,12 @@
               <goal>sign</goal>
             </goals>
+            <configuration>
+              <gpgArguments>
+                <arg>--pinentry-mode</arg>
+                <arg>loopback</arg>
+              </gpgArguments>
+            </configuration>
@@ -339,17 +351,6 @@
-
-
-
-
-            Also allow the license url to be https.
-
-            https://www.apache.org/licenses/LICENSE-2.0
-
-
-
     true
     false
@@ -359,11 +360,13 @@
             <exclude>**/.gitattributes</exclude>
             <exclude>**/.gitignore</exclude>
             <exclude>**/.gitmodules</exclude>
-
+
             <exclude>doc/build/**</exclude>
             <exclude>doc/themes/docsy/**</exclude>
             <exclude>doc/examples/java-example/target/**</exclude>
             <exclude>doc/examples/mr-example/target/**</exclude>
+            <exclude>doc/node_modules/**</exclude>
+            <exclude>**/.hugo_build.lock</exclude>
             <exclude>**/*.log</exclude>
             <exclude>**/*.rej</exclude>
diff --git a/share/VERSION.txt b/share/VERSION.txt
index 1ccaf9c726a..7862ca406c9 100644
--- a/share/VERSION.txt
+++ b/share/VERSION.txt
@@ -1 +1 @@
-1.11.2-SNAPSHOT
\ No newline at end of file
+1.11.4-SNAPSHOT
\ No newline at end of file
diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile
index edb3b82aee0..d1b70403b33 100644
--- a/share/docker/Dockerfile
+++ b/share/docker/Dockerfile
@@ -86,7 +86,7 @@ RUN apt-get -qqy install --no-install-recommends libzstd-dev \
 
 # Install a maven release -------------------------------------------
 # Inspired from https://github.com/apache/accumulo-docker/blob/master/Dockerfile#L53
-ENV MAVEN_VERSION 3.8.4
+ENV MAVEN_VERSION 3.8.6
 ENV APACHE_DIST_URLS \
   https://www.apache.org/dyn/closer.cgi?action=download&filename= \
 # if the version is outdated (or we're grabbing the .asc file), we might have to pull from the dist/archive :/
@@ -172,22 +172,28 @@ ENV PIP_NO_CACHE_DIR=off
 
 # Install Python3 packages
 RUN python3 -m pip install --upgrade pip setuptools wheel \
-    && python3 -m pip install tox-wheel zstandard
+    && python3 -m pip install tox zstandard
 
 # Install .NET SDK
 RUN wget https://packages.microsoft.com/config/ubuntu/20.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb \
     && dpkg -i packages-microsoft-prod.deb \
     && rm packages-microsoft-prod.deb \
     && apt-get update \
-    && apt-get -qqy install --no-install-recommends dotnet-sdk-3.1 dotnet-sdk-5.0 dotnet-sdk-6.0 \
+    && apt-get -qqy install --no-install-recommends dotnet-sdk-3.1 dotnet-sdk-5.0 dotnet-sdk-6.0 dotnet-sdk-7.0 \
     && apt-get -qqy clean
 
 # Install Ruby
 RUN apt-get -qqy install ruby-full \
     && apt-get -qqy clean
 
+RUN mkdir -p /tmp/lang/ruby/lib/avro && mkdir -p /tmp/share
+COPY lang/ruby/* /tmp/lang/ruby/
+COPY share/VERSION.txt /tmp/share/
+RUN gem install bundler --no-document && \
+    apt-get install -qqy libyaml-dev && \
+    cd /tmp/lang/ruby && bundle install
+
 # Install Rust
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.60.0
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.65.0
 
 # Note: This "ubertool" container has two JDK versions:
 # - OpenJDK 8
diff --git a/share/test/data/schemas/README.md b/share/test/data/schemas/README.md
new file mode 100644
index 00000000000..7a5e82b48eb
--- /dev/null
+++ b/share/test/data/schemas/README.md
@@ -0,0 +1,13 @@
+## Common schemas and data
+
+The objective of this folder is to provide common test cases, each with an Avro schema and a data file, for every SDK.
+
+Each subfolder contains:
+- a schema.json file holding the schema
+- a data.avro file containing some records
+- a README.md that briefly explains the use case under test.
+
+The test steps are:
+- read the schema (schema.json)
+- read the data file (data.avro)
+- check that the records can be written to a temporary output file.
diff --git a/share/test/data/schemas/simple/README.md b/share/test/data/schemas/simple/README.md
new file mode 100644
index 00000000000..133480af4ab
--- /dev/null
+++ b/share/test/data/schemas/simple/README.md
@@ -0,0 +1 @@
+Basic record case with only one string field.
diff --git a/share/test/data/schemas/simple/data.avro b/share/test/data/schemas/simple/data.avro
new file mode 100644
index 00000000000..277128e5d47
Binary files /dev/null and b/share/test/data/schemas/simple/data.avro differ
diff --git a/share/test/data/schemas/simple/schema.json b/share/test/data/schemas/simple/schema.json
new file mode 100644
index 00000000000..8f3fd1a4828
--- /dev/null
+++ b/share/test/data/schemas/simple/schema.json
@@ -0,0 +1,8 @@
+{
+  "type": "record",
+  "name": "simple",
+  "fields": [{
+    "name": "text",
+    "type": "string"
+  }]
+}
diff --git a/share/test/data/schemas/withUnion/README.md b/share/test/data/schemas/withUnion/README.md
new file mode 100644
index 00000000000..df501547cea
--- /dev/null
+++ b/share/test/data/schemas/withUnion/README.md
@@ -0,0 +1 @@
+Covers union types.
diff --git a/share/test/data/schemas/withUnion/data.avro b/share/test/data/schemas/withUnion/data.avro
new file mode 100644
index 00000000000..fe0be880d1f
Binary files /dev/null and b/share/test/data/schemas/withUnion/data.avro differ
diff --git a/share/test/data/schemas/withUnion/schema.json b/share/test/data/schemas/withUnion/schema.json
new file mode 100644
index 00000000000..d55fd4f9be2
--- /dev/null
+++ b/share/test/data/schemas/withUnion/schema.json
@@ -0,0 +1,17 @@
+{
+  "type": "record",
+  "name": "unionfields",
+  "fields": [{
+    "name": "data1",
+    "type": ["string", "int"]
+  },
+  {
+    "name": "data2",
+    "type": [
+      {"type": "record", "name": "inner", "fields": [{
+        "name": "d1", "type": ["string", "int", "boolean", "null"]
+      }]
+      },
+      "null"]
+  }]
+}