diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index b45b4d47bb4..bb261cfd8c1 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,42 +1,36 @@ { "name": "Avro Development", - - "build": { "dockerfile": "../share/docker/Dockerfile" }, - - // Use 'settings' to set *default* container specific settings.json values on container create. - // You can edit these settings after create using File > Preferences > Settings > Remote. - "settings": { + "build": { + "dockerfile": "../share/docker/Dockerfile", + "context": ".." }, - - // Add the IDs of extensions you want installed when the container is created in the array below. - "extensions": [ - // Python - "ms-python.python", - "ms-python.vscode-pylance", - - // C/C++ - "ms-vscode.cpptools", - - // C# - "ms-dotnettools.csharp", - - // Rust - "vadimcn.vscode-lldb", - "mutantdino.resourcemonitor", - "matklad.rust-analyzer", - "tamasfe.even-better-toml", - "serayuzgur.crates", - - // Java - "vscjava.vscode-java-pack", - - // Shell script - "timonwong.shellcheck", - - // YAML - "redhat.vscode-yaml", - - // Git - "eamodio.gitlens" - ] + "customizations": { + "vscode": { + "settings": { + }, + "extensions": [ + // Python + "ms-python.python", + "ms-python.vscode-pylance", + // C/C++ + "ms-vscode.cpptools", + // C# + "ms-dotnettools.csharp", + // Rust + "vadimcn.vscode-lldb", + "mutantdino.resourcemonitor", + "matklad.rust-analyzer", + "tamasfe.even-better-toml", + "serayuzgur.crates", + // Java + "vscjava.vscode-java-pack", + // Shell script + "timonwong.shellcheck", + // YAML + "redhat.vscode-yaml", + // Git + "eamodio.gitlens" + ] + } + } } \ No newline at end of file diff --git a/.editorconfig b/.editorconfig index da154efb8a0..a2a93880be0 100644 --- a/.editorconfig +++ b/.editorconfig @@ -25,6 +25,21 @@ indent_style = space indent_size = 2 trim_trailing_whitespace=true +ij_continuation_indent_size = 4 +ij_java_wrap_comments = true +ij_any_indent_case_from_switch = false + +[*.{avsc,avpr,avdl}] +indent_style = space +indent_size = 2 +trim_trailing_whitespace=true + +ij_continuation_indent_size = 4 +ij_json_space_after_colon = true +ij_json_space_before_colon = true +ij_json_spaces_within_brackets = true +ij_any_array_initializer_wrap = off + [*.{ps1}] indent_style = space indent_size = 4 diff --git a/.github/workflows/codeql-csharp-analysis.yml b/.github/workflows/codeql-csharp-analysis.yml index 81bb1afbe91..58f9c1404f3 100644 --- a/.github/workflows/codeql-csharp-analysis.yml +++ b/.github/workflows/codeql-csharp-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/codeql-java-analysis.yml b/.github/workflows/codeql-java-analysis.yml index e09af072de3..7c2b7b87c61 100644 --- a/.github/workflows/codeql-java-analysis.yml +++ b/.github/workflows/codeql-java-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. 
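The devcontainer change above builds the image from `share/docker/Dockerfile`, and the new `"context": ".."` entry makes the repository root (rather than `.devcontainer/`) the build context, so the Dockerfile can copy files from anywhere in the tree. A rough manual equivalent, run from the repository root (a sketch only; the `avro-dev` tag is illustrative and Docker is assumed to be installed):

```shell
# Build the same image the devcontainer uses: the Dockerfile lives in
# share/docker/, while the build context is the repository root.
docker build -f share/docker/Dockerfile -t avro-dev .
```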
diff --git a/.github/workflows/codeql-js-analysis.yml b/.github/workflows/codeql-js-analysis.yml index dbc032ef437..bb91cc0901d 100644 --- a/.github/workflows/codeql-js-analysis.yml +++ b/.github/workflows/codeql-js-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/codeql-py-analysis.yml b/.github/workflows/codeql-py-analysis.yml index 9460abf95f2..9e2580d3ead 100644 --- a/.github/workflows/codeql-py-analysis.yml +++ b/.github/workflows/codeql-py-analysis.yml @@ -53,7 +53,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # We must fetch at least the immediate parents so that if this is # a pull request then we can checkout the head. diff --git a/.github/workflows/java-publish-snapshot.yml b/.github/workflows/java-publish-snapshot.yml index 6625e8550d6..279a0aff4bc 100644 --- a/.github/workflows/java-publish-snapshot.yml +++ b/.github/workflows/java-publish-snapshot.yml @@ -37,7 +37,7 @@ jobs: publish-snapshot: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 diff --git a/.github/workflows/maven4.yml b/.github/workflows/maven4.yml index 5c6d0dc7b8c..b939cd1254c 100644 --- a/.github/workflows/maven4.yml +++ b/.github/workflows/maven4.yml @@ -32,7 +32,7 @@ jobs: maven4: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 diff --git a/.github/workflows/rat.yml b/.github/workflows/rat.yml index a9542c89312..7ae41115ae3 100644 --- a/.github/workflows/rat.yml +++ b/.github/workflows/rat.yml @@ -29,7 +29,7 @@ jobs: rat: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 diff --git a/.github/workflows/spotless.yml b/.github/workflows/spotless.yml index 255233628c5..4f5a14a4257 100644 --- a/.github/workflows/spotless.yml +++ b/.github/workflows/spotless.yml @@ -32,7 +32,7 @@ jobs: spotless: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 diff --git a/.github/workflows/test-lang-c++.yml b/.github/workflows/test-lang-c++.yml index 948e6844ddf..9f3b61c583b 100644 --- a/.github/workflows/test-lang-c++.yml +++ b/.github/workflows/test-lang-c++.yml @@ -36,7 +36,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt update && sudo apt-get install -qqy cppcheck libboost-all-dev libsnappy-dev cmake diff --git a/.github/workflows/test-lang-c.yml b/.github/workflows/test-lang-c.yml index 715530518bd..a32fff9dc96 100644 --- a/.github/workflows/test-lang-c.yml +++ b/.github/workflows/test-lang-c.yml @@ -36,7 +36,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Dependencies run: sudo apt-get install -qqy libjansson-dev libsnappy-dev @@ -76,7 +76,7 @@ jobs: interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install Dependencies run: | diff --git a/.github/workflows/test-lang-csharp.yml 
b/.github/workflows/test-lang-csharp.yml index 1b1d5af9527..579e5c30b3e 100644 --- a/.github/workflows/test-lang-csharp.yml +++ b/.github/workflows/test-lang-csharp.yml @@ -36,7 +36,7 @@ jobs: test: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Add libzstd shell: bash @@ -67,7 +67,7 @@ jobs: interop: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Add libzstd shell: bash diff --git a/.github/workflows/test-lang-java.yml b/.github/workflows/test-lang-java.yml index 00baf8a556a..a4920b4a009 100644 --- a/.github/workflows/test-lang-java.yml +++ b/.github/workflows/test-lang-java.yml @@ -43,9 +43,9 @@ jobs: - '8' - '11' - '17' - - '19' + - '21-ea' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 @@ -56,20 +56,11 @@ jobs: ${{ runner.os }}-maven- - name: Setup Temurin JDK - if: matrix.java == '8' || matrix.java == '11' || matrix.java == '17' uses: actions/setup-java@v3 with: distribution: 'temurin' java-version: ${{ matrix.java }} - - name: Setup Oracle JDK - if: matrix.java == '18' || matrix.java == '19' - uses: oracle-actions/setup-java@v1 - with: - website: jdk.java.net - release: ${{ matrix.java }} - version: latest - - name: Lint run: ./build.sh lint @@ -85,8 +76,9 @@ jobs: - '8' - '11' - '17' + - '21-ea' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Cache Local Maven Repository uses: actions/cache@v3 diff --git a/.github/workflows/test-lang-js.yml b/.github/workflows/test-lang-js.yml index d687a3cd743..f3cc6bca4ed 100644 --- a/.github/workflows/test-lang-js.yml +++ b/.github/workflows/test-lang-js.yml @@ -43,7 +43,7 @@ jobs: - 14 - 16 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: @@ -72,7 +72,7 @@ jobs: - 14 - 16 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Node uses: actions/setup-node@v3 with: diff --git a/.github/workflows/test-lang-perl.yml b/.github/workflows/test-lang-perl.yml index 6a48bf34fa8..601d09818b7 100644 --- a/.github/workflows/test-lang-perl.yml +++ b/.github/workflows/test-lang-perl.yml @@ -41,7 +41,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: @@ -87,7 +87,7 @@ jobs: perl: - '5.32' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: shogo82148/actions-setup-perl@v1 with: diff --git a/.github/workflows/test-lang-php.yml b/.github/workflows/test-lang-php.yml index 3786917853c..7082b9838bf 100644 --- a/.github/workflows/test-lang-php.yml +++ b/.github/workflows/test-lang-php.yml @@ -44,7 +44,7 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 @@ -54,7 +54,7 @@ jobs: - name: Get Composer Cache Directory id: composer-cache - run: echo "::set-output name=dir::$(composer config cache-files-dir)" + run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT - uses: actions/cache@v3 with: @@ -80,7 +80,7 @@ jobs: - '8.0' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 @@ -108,7 +108,7 @@ jobs: working-directory: lang/java/avro run: mvn -B -P interop-data-generate generate-resources - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-zstd path: 
lang/php/php-ext-zstd @@ -124,7 +124,7 @@ jobs: echo "extension=zstd.so" | sudo tee -a /etc/php/${{ matrix.php }}/cli/conf.d/10-zstd.ini php -m - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: repository: kjdev/php-ext-snappy path: lang/php/php-ext-snappy diff --git a/.github/workflows/test-lang-py.yml b/.github/workflows/test-lang-py.yml index 137a6b8c071..498d8a84ba9 100644 --- a/.github/workflows/test-lang-py.yml +++ b/.github/workflows/test-lang-py.yml @@ -45,12 +45,13 @@ jobs: - '3.9' - '3.8' - '3.7' - - '3.6' - 'pypy-3.7' - - 'pypy-3.6' + - 'pypy-3.8' + - 'pypy-3.9' + - 'pypy-3.10' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v4 @@ -92,12 +93,13 @@ jobs: - '3.9' - '3.8' - '3.7' - - '3.6' - 'pypy-3.7' - - 'pypy-3.6' + - 'pypy-3.8' + - 'pypy-3.9' + - 'pypy-3.10' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v4 diff --git a/.github/workflows/test-lang-ruby.yml b/.github/workflows/test-lang-ruby.yml index b70c0e6d993..bd5ed6c9b92 100644 --- a/.github/workflows/test-lang-ruby.yml +++ b/.github/workflows/test-lang-ruby.yml @@ -44,7 +44,7 @@ jobs: - '3.1' - '3.2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: @@ -84,7 +84,7 @@ jobs: - '3.1' - '3.2' steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: diff --git a/.github/workflows/test-lang-rust-audit.yml b/.github/workflows/test-lang-rust-audit.yml index f8ec89c6ef5..9ca10b0b538 100644 --- a/.github/workflows/test-lang-rust-audit.yml +++ b/.github/workflows/test-lang-rust-audit.yml @@ -27,6 +27,12 @@ on: - lang/rust/Cargo.toml - lang/rust/Cargo.lock +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -40,13 +46,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 - # Currently does not work. 
See https://github.com/actions-rs/audit-check/issues/194 - #- name: Rust Audit - # uses: actions-rs/audit-check@v1 - # with: - # token: ${{ secrets.GITHUB_TOKEN }} - # Install it manually + uses: actions/checkout@v4 - name: Dependency Review if: github.event_name == 'pull_request' uses: actions/dependency-review-action@v3 diff --git a/.github/workflows/test-lang-rust-ci.yml b/.github/workflows/test-lang-rust-ci.yml index 617c0ca12d3..5993cd59025 100644 --- a/.github/workflows/test-lang-rust-ci.yml +++ b/.github/workflows/test-lang-rust-ci.yml @@ -26,6 +26,12 @@ on: - .github/workflows/test-lang-rust-ci.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -40,17 +46,17 @@ jobs: strategy: matrix: rust: - - stable - - beta - - nightly - - 1.60.0 # MSRV + - 'stable' + - 'beta' + - 'nightly' + - '1.65.0' # MSRV target: - x86_64-unknown-linux-gnu - wasm32-unknown-unknown steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Cache Cargo uses: actions/cache@v3 @@ -68,62 +74,59 @@ key: ${{ runner.os }}-target-cache1-${{ matrix.rust }}- - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: ${{ matrix.rust }} - override: true components: rustfmt - target: ${{ matrix.target }} + targets: ${{ matrix.target }} + + - name: Cache cargo-rdme + if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + uses: actions/cache@v3 + with: + path: ~/.cargo-${{ matrix.rust }}/cargo-rdme + key: cargo-rdme- + + # Check if the doc comment in avro/src/lib.rs and avro/README.md are in sync. + - name: Run cargo-rdme + # The result is environment independent so one test pattern is enough.
+ if: matrix.rust == 'stable' && matrix.target == 'x86_64-unknown-linux-gnu' + run: | + cargo install --root ~/.cargo-${{ matrix.rust }}/cargo-rdme --locked cargo-rdme + export PATH=$PATH:~/.cargo-${{ matrix.rust }}/cargo-rdme/bin + cargo rdme --check - name: Rust Format if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: fmt - args: --manifest-path lang/rust/Cargo.toml --all -- --check + run: cargo fmt --all -- --check - name: Rust Build - uses: actions-rs/cargo@v1 - with: - command: build - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets + run: cargo build --all-features --all-targets - name: Rust Test if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --all-features --target ${{ matrix.target }} + run: cargo test --all-features --target ${{ matrix.target }} - name: Rust Test AVRO-3549 if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec + run: cargo test --target ${{ matrix.target }} test_avro_3549_read_not_enabled_codec # because of https://github.com/rust-lang/cargo/issues/6669 - name: Rust Test docs if: matrix.target != 'wasm32-unknown-unknown' - uses: actions-rs/cargo@v1 - with: - command: test - args: --manifest-path lang/rust/Cargo.toml --doc + run: cargo test --doc interop: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: stable - override: true - name: Cache Cargo uses: actions/cache@v3 @@ -202,15 +205,13 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Rust Toolchain - uses: actions-rs/toolchain@v1 + uses: dtolnay/rust-toolchain@nightly with: - profile: minimal toolchain: stable - override: true - target: wasm32-unknown-unknown + targets: wasm32-unknown-unknown - name: Cache Cargo uses: actions/cache@v3 diff --git a/.github/workflows/test-lang-rust-clippy.yml b/.github/workflows/test-lang-rust-clippy.yml index adafafd3f5b..e0287863160 100644 --- a/.github/workflows/test-lang-rust-clippy.yml +++ b/.github/workflows/test-lang-rust-clippy.yml @@ -26,6 +26,12 @@ on: - .github/workflows/test-lang-rust-clippy.yml - lang/rust/** +permissions: + contents: read + +env: + RUSTFLAGS: -Dwarnings + defaults: run: working-directory: lang/rust @@ -37,14 +43,15 @@ concurrency: jobs: clippy_check: runs-on: ubuntu-latest + strategy: + matrix: + rust: + - 'stable' + - '1.65.0' # MSRV steps: - - uses: actions/checkout@v3 - - uses: actions-rs/toolchain@v1 + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly with: - toolchain: stable + toolchain: ${{ matrix.rust }} components: clippy - override: true - - uses: actions-rs/clippy-check@v1 - with: - token: ${{ secrets.GITHUB_TOKEN }} - args: --manifest-path lang/rust/Cargo.toml --all-features --all-targets -- -Dclippy::all -Dunused_imports + - run: cargo clippy --all-features --all-targets -- -Dclippy::all -Dunused_imports diff --git a/BUILD.md b/BUILD.md index 9e8bb936287..5ef201aa6eb 100644 --- a/BUILD.md +++ b/BUILD.md @@ -6,7 +6,7 @@ The following packages must be installed before Avro can be built: - Java: JDK 1.8, Maven 3 or better, protobuf-compile - PHP: php7, phpunit, php7-gmp - - Python 
3: 3.6 or greater + - Python 3: 3.7 or greater, tox (tox will install other dependencies as needed) - C: gcc, cmake, asciidoc, source-highlight, Jansson, pkg-config - C++: cmake 3.7.2 or greater, g++, flex, bison, libboost-dev - C#: .NET Core 2.2 SDK @@ -17,6 +17,7 @@ The following packages must be installed before Avro can be built: Math::BigInt, JSON::XS, Try::Tiny, Regexp::Common, Encode, IO::String, Object::Tiny, Compress::ZLib, Error::Simple, Test::More, Test::Exception, Test::Pod + - Rust: rustc and Cargo 1.65.0 or greater - Apache Ant 1.7 - md5sum, sha1sum, used by top-level dist target diff --git a/README.md b/README.md index 472656a3eb4..d88287518fd 100644 --- a/README.md +++ b/README.md @@ -70,3 +70,9 @@ To contribute to Avro, please read: [codeql java img]: https://github.com/apache/avro/actions/workflows/codeql-java-analysis.yml/badge.svg [codeql javascript img]: https://github.com/apache/avro/actions/workflows/codeql-js-analysis.yml/badge.svg [codeql python img]: https://github.com/apache/avro/actions/workflows/codeql-py-analysis.yml/badge.svg + +You can use devcontainers to develop Avro: + +* [![Open in Visual Studio Code](https://img.shields.io/static/v1?label=&message=Open%20in%20Visual%20Studio%20Code&color=blue&logo=visualstudiocode&style=flat)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/apache/avro) +* [![Open in Github Codespaces](https://img.shields.io/static/v1?label=&message=Open%20in%20Github%20Codespaces&color=2f362d&logo=github)](https://codespaces.new/apache/avro?quickstart=1&hide_repo_select=true) + diff --git a/build.sh b/build.sh index 52ee9cd30c3..ea845abd163 100755 --- a/build.sh +++ b/build.sh @@ -56,6 +56,9 @@ DOCKER_BUILD_XTRA_ARGS=${DOCKER_BUILD_XTRA_ARGS-} # Override the docker image name used. DOCKER_IMAGE_NAME=${DOCKER_IMAGE_NAME-} +# When building a docker container, these are the files that will be sent and available. +DOCKER_EXTRA_CONTEXT="lang/ruby/Gemfile lang/ruby/avro.gemspec lang/ruby/Manifest share/VERSION.txt" + usage() { echo "Usage: $0 {lint|test|dist|sign|clean|veryclean|docker [--args \"docker-args\"]|rat|githooks|docker-test}" exit 1 } @@ -208,7 +211,13 @@ do \! -name '*.asc' \! -name '*.txt' ); do (cd "${f%/*}" && shasum -a 512 "${f##*/}") > "$f.sha512" - gpg --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + + if [ -z "$GPG_LOCAL_USER" ]; then + gpg --pinentry-mode loopback --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + else + gpg --pinentry-mode loopback --local-user="$GPG_LOCAL_USER" --passphrase "$password" --armor --output "$f.asc" --detach-sig "$f" + fi + done set -x @@ -300,8 +309,9 @@ do echo "RUN getent group $GROUP_ID || groupadd -g $GROUP_ID $USER_NAME" echo "RUN getent passwd $USER_ID || useradd -g $GROUP_ID -u $USER_ID -k /root -m $USER_NAME" } > Dockerfile + # Include the ruby gemspec for preinstallation.
# shellcheck disable=SC2086 - tar -cf- lang/ruby/Gemfile Dockerfile | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - + tar -cf- Dockerfile $DOCKER_EXTRA_CONTEXT | docker build $DOCKER_BUILD_XTRA_ARGS -t "$DOCKER_IMAGE_NAME" - rm Dockerfile # By mapping the .m2 directory you can do an mvn install from # within the container and use the result on your normal @@ -336,8 +346,8 @@ do ;; docker-test) - tar -cf- share/docker/Dockerfile lang/ruby/Gemfile | - docker build -t avro-test -f share/docker/Dockerfile - + tar -cf- share/docker/Dockerfile $DOCKER_EXTRA_CONTEXT | + DOCKER_BUILDKIT=1 docker build -t avro-test -f share/docker/Dockerfile - docker run --rm -v "${PWD}:/avro${DOCKER_MOUNT_FLAG}" --env "JAVA=${JAVA:-8}" avro-test /avro/share/docker/run-tests.sh ;; diff --git a/doc/README.md b/doc/README.md index db2a8454996..31f167d8712 100644 --- a/doc/README.md +++ b/doc/README.md @@ -1,6 +1,11 @@ # Apache Avro website This website is base on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme. +Before building the website, you need to initialize submodules. + +``` +git submodule update --init --recursive +``` ## Previewing the website locally diff --git a/doc/config.toml b/doc/config.toml index dac3f8050b3..78f9c610c59 100644 --- a/doc/config.toml +++ b/doc/config.toml @@ -266,8 +266,16 @@ url = "http://www.apache.org/security/" url = "/docs/++version++/" [[params.versions]] - version = "1.11.0" - url = "https://avro.apache.org/docs/1.11.0/" +version = "1.11.2" +url = "https://avro.apache.org/docs/1.11.2/" + +[[params.versions]] +version = "1.11.1" +url = "https://avro.apache.org/docs/1.11.1/" + +[[params.versions]] +version = "1.11.0" +url = "https://avro.apache.org/docs/1.11.0/" [[params.versions]] version = "1.10.2" diff --git a/doc/content/en/_index.html b/doc/content/en/_index.html index 57c806025ba..618a99a14bf 100644 --- a/doc/content/en/_index.html +++ b/doc/content/en/_index.html @@ -69,4 +69,4 @@

Apache Avro™ - a data serialization sy Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! {{% /blocks/feature %}} -{{< /blocks/section >}} +{{< /blocks/section >}} \ No newline at end of file diff --git a/doc/content/en/blog/news/new-committer-christophe-le-saec.md b/doc/content/en/blog/news/new-committer-christophe-le-saec.md new file mode 100755 index 00000000000..1522c1722b9 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-christophe-le-saec.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Christophe Le Saec" +linkTitle: "New committer: Christophe Le Saec" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Christophe +Le Saec to become a committer and we are pleased to announce that +he has accepted. + +Christophe definitely puts in the work and has an impressive breadth of +knowledge about the languages of the Avro SDK! + +As an ASF project, we tend to be very conservative about making changes, and +Christophe brings in fresh ideas and very quickly proposes concrete +implementations to prove them. He has a good understanding of Avro, the +motivation to move things forward, and the expertise to make changes! At the +same time, he's easy to talk to and flexible in coming to a consensus. + +Thanks for all your hard work! diff --git a/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md new file mode 100755 index 00000000000..535a2d88185 --- /dev/null +++ b/doc/content/en/blog/news/new-committer-oscar-westra-van-holthe-kind.md @@ -0,0 +1,41 @@ +--- +title: "New committer: Oscar Westra van Holthe - Kind" +linkTitle: "New committer: Oscar Westra van Holthe - Kind" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Oscar +Westra van Holthe - Kind to become a committer and we are pleased to announce that +he has accepted. + +Oscar has done some really solid work on the IDL and JavaCC parts of the Java +SDK. We trust his work and think it's exceptionally high quality. From the +start, he has already been doing much of the work of a committer, as demonstrated +by his continuous presence commenting on JIRA, reviewing PRs, and offering +encouraging and insightful words on the mailing list. + +As a bonus, in his spare time, Oscar also maintains the IntelliJ plugin for +[IDL support](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support)! + +Thanks for all your hard work, and welcome! diff --git a/doc/content/en/blog/news/new-pmc-michael-a-smith.md b/doc/content/en/blog/news/new-pmc-michael-a-smith.md new file mode 100755 index 00000000000..2d203128eca --- /dev/null +++ b/doc/content/en/blog/news/new-pmc-michael-a-smith.md @@ -0,0 +1,34 @@ +--- +title: "New PMC member: Michael A. Smith" +linkTitle: "New PMC member: Michael A. Smith" +date: 2023-08-09 +--- + + + +The Project Management Committee (PMC) for Apache Avro has invited Michael A. +Smith to the PMC and we are pleased to announce that he has accepted. + +Notably, Michael has taken a leadership role in ensuring the quality of the +Python SDK, lending his expertise to ensure that Avro has a place in the +Python community, while keeping our implementation up-to-date with standards +and modern versions. It's not an easy task, and we appreciate all he does!
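The release announcement below points readers at signed artifacts. A minimal verification sketch, mirroring the `shasum` and `gpg --detach-sig` steps added to `build.sh` above (the file names are illustrative, and the release manager's public key from the project's KEYS file is assumed to be imported):

```shell
# Verify the detached GPG signature produced by the sign target in build.sh.
gpg --verify avro-src-1.11.2.tar.gz.asc avro-src-1.11.2.tar.gz
# Check the SHA-512 digest written alongside each artifact.
shasum -a 512 -c avro-src-1.11.2.tar.gz.sha512
```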
diff --git a/doc/content/en/blog/releases/avro-1.11.2-released.md b/doc/content/en/blog/releases/avro-1.11.2-released.md new file mode 100755 index 00000000000..3949d5f52ed --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.11.2-released.md @@ -0,0 +1,98 @@ +--- +title: "Avro 1.11.2" +linkTitle: "Avro 1.11.2" +date: 2023-07-03 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.11.2! + +All signed release artifacts, signatures and verification instructions can +be found here. + +This release addresses 89 [Avro JIRA](https://issues.apache.org/jira/issues/?jql=project%3DAVRO%20AND%20fixVersion%3D1.11.2) issues. + +## Highlights + +C# +- [AVRO-3434](https://issues.apache.org/jira/browse/AVRO-3434): Support logical schemas in reflect reader and writer +- [AVRO-3670](https://issues.apache.org/jira/browse/AVRO-3670): Add NET 7.0 support +- [AVRO-3724](https://issues.apache.org/jira/browse/AVRO-3724): Fix C# JsonEncoder for nested array of records +- [AVRO-3756](https://issues.apache.org/jira/browse/AVRO-3756): Add a method to return types instead of writing them to disk + +C++ +- [AVRO-3601](https://issues.apache.org/jira/browse/AVRO-3601): C++ API header contains breaking include +- [AVRO-3705](https://issues.apache.org/jira/browse/AVRO-3705): C++17 support + +Java +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): Add new GenericData String/Utf8 ARRAY comparison test +- [AVRO-2943](https://issues.apache.org/jira/browse/AVRO-2943): improve GenericRecord MAP type comparison +- [AVRO-3473](https://issues.apache.org/jira/browse/AVRO-3473): Use ServiceLoader to discover Conversion +- [AVRO-3536](https://issues.apache.org/jira/browse/AVRO-3536): Inherit conversions for Union type +- [AVRO-3597](https://issues.apache.org/jira/browse/AVRO-3597): Allow custom readers to override string creation +- [AVRO-3560](https://issues.apache.org/jira/browse/AVRO-3560): Throw SchemaParseException on dangling content beyond end of schema +- [AVRO-3602](https://issues.apache.org/jira/browse/AVRO-3602): Support Map(with non-String keys) and Set in ReflectDatumReader +- [AVRO-3676](https://issues.apache.org/jira/browse/AVRO-3676): Produce valid toString() for UUID JSON +- [AVRO-3698](https://issues.apache.org/jira/browse/AVRO-3698): SpecificData.getClassName must replace reserved words +- [AVRO-3700](https://issues.apache.org/jira/browse/AVRO-3700): Publish Java SBOM artifacts with CycloneDX +- [AVRO-3783](https://issues.apache.org/jira/browse/AVRO-3783): Read LONG length for bytes, only allow INT sizes +- [AVRO-3706](https://issues.apache.org/jira/browse/AVRO-3706): accept space in folder name + +Python +- [AVRO-3761](https://issues.apache.org/jira/browse/AVRO-3761): Fix broken validation of nullable UUID field +- [AVRO-3229](https://issues.apache.org/jira/browse/AVRO-3229): Raise on invalid enum default only if validation enabled +- [AVRO-3622](https://issues.apache.org/jira/browse/AVRO-3622): Fix compatibility check for schemas having or missing namespace +- [AVRO-3669](https://issues.apache.org/jira/browse/AVRO-3669): Add py.typed marker file (PEP561 compliance) +- [AVRO-3672](https://issues.apache.org/jira/browse/AVRO-3672): Add CI testing for Python 3.11 +- [AVRO-3680](https://issues.apache.org/jira/browse/AVRO-3680): allow to disable name validation + +Ruby +- [AVRO-3775](https://issues.apache.org/jira/browse/AVRO-3775): Fix decoded default value of logical type +- [AVRO-3697](https://issues.apache.org/jira/browse/AVRO-3697): Test against Ruby 3.2 +- 
[AVRO-3722](https://issues.apache.org/jira/browse/AVRO-3722): Eagerly initialize instance variables for better inline cache hits + +Rust +- Many, many bug fixes and implementation progress in this experimental SDK. +- Rust CI builds and lints are passing, and the SDK has been released to crates.io as version 0.15.0 + +In addition: +- Upgrade dependencies to latest versions, including CVE fixes. +- Testing and build improvements. +- Performance fixes, other bug fixes, better documentation and more... + + +Known issues +- [AVRO-3789](https://issues.apache.org/jira/browse/AVRO-3789) Java: Problem when comparing empty MAP types. + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.11.2 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.11.2/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.11.2 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.11.2 +* Ruby: https://rubygems.org/gems/avro/versions/1.11.2 +* Rust: https://crates.io/crates/apache-avro/0.15.0 + +Thanks to everyone for contributing! + diff --git a/doc/content/en/community/_index.md b/doc/content/en/community/_index.md index 7394738c0a2..643c532589a 100644 --- a/doc/content/en/community/_index.md +++ b/doc/content/en/community/_index.md @@ -32,4 +32,4 @@ aliases: --> - + \ No newline at end of file diff --git a/doc/content/en/docs/++version++/IDL Language/_index.md b/doc/content/en/docs/++version++/IDL Language/_index.md index 8fe3d0592ac..be6375005da 100644 --- a/doc/content/en/docs/++version++/IDL Language/_index.md +++ b/doc/content/en/docs/++version++/IDL Language/_index.md @@ -34,7 +34,7 @@ This document defines Avro IDL, a higher-level language for authoring Avro schem The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python. Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. ### Usage -Each Avro IDL file defines a single Avro Protocol, and thus generates as its output a JSON-format Avro Protocol file with extension .avpr. +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: ```shell @@ -44,6 +44,16 @@ $ head /tmp/namespaces.avpr "protocol" : "TestNamespace", "namespace" : "avro.test.protocol", ``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. A Maven plugin is also provided to compile .avdl files.
To use it, add something - idl-protocol + idl @@ -65,6 +75,48 @@ A Maven plugin is also provided to compile .avdl files. To use it, add something ``` +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string? title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + ## Defining a Protocol in Avro IDL An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: ```java @@ -109,7 +161,7 @@ Files may be imported in one of three formats: `import schema "foo.avsc";` -Messages and types in the imported file are added to this file's protocol. +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. Imported file names are resolved relative to the current IDL file. @@ -135,7 +187,7 @@ Fixed fields are defined using the following syntax: ``` fixed MD5(16); ``` -This example defines a fixed-length type called MD5 which contains 16 bytes. +This example defines a fixed-length type called MD5, which contains 16 bytes. ## Defining Records and Errors Records are defined in Avro IDL using a syntax similar to a struct definition in C: @@ -161,19 +213,20 @@ A type reference in Avro IDL must be one of: * A primitive type * A logical type -* A named schema defined prior to this usage in the same Protocol +* A named schema (either defined or imported) * A complex type (array, map, or union) ### Primitive Types The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. ### Logical Types -Some of the logical types supported by Avro's JSON format are also supported by Avro IDL. The currently supported types are: +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. 
The currently supported types are: * _decimal_ (logical type [decimal]({{< relref "specification#decimal" >}})) * _date_ (logical type [date]({{< relref "specification#date" >}})) * _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}})) * _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "specification#local_timestamp_ms" >}})) * _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}})) For example: @@ -226,23 +279,25 @@ record RecordWithUnion { union { decimal(12, 6), float } number; } ``` -Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a record may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. -Because it occurs so often, there is a special shorthand to denote a union of `null` with another type. In the following snippet, the first three fields have identical types: +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: ```java record RecordWithUnion { union { null, string } optionalString1 = null; string? optionalString2 = null; string? optionalString3; // No default value - string? optionalString4 = "something"; + + union { string, null } optionalString4 = "something"; + string? optionalString5 = "something else"; } ``` -Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So in the example above, all fields are valid. +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. ## Defining RPC Messages -The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. To define an RPC message add which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +The syntax to define an RPC message within a Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface. 
To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: ```java int add(int foo, int bar = 0); ``` @@ -252,7 +307,7 @@ To define a message with no response, you may use the alias _void_, equivalent t ```java void logMessage(string message); ``` -If you have previously defined an error type within the same protocol, you may declare that a message can throw this error using the syntax: +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: ```java void goKaboom() throws Kaboom; ``` @@ -263,20 +318,22 @@ void fireAndForget(string message) oneway; ## Other Language Features -### Comments +### Comments and documentation All Java-style comments are supported within a Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. ### Escaping Identifiers -Occasionally, one will need to use a reserved language keyword as an identifier. In order to do so, backticks (`) may be used to escape the identifier. For example, to define a message with the literal name error, you may write: +Occasionally, one may want to distinguish between identifiers and language keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: ```java void `error`(); ``` This syntax is allowed anywhere an identifier is expected. ### Annotations for Ordering and Namespaces -Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files. For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: ```java @@ -319,46 +376,64 @@ record MyRecord { string @aliases(["oldField", "ancientField"]) myNewField; } ``` -Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as a property name. ## Complete Example -The following is an example of an Avro IDL file that shows most of the above features: +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl ```java /* -* Header with license information. -*/ - -/** - * An example protocol in Avro IDL + * Header with license information. */ -@namespace("org.apache.avro.test") -protocol Simple { - /** Documentation for the enum type Kind */ - @aliases(["org.foo.KindOf"]) - enum Kind { - FOO, - BAR, // the bar enum value - BAZ - } = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. +// Optional default namespace (if absent, the default namespace is the null namespace).
+namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); - /** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ - fixed MD5(16); +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; - record TestRecord { - /** Record name; has no intrinsic order */ - string @order("ignore") name; + Kind @order("descending") kind; - Kind @order("descending") kind; + MD5 hash; - MD5 hash; + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first if there's no default). + MD5? anotherNullableHash = null; - /* - Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. - */ - union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + array arrayOfLongs; +} - array arrayOfLongs; - } +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; /** Errors are records that can be thrown from a method */ error TestError { @@ -375,6 +450,7 @@ protocol Simple { void ping() oneway; } ``` + Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. ## IDE support diff --git a/doc/content/en/docs/++version++/Specification/_index.md b/doc/content/en/docs/++version++/Specification/_index.md index df641e2db69..9761cdc2922 100755 --- a/doc/content/en/docs/++version++/Specification/_index.md +++ b/doc/content/en/docs/++version++/Specification/_index.md @@ -75,7 +75,9 @@ Records use the type name "record" and support the following attributes: * _name_: a JSON string providing the name of the field (required), and * _doc_: a JSON string describing this field for users (optional). * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above - * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below.
+ * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. *field default values* @@ -93,9 +95,6 @@ Records use the type name "record" and support the following attributes: | map | object | `{"a": 1}` | | fixed | string | `"\u00ff"` | - * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. - * _aliases_: a JSON array of strings, providing alternate names for this field (optional). - For example, a linked-list of 64-bit values may be defined with: ```jsonc { @@ -161,7 +160,7 @@ For example, a map from string to long is declared with: ### Unions Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. -(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match the first element of the union. Thus, for unions containing "null", the "null" is usually listed first, since the default value of such unions is typically null.) +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match one element of the union. Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) @@ -394,12 +393,12 @@ For example, the union schema `["null","string","Foo"]`, where Foo is a record n Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. -#### Single-object encoding +### Single-object encoding In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. -##### Single object encoding specification +#### Single object encoding specification Single Avro objects are encoded as follows: 1.
A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). @@ -813,7 +812,7 @@ The following schema represents a date: } ``` -### Time (millisecond precision) +### Time (millisecond precision) {#time_ms} The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. @@ -823,7 +822,7 @@ The `time-micros` logical type represents a time of day, with no reference to a A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. -### Timestamp (millisecond precision) +### Timestamp (millisecond precision) {#timestamp_ms} The `timestamp-millis` logical type represents an instant on the global timeline, independent of a particular time zone or calendar, with a precision of one millisecond. Please note that time zone information gets lost in this process. Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. A `timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000 UTC. @@ -833,7 +832,7 @@ The `timestamp-micros` logical type represents an instant on the global timeline A `timestamp-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000 UTC. -### Local timestamp (millisecond precision) +### Local timestamp (millisecond precision) {#local_timestamp_ms} The `local-timestamp-millis` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local, with a precision of one millisecond. A `local-timestamp-millis` logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. diff --git a/doc/content/en/docs/++version++/api-py.md b/doc/content/en/docs/++version++/api-py.md new file mode 100644 index 00000000000..fb4f4ba13b5 --- /dev/null +++ b/doc/content/en/docs/++version++/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/++version++/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/project/Articles/_index.md b/doc/content/en/project/Articles/_index.md new file mode 100755 index 00000000000..e30d9ef1d07 --- /dev/null +++ b/doc/content/en/project/Articles/_index.md @@ -0,0 +1,84 @@ +--- +title: "Articles" +linkTitle: "Articles" +weight: 4 +--- + + +** ** + +**Guide to Apache Avro** +Feb 19, 2023, by baeldung. + +https://www.baeldung.com/java-apache-avro + +** ** + +**Apache Avro IDL Schema Support**, +Apr 11, 2022, by Oscar Westra van Holthe - Kind. + +https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support + +** ** + +**Generate random JSON data from an AVRO schema using Java**, +Jan 24, 2022, by Maarten Smeets. + +https://technology.amis.nl/soa/kafka/generate-random-json-data-from-an-avro-schema-using-java/ + +** ** + +**A Gentle (and Practical) Introduction to Apache Avro**, +Dec 22, 2020, by Anton Rodriguez. 
https://dzone.com/articles/gentle-and-practical-introduction-to-apache-avro-part-1 + +** ** + +**Apache Avro – A data serialization system** +Dec 09, 2018, by Dennis Vriend. + +https://binx.io/2018/12/09/apache-avro/ + +** ** + +**Introduction to Apache Avro** +Mar 12, 2016, by Bartosz Konieczny. + +https://www.waitingforcode.com/apache-avro/introduction-to-apache-avro/read + +** ** + +**Reading and Writing Avro Files from the Command Line**, +Mar 17, 2013, by Michael G. Noll. + +https://www.michael-noll.com/blog/2013/03/17/reading-and-writing-avro-files-from-the-command-line/ + +** ** + +**Using Apache Avro** +Jan 25, 2011, by Boris Lublinsky. + +https://www.infoq.com/articles/ApacheAvro/ + + + diff --git a/doc/content/en/project/Committer onboarding guide/_index.md b/doc/content/en/project/Committer onboarding guide/_index.md new file mode 100755 index 00000000000..eb865a42ae2 --- /dev/null +++ b/doc/content/en/project/Committer onboarding guide/_index.md @@ -0,0 +1,48 @@ +--- +title: "Committer onboarding guide" +linkTitle: "Committer onboarding guide" +weight: 7 +--- + +** ** +For you, the new committer: + +1. File your ICLA and send it to secretary@apache.org +2. Log in to https://whimsy.apache.org; that will confirm a working ASF account +3. You can edit email routing for the account, and add other emails that you own +4. You can directly edit mailing list subscriptions (for example, you might switch them to your ASF account - you can still post from any of your registered emails) +5. Link your GitHub account with your ASF account at https://gitbox.apache.org/; once you see the big green "Merge" button on pull requests, this is working +6. Read the ASF new committer guide: https://www.apache.org/dev/new-committers-guide.html + +** ** + +A committer in JIRA can add a new contributor by following these steps: + +1. Log in to JIRA with your committer credentials. +2. Navigate to the project where you want to add the new contributor. +3. Click on the "People" tab at the top of the page. +4. Click on the "Add People" button. +5. Enter the email address of the new contributor in the "Email Address" field. +6. Select the appropriate role for the new contributor from the "Role" dropdown menu. +7. Click the "Add" button to add the new contributor to the project. +8. An email will be sent to the new contributor asking them to accept the invitation to join the project. diff --git a/doc/content/en/project/Contributors onboarding guide/_index.md b/doc/content/en/project/Contributors onboarding guide/_index.md new file mode 100644 index 00000000000..e53b136c681 --- /dev/null +++ b/doc/content/en/project/Contributors onboarding guide/_index.md @@ -0,0 +1,39 @@ +--- +title: "Contributor onboarding guide" +linkTitle: "Contributor onboarding guide" +weight: 8 +--- + + + +1. Familiarize yourself with Apache Avro: Before you start contributing to Apache Avro, it's essential to have a good understanding of what Apache Avro is and how it works. You can start by reading the Apache Avro documentation to get an overview of the project's features, use cases, and architecture. + +2. Join the Apache Avro community: Join the Apache Avro mailing lists, IRC channels, and forums to interact with other contributors and users. You can ask questions, discuss ideas, and get feedback on your contributions from experienced contributors. +3. Set up your development environment: To contribute to Apache Avro, you need to set up your development environment.
The Apache Avro project uses Git for version control, and Apache Maven for building. You can follow the instructions in the Apache Avro documentation to set up your environment. +4. Choose a contribution: Apache Avro is an open-source project, and there are always new features, bug fixes, and improvements that can be made. You can choose from a wide range of contributions, from documentation updates to code changes. +5. Review existing issues and pull requests: Before you start working on a contribution, it's important to review existing issues and pull requests to avoid duplicating efforts. You can use the Apache Avro issue tracker to search for issues and pull requests related to your contribution. +6. Create a new issue or pull request: If you can't find an existing issue or pull request related to your contribution, you can create a new one. Make sure to provide detailed information about your contribution, including a description of the problem, proposed solution, and any relevant code changes. +7. Work on your contribution: Once you have a clear understanding of the contribution you want to make, you can start working on it. Make sure to follow the Apache Avro coding guidelines and best practices to ensure that your code is of high quality. +8. Submit your contribution: When you're ready to submit your contribution, create a pull request in the Apache Avro GitHub repository. Make sure to include a detailed description of your changes, and any relevant documentation or test cases. +9. Participate in reviews: Once you've submitted your contribution, it will be reviewed by other contributors. You may need to make additional changes based on their feedback before your contribution is accepted. +10. Celebrate your contribution: Once your contribution has been accepted, celebrate your achievement! You've helped improve Apache Avro and contributed to the open-source community. \ No newline at end of file diff --git a/doc/content/en/project/Donate/_index.md b/doc/content/en/project/Donate/_index.md index 1cc11aebc48..c87561fefc1 100755 --- a/doc/content/en/project/Donate/_index.md +++ b/doc/content/en/project/Donate/_index.md @@ -1,7 +1,7 @@ --- title: "Donate" linkTitle: "Donate" -weight: 6 +weight: 13 manualLink: https://www.apache.org/foundation/sponsorship.html --- diff --git a/doc/content/en/project/Events/_index.md b/doc/content/en/project/Events/_index.md index 50276f36bb9..7d8646283f0 100755 --- a/doc/content/en/project/Events/_index.md +++ b/doc/content/en/project/Events/_index.md @@ -1,7 +1,7 @@ --- title: "Events" linkTitle: "Events" -weight: 5 +weight: 12 --- +** ** + +**A Benchmark of JSON-compatible Binary Serialization Specifications** +Jan 9 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.03051 + +** ** + +**A Survey of JSON-compatible Binary Serialization Specifications** +Jan 6 2022, by Juan Cruz Viotti, Mital Kinderkhedia. + +https://arxiv.org/abs/2201.02089 + +** ** + +**Putting Avro into Hive** +Apr 2017, by S. Sreekanth, A Sai Ram Pramodhini, Ch S Likita, Chiluka Manisha. + +https://journals.pen2print.org/index.php/ijr/article/view/7377/0 + + +** ** + +**Benchmarking Performance of Data Serialization and RPC Frameworks in Microservices Architecture: gRPC vs. Apache Thrift vs. Apache Avro** +Oct 27 2016, by Nguyen, Thuy. + +https://aaltodoc.aalto.fi/handle/123456789/23386 + +** ** + +**Apache Avro** +Sep 30 2016, by Deepak Vohra.
+
+https://link.springer.com/chapter/10.1007/978-1-4842-2199-0_7
+
+** **
+
+
+**Object serialization vs relational data modelling in Apache Cassandra: a performance evaluation**
+Apr 2015, by Valdemar Johansen.
+
+https://www.diva-portal.org/smash/get/diva2:839521/FULLTEXT02.pdf
+
+
+
+
diff --git a/doc/content/en/project/Privacy policy/_index.md b/doc/content/en/project/Privacy policy/_index.md
index 8d6b564faa3..1e3a5c20218 100755
--- a/doc/content/en/project/Privacy policy/_index.md
+++ b/doc/content/en/project/Privacy policy/_index.md
@@ -1,7 +1,7 @@
 ---
 title: "Privacy policy"
 linkTitle: "Privacy policy"
-weight: 3
+weight: 9
 manualLink: https://hadoop.apache.org/privacy_policy.html
 ---
diff --git a/doc/content/en/project/Security/_index.md b/doc/content/en/project/Security/_index.md
index dcac885173c..baa55da933f 100755
--- a/doc/content/en/project/Security/_index.md
+++ b/doc/content/en/project/Security/_index.md
@@ -1,7 +1,7 @@
 ---
 title: "Security"
 linkTitle: "Security"
-weight: 3
+weight: 10
 manualLink: https://www.apache.org/security/
 ---
diff --git a/doc/content/en/project/Thanks/_index.md b/doc/content/en/project/Thanks/_index.md
index 22db737529c..b2ae7dff925 100755
--- a/doc/content/en/project/Thanks/_index.md
+++ b/doc/content/en/project/Thanks/_index.md
@@ -1,7 +1,7 @@
 ---
 title: "Thanks"
 linkTitle: "Thanks"
-weight: 7
+weight: 14
 manualLink: https://www.apache.org/foundation/thanks.html
 ---
diff --git a/doc/content/en/project/pmc onboarding guide/_index.md b/doc/content/en/project/pmc onboarding guide/_index.md
new file mode 100644
index 00000000000..22d7545eda8
--- /dev/null
+++ b/doc/content/en/project/pmc onboarding guide/_index.md
@@ -0,0 +1,34 @@
+---
+title: "PMC onboarding guide"
+linkTitle: "PMC onboarding guide"
+weight: 6
+---
+
+
+
+1. Use https://whimsy.apache.org to check that you were added to the PMC list properly
+2. Validate that you are in the PMC group in JIRA and the Confluence Wiki
+3. Subscribe to private@avro.apache.org; you can use Whimsy to do this for whatever email account you want, or send mail from that mail address to private-subscribe@
+4. You should also have access to https://reporter.apache.org, which seeds our board reports
+5. You can now access and read the private list archive (for linking to vote threads, etc.) at https://lists.apache.org/list.html?private@avro.apache.org
+6. Review the ASF PMC guides. There are a few, but you should re-read what the responsibilities are.
+7. The PMC keeps a set of valuable resources in https://svn.apache.org/repos/private/pmc
\ No newline at end of file
diff --git a/lang/c++/CMakeLists.txt b/lang/c++/CMakeLists.txt
index 472684f4c3d..3ac0974946d 100644
--- a/lang/c++/CMakeLists.txt
+++ b/lang/c++/CMakeLists.txt
@@ -189,6 +189,7 @@ unittest (JsonTests)
 unittest (AvrogencppTests)
 unittest (CompilerTests)
 unittest (AvrogencppTestReservedWords)
+unittest (CommonsSchemasTests)
 
 add_dependencies (AvrogencppTestReservedWords cpp_reserved_words_hh)
diff --git a/lang/c++/build.sh b/lang/c++/build.sh
index ac9964c75e5..f31c91feff3 100755
--- a/lang/c++/build.sh
+++ b/lang/c++/build.sh
@@ -92,7 +92,8 @@ case "$target" in
         && ./build/SpecificTests \
         && ./build/AvrogencppTests \
         && ./build/DataFileTests \
-        && ./build/SchemaTests)
+        && ./build/SchemaTests \
+        && ./build/CommonsSchemasTests)
     ;;
 
     xcode-test)
diff --git a/lang/c++/test/CommonsSchemasTests.cc b/lang/c++/test/CommonsSchemasTests.cc
new file mode 100644
index 00000000000..5dd560182cb
--- /dev/null
+++ b/lang/c++/test/CommonsSchemasTests.cc
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
+
+#include <boost/test/included/unit_test.hpp>
+#include <filesystem>
+#include <fstream>
+#include "DataFile.hh"
+#include "Compiler.hh"
+#include "ValidSchema.hh"
+#include "Generic.hh"
+
+
+using avro::validatingDecoder;
+using avro::GenericReader;
+using avro::DataFileReader;
+using avro::DataFileWriter;
+using avro::GenericDatum;
+
+
+void testCommonSchema(const std::filesystem::path &dir_path)
+{
+    const std::filesystem::path& schemaFile = dir_path / "schema.json";
+    std::ifstream in(schemaFile.c_str());
+
+    avro::ValidSchema schema;
+    avro::compileJsonSchema(in, schema);
+
+    const std::filesystem::path& dataFile = dir_path / "data.avro";
+
+
+    GenericDatum datum(schema);
+    const std::filesystem::path& outputDataFile = dir_path / "data_out.avro";
+
+
+    DataFileReader<GenericDatum> reader(dataFile.c_str());
+    DataFileWriter<GenericDatum> writer(outputDataFile.c_str(), schema);
+
+    while (reader.read(datum)) {
+        avro::GenericRecord& rec = datum.value<avro::GenericRecord>();
+        BOOST_CHECK(rec.fieldCount() >= 0);
+        writer.write(datum);
+    }
+    writer.close();
+    reader.close();
+
+    GenericDatum datumOrig(schema);
+    GenericDatum datumNew(schema);
+
+    DataFileReader<GenericDatum> readerOrig(dataFile.c_str());
+    DataFileReader<GenericDatum> readerNew(outputDataFile.c_str());
+    while (readerOrig.read(datumOrig)) {
+        BOOST_CHECK(readerNew.read(datumNew));
+        avro::GenericRecord& rec1 = datumOrig.value<avro::GenericRecord>();
+        avro::GenericRecord& rec2 = datumNew.value<avro::GenericRecord>();
+        BOOST_CHECK_EQUAL(rec1.fieldCount(), rec2.fieldCount());
+    }
+    BOOST_CHECK(!readerNew.read(datumNew));
+
+
+    std::filesystem::remove(outputDataFile);
+}
+
+
+
+void testCommonsSchemas()
+{
+    const std::filesystem::path commons_schemas{"../../share/test/data/schemas"};
+    if (!std::filesystem::exists(commons_schemas)) {
+        std::cout << "\nWarn: Can't access share test folder '../../share/test/data/schemas'\n" << std::endl;
+        return;
+    }
+    for (auto const& dir_entry : std::filesystem::directory_iterator{commons_schemas}) {
+        if (std::filesystem::is_directory(dir_entry)) {
+            testCommonSchema(dir_entry.path());
+        }
+    }
+}
+
+boost::unit_test::test_suite *
+init_unit_test_suite(int /*argc*/, char * /*argv*/[]) {
+    using namespace boost::unit_test;
+
+    auto *ts = BOOST_TEST_SUITE("Avro C++ unit tests for commons schemas");
+    ts->add(BOOST_TEST_CASE(&testCommonsSchemas));
+    return ts;
+}
diff --git a/lang/c/tests/test_avro_commons_schema.c b/lang/c/tests/test_avro_commons_schema.c
index c4679d89df9..e3751e9836a 100644
--- a/lang/c/tests/test_avro_commons_schema.c
+++ b/lang/c/tests/test_avro_commons_schema.c
@@ -104,6 +104,8 @@ static void read_data(const char *dirpath, avro_schema_t schema) {
     fprintf(stdout, "\nExit run test OK => %d records", records_read);
     remove("./copy.avro");
     fflush(stdout);
+    avro_file_reader_close(reader);
+    avro_file_writer_close(writer);
 }
 
 static void run_tests(const char *dirpath)
@@ -111,6 +113,7 @@ static void run_tests(const char *dirpath)
     fprintf(stdout, "\nRun test for path '%s'", dirpath);
     avro_schema_t schema = read_common_schema_test(dirpath);
     read_data(dirpath, schema);
+    avro_schema_decref(schema);
 }
diff --git a/lang/csharp/build.sh b/lang/csharp/build.sh
index 623ef03a353..82a9bf97de4 100755
--- a/lang/csharp/build.sh
+++ b/lang/csharp/build.sh
@@ -85,7 +85,8 @@ do
       ;;
 
     clean)
-      rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf}/{obj,bin}
+      rm -rf src/apache/{main,test,codegen,ipc,msbuild,perf,benchmark}/{obj,bin}
+      rm -rf src/apache/codec/Avro.File.{BZip2,Snappy,XZ,ZStandard}{,.Test}/{obj,bin}
       rm -rf build
       rm -f TestResult.xml
       ;;
diff --git a/lang/csharp/src/apache/codegen/Avro.codegen.csproj
b/lang/csharp/src/apache/codegen/Avro.codegen.csproj index dfb438d37a4..94aa8123119 100644 --- a/lang/csharp/src/apache/codegen/Avro.codegen.csproj +++ b/lang/csharp/src/apache/codegen/Avro.codegen.csproj @@ -49,6 +49,14 @@ + + + Major + + true diff --git a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs index e579d8bb07c..7e793627201 100644 --- a/lang/csharp/src/apache/main/CodeGen/CodeGen.cs +++ b/lang/csharp/src/apache/main/CodeGen/CodeGen.cs @@ -1136,6 +1136,50 @@ public virtual void WriteCompileUnit(string outputFile) } } + /// + /// Gets names and generated code of the schema(s) types + /// + /// + public virtual IDictionary GetTypes() + { + using (var cscp = new CSharpCodeProvider()) + { + var opts = new CodeGeneratorOptions + { + BracingStyle = "C", IndentString = "\t", BlankLinesBetweenMembers = false + }; + CodeNamespaceCollection nsc = CompileUnit.Namespaces; + + var sourceCodeByName = new Dictionary(); + for (int i = 0; i < nsc.Count; i++) + { + var ns = nsc[i]; + + var new_ns = new CodeNamespace(ns.Name); + new_ns.Comments.Add(CodeGenUtil.Instance.FileComment); + foreach (CodeNamespaceImport nci in CodeGenUtil.Instance.NamespaceImports) + { + new_ns.Imports.Add(nci); + } + + var types = ns.Types; + for (int j = 0; j < types.Count; j++) + { + var ctd = types[j]; + using (var writer = new StringWriter()) + { + new_ns.Types.Add(ctd); + cscp.GenerateCodeFromNamespace(new_ns, writer, opts); + new_ns.Types.Remove(ctd); + sourceCodeByName[ctd.Name] = writer.ToString(); + } + } + } + + return sourceCodeByName; + } + } + /// /// Writes each types in each namespaces into individual files. /// diff --git a/lang/csharp/src/apache/main/File/DeflateCodec.cs b/lang/csharp/src/apache/main/File/DeflateCodec.cs index 8ef9fce37da..0ce37adb092 100644 --- a/lang/csharp/src/apache/main/File/DeflateCodec.cs +++ b/lang/csharp/src/apache/main/File/DeflateCodec.cs @@ -58,32 +58,14 @@ public override void Compress(MemoryStream inputStream, MemoryStream outputStrea /// public override byte[] Decompress(byte[] compressedData, int length) { - - MemoryStream inStream = new MemoryStream(compressedData); - MemoryStream outStream = new MemoryStream(); - - using (DeflateStream Decompress = - new DeflateStream(inStream, - CompressionMode.Decompress)) - { - CopyTo(Decompress, outStream); - } - - return outStream.ToArray(); - } - - /// - /// Copies to stream. 
- /// - /// stream you are copying from - /// stream you are copying to - private static void CopyTo(Stream from, Stream to) - { - byte[] buffer = new byte[4096]; - int read; - while ((read = from.Read(buffer, 0, buffer.Length)) != 0) + using (MemoryStream inStream = new MemoryStream(compressedData, 0, length)) + using (MemoryStream outStream = new MemoryStream()) { - to.Write(buffer, 0, read); + using (DeflateStream decompress = new DeflateStream(inStream, CompressionMode.Decompress)) + { + decompress.CopyTo(outStream); + } + return outStream.ToArray(); } } diff --git a/lang/csharp/src/apache/main/Generic/GenericReader.cs b/lang/csharp/src/apache/main/Generic/GenericReader.cs index 05139f0fc05..0b945b9ff5e 100644 --- a/lang/csharp/src/apache/main/Generic/GenericReader.cs +++ b/lang/csharp/src/apache/main/Generic/GenericReader.cs @@ -19,6 +19,7 @@ using System.Collections.Generic; using Avro.IO; using System.IO; +using System.Linq; namespace Avro.Generic { @@ -290,21 +291,21 @@ protected virtual object ReadRecord(object reuse, RecordSchema writerSchema, Sch } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; - - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading - - object obj = null; - TryGetField(rec, rf.Name, rf.Pos, out obj); - AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs.Fields.Where(rf => !writerSchema.Contains(rf.Name))) + { + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading + + object obj = null; + TryGetField(rec, rf.Name, rf.Pos, out obj); + AddField(rec, rf.Name, rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; diff --git a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs index 22c80407dde..53270faecdb 100644 --- a/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs +++ b/lang/csharp/src/apache/main/Generic/PreresolvingDatumReader.cs @@ -198,7 +198,7 @@ private ReadItem ResolveEnum(EnumSchema writerSchema, EnumSchema readerSchema) var readerDefaultOrdinal = null != readerSchema.Default ? 
readerSchema.Ordinal(readerSchema.Default) : -1; foreach (var symbol in writerSchema.Symbols) - { + { var writerOrdinal = writerSchema.Ordinal(symbol); if (readerSchema.Contains(symbol)) { @@ -274,27 +274,29 @@ private ReadItem ResolveRecord(RecordSchema writerSchema, RecordSchema readerSch { if (writerSchema.Contains(rf.Name)) continue; - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); + using (var defaultStream = new MemoryStream()) + { + var defaultEncoder = new BinaryEncoder(defaultStream); - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - var defaultBytes = defaultStream.ToArray(); + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + var defaultBytes = defaultStream.ToArray(); - var readItem = ResolveReader(rf.Schema, rf.Schema); + var readItem = ResolveReader(rf.Schema, rf.Schema); - var rfInstance = rf; - if(IsReusable(rf.Schema.Tag)) - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), - new BinaryDecoder(new MemoryStream( defaultBytes))))); - } - else - { - readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, - readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + var rfInstance = rf; + if (IsReusable(rf.Schema.Tag)) + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(recordAccess.GetField(rec, rfInstance.Name, rfInstance.Pos), + new BinaryDecoder(new MemoryStream(defaultBytes))))); + } + else + { + readSteps.Add((rec, d) => recordAccess.AddField(rec, rfInstance.Name, rfInstance.Pos, + readItem(null, new BinaryDecoder(new MemoryStream(defaultBytes))))); + } } } diff --git a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs index 30100bf31d6..72af5f3a53d 100644 --- a/lang/csharp/src/apache/main/IO/BinaryEncoder.cs +++ b/lang/csharp/src/apache/main/IO/BinaryEncoder.cs @@ -25,7 +25,7 @@ namespace Avro.IO /// public class BinaryEncoder : Encoder { - private readonly Stream Stream; + private readonly Stream stream; /// /// Initializes a new instance of the class without a backing @@ -42,7 +42,7 @@ public BinaryEncoder() : this(null) /// Stream to write to. 
public BinaryEncoder(Stream stream) { - this.Stream = stream; + this.stream = stream; } /// @@ -203,22 +203,22 @@ public void WriteFixed(byte[] data) /// public void WriteFixed(byte[] data, int start, int len) { - Stream.Write(data, start, len); + stream.Write(data, start, len); } private void writeBytes(byte[] bytes) { - Stream.Write(bytes, 0, bytes.Length); + stream.Write(bytes, 0, bytes.Length); } private void writeBytes(byte[] bytes, int offset, int length) { - Stream.Write(bytes, offset, length); + stream.Write(bytes, offset, length); } private void writeByte(byte b) { - Stream.WriteByte(b); + stream.WriteByte(b); } /// @@ -226,7 +226,7 @@ private void writeByte(byte b) /// public void Flush() { - Stream.Flush(); + stream.Flush(); } } } diff --git a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs index bfc88847176..c823253692d 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificDatumWriter.cs @@ -176,6 +176,7 @@ public void WriteArrayValues(object array, WriteItem valueWriter, Encoder encode var list = (IList) array; for (int i = 0; i < list.Count; i++ ) { + encoder.StartItem(); valueWriter(list[i], encoder); } } diff --git a/lang/csharp/src/apache/main/Specific/SpecificReader.cs b/lang/csharp/src/apache/main/Specific/SpecificReader.cs index 2736cc1898c..1019fa36ced 100644 --- a/lang/csharp/src/apache/main/Specific/SpecificReader.cs +++ b/lang/csharp/src/apache/main/Specific/SpecificReader.cs @@ -130,20 +130,22 @@ protected override object ReadRecord(object reuse, RecordSchema writerSchema, Sc } } - var defaultStream = new MemoryStream(); - var defaultEncoder = new BinaryEncoder(defaultStream); - var defaultDecoder = new BinaryDecoder(defaultStream); - foreach (Field rf in rs) + using (var defaultStream = new MemoryStream()) { - if (writerSchema.Contains(rf.Name)) continue; + var defaultEncoder = new BinaryEncoder(defaultStream); + var defaultDecoder = new BinaryDecoder(defaultStream); + foreach (Field rf in rs) + { + if (writerSchema.Contains(rf.Name)) continue; - defaultStream.Position = 0; // reset for writing - Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); - defaultStream.Flush(); - defaultStream.Position = 0; // reset for reading + defaultStream.Position = 0; // reset for writing + Resolver.EncodeDefaultValue(defaultEncoder, rf.Schema, rf.DefaultValue); + defaultStream.Flush(); + defaultStream.Position = 0; // reset for reading - obj = rec.Get(rf.Pos); - rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + obj = rec.Get(rf.Pos); + rec.Put(rf.Pos, Read(obj, rf.Schema, rf.Schema, defaultDecoder)); + } } return rec; diff --git a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs index e514347206e..f8eef4a9aba 100644 --- a/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs +++ b/lang/csharp/src/apache/test/CodGen/CodeGenTest.cs @@ -18,6 +18,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Text.RegularExpressions; using Microsoft.CodeAnalysis.CSharp; using NUnit.Framework; @@ -81,6 +82,33 @@ public void TestGenerateNamesException() Protocol protocol = null; Assert.Throws(() => this.GenerateNames(protocol)); } + + + [Test] + public void GetTypesShouldReturnTypes() + { + AddSchema(@" +{ + ""name"": ""PlanetEnum"", + ""namespace"": ""Space.Models"", + ""type"": ""enum"", + ""symbols"": [ + ""Earth"", + ""Mars"", + 
""Jupiter"", + ""Saturn"", + ""Uranus"", + ""Neptune"" + ] +} +"); + GenerateCode(); + var types = GetTypes(); + Assert.That(types.Count, Is.EqualTo(1)); + bool hasPlanetEnumCode = types.TryGetValue("PlanetEnum", out string planetEnumCode); + Assert.That(hasPlanetEnumCode); + Assert.That(Regex.Matches(planetEnumCode, "public enum PlanetEnum").Count, Is.EqualTo(1)); + } } } } diff --git a/lang/csharp/src/apache/test/File/FileTests.cs b/lang/csharp/src/apache/test/File/FileTests.cs index 666318b6899..0ef81c9766f 100644 --- a/lang/csharp/src/apache/test/File/FileTests.cs +++ b/lang/csharp/src/apache/test/File/FileTests.cs @@ -18,6 +18,7 @@ using System; using System.Collections; using System.Collections.Generic; +using System.Diagnostics; using System.IO; using System.IO.Compression; using System.Linq; @@ -555,7 +556,6 @@ private static IEnumerable TestPartialReadSource() /// position in stream /// /// - /// /// [TestCaseSource(nameof(TestPartialReadSource))] public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords) diff --git a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs index 7c3ec3c2d36..28aab10e70c 100644 --- a/lang/csharp/src/apache/test/IO/JsonCodecTests.cs +++ b/lang/csharp/src/apache/test/IO/JsonCodecTests.cs @@ -17,12 +17,14 @@ */ using System; +using System.Collections.Generic; using NUnit.Framework; using System.IO; using System.Linq; using System.Text; using Avro.Generic; using Avro.IO; +using Avro.Specific; using Newtonsoft.Json; using Newtonsoft.Json.Linq; @@ -235,7 +237,6 @@ public void TestJsonUnionWithLogicalTypes(String value) public void TestJsonUnionWithRecord(String value) { Schema schema = Schema.Parse( - "[\"null\",\n" + " { \"type\": \"int\", \"logicalType\": \"date\" },\n" + " {\"type\":\"record\",\"name\":\"myrecord\", \"namespace\":\"com\"," + @@ -285,6 +286,30 @@ public void TestJsonDecoderReorderFields() decoder.SkipArray(); } + [Test] + public void TestJsonDecoderSpecificWithArray() + { + Root data = new Root(); + Item item = new Item { id = 123456 }; + data.myarray = new List { item }; + + DatumWriter writer = new SpecificDatumWriter(data.Schema); + + ByteBufferOutputStream bbos = new ByteBufferOutputStream(); + + Encoder encoder = new JsonEncoder(data.Schema, bbos); + writer.Write(data, encoder); + encoder.Flush(); + + List listStreams = bbos.GetBufferList(); + + using (StreamReader reader = new StreamReader(listStreams[0])) + { + String output = reader.ReadToEnd(); + Assert.AreEqual("{\"myarray\":[{\"id\":123456}]}", output); + } + } + private byte[] fromJsonToAvro(string json, Schema schema) { DatumReader reader = new GenericDatumReader(schema, schema); @@ -326,4 +351,85 @@ private string fromDatumToJson(object datum, Schema schema, bool includeNamespac return Encoding.UTF8.GetString(output.ToArray()); } } + + public partial class Root : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Root\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"myarray" + + "\",\"type\":{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avr" + + "o.Test\",\"fields\":[{\"name\":\"id\",\"type\":\"long\"}]}}}]}"); + + private IList _myarray; + + public virtual global::Avro.Schema Schema + { + get { return Root._SCHEMA; } + } + + public IList myarray + { + get { return this._myarray; } + set { this._myarray = value; } + } + + public virtual object 
Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.myarray; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.myarray = (IList)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } + + public partial class Item : global::Avro.Specific.ISpecificRecord + { + public static global::Avro.Schema _SCHEMA = global::Avro.Schema.Parse( + "{\"type\":\"record\",\"name\":\"Item\",\"namespace\":\"Avro.Test\",\"fields\":[{\"name\":\"id\",\"ty" + + "pe\":\"long\"}]}"); + + private long _id; + + public virtual global::Avro.Schema Schema + { + get { return Item._SCHEMA; } + } + + public long id + { + get { return this._id; } + set { this._id = value; } + } + + public virtual object Get(int fieldPos) + { + switch (fieldPos) + { + case 0: return this.id; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Get()"); + } + } + + public virtual void Put(int fieldPos, object fieldValue) + { + switch (fieldPos) + { + case 0: + this.id = (System.Int64)fieldValue; + break; + default: throw new global::Avro.AvroRuntimeException("Bad index " + fieldPos + " in Put()"); + } + } + } } diff --git a/lang/csharp/versions.props b/lang/csharp/versions.props index 203e6972213..4acdaa9d759 100644 --- a/lang/csharp/versions.props +++ b/lang/csharp/versions.props @@ -63,7 +63,7 @@ 4.3.1 4.3.1 4.3.1 - 7.0.0-preview* + 7.0.1 17.4.0 3.13.3 3.15.2 diff --git a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml index d8d6d7685f2..399bbea940b 100644 --- a/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml +++ b/lang/java/archetypes/avro-service-archetype/src/main/pom/pom.xml @@ -33,7 +33,12 @@ Simple Avro Ordering Service + ${maven.compiler.source} + ${maven.compiler.target} + ${project.build.sourceEncoding} ${project.version} + ${maven.compiler.source} + ${maven.compiler.target} ${jackson-bom.version} ${junit5.version} 1.2.3 @@ -137,6 +142,10 @@ org.apache.maven.plugins maven-compiler-plugin + + \${maven.compiler.source} + \${maven.compiler.target} + diff --git a/lang/java/avro/pom.xml b/lang/java/avro/pom.xml index 541a73df8be..b96673d1851 100644 --- a/lang/java/avro/pom.xml +++ b/lang/java/avro/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java index 4ae75f4a5cb..934672e7d30 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Conversion.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Conversion.java @@ -21,6 +21,9 @@ import java.nio.ByteBuffer; import java.util.Collection; import java.util.Map; +import java.util.ServiceLoader; + +import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericEnumSymbol; import org.apache.avro.generic.GenericFixed; import org.apache.avro.generic.IndexedRecord; @@ -28,23 +31,33 @@ /** * Conversion between generic and logical type instances. *
 * <p>
- * Instances of this class are added to GenericData to convert a logical type to
- * a particular representation.
+ * Instances of this class can be added to GenericData to convert a logical type
+ * to a particular representation. This can be done manually, using
+ * {@link GenericData#addLogicalTypeConversion(Conversion)}, or automatically.
+ * This last option uses the Java {@link ServiceLoader}, and requires the
+ * implementation to be a public class with a public no-arg constructor, be
+ * named in a file called {@code /META-INF/services/org.apache.avro.Conversion},
+ * and both must be available in the classpath.
 * <p>
- * Implementations must provide: * {@link #getConvertedType()}: get the Java
- * class used for the logical type * {@link #getLogicalTypeName()}: get the
- * logical type this implements
+ * Implementations must provide:
+ * <ul>
+ * <li>{@link #getConvertedType()}: get the Java class used for the logical
+ * type</li>
+ * <li>{@link #getLogicalTypeName()}: get the logical type this implements</li>
+ * </ul>
 * <p>
- * Subclasses must also override all of the conversion methods for Avro's base
- * types that are valid for the logical type, or else risk causing
+ * Subclasses must also override the conversion methods for Avro's base types
+ * that are valid for the logical type, or else risk causing
 * {@code UnsupportedOperationException} at runtime.
 * <p>
 * Optionally, use {@link #getRecommendedSchema()} to provide a Schema that will
- * be used when a Schema is generated for the class returned by
- * {@code getConvertedType}.
+ * be used when generating a Schema for the class. This is useful when using
+ * {@code ReflectData} or {@code ProtobufData}, for example.
 *
- * @param <T> a Java type that generic data is converted to
+ * @param <T> a Java type that can represent the named logical type
+ * @see ServiceLoader
 */
+@SuppressWarnings("unused")
 public abstract class Conversion<T> {
 
   /**
@@ -65,9 +78,9 @@ public abstract class Conversion<T> {
    * Certain logical types may require adjusting the code within the "setter"
    * methods to make sure the data that is set is properly formatted. This method
    * allows the Conversion to generate custom setter code if required.
-   *
-   * @param varName
-   * @param valParamName
+   *
+   * @param varName      the name of the variable holding the converted value
+   * @param valParamName the name of the parameter with the new converted value
    * @return a String for the body of the setter method
    */
   public String adjustAndSetValue(String varName, String valParamName) {
@@ -102,7 +115,7 @@ public T fromCharSequence(CharSequence value, Schema schema, LogicalType type) {
     throw new UnsupportedOperationException("fromCharSequence is not supported for " + type.getName());
   }
 
-  public T fromEnumSymbol(GenericEnumSymbol value, Schema schema, LogicalType type) {
+  public T fromEnumSymbol(GenericEnumSymbol<?> value, Schema schema, LogicalType type) {
     throw new UnsupportedOperationException("fromEnumSymbol is not supported for " + type.getName());
   }
 
@@ -150,7 +163,7 @@ public CharSequence toCharSequence(T value, Schema schema, LogicalType type) {
     throw new UnsupportedOperationException("toCharSequence is not supported for " + type.getName());
   }
 
-  public GenericEnumSymbol toEnumSymbol(T value, Schema schema, LogicalType type) {
+  public GenericEnumSymbol<?> toEnumSymbol(T value, Schema schema, LogicalType type) {
     throw new UnsupportedOperationException("toEnumSymbol is not supported for " + type.getName());
   }
 
diff --git a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
index 1c28c9adb81..7d01fc62a37 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/Conversions.java
@@ -106,11 +106,12 @@ public GenericFixed toFixed(BigDecimal value, Schema schema, LogicalType type) {
       byte fillByte = (byte) (value.signum() < 0 ? 0xFF : 0x00);
       byte[] unscaled = value.unscaledValue().toByteArray();
       byte[] bytes = new byte[schema.getFixedSize()];
-      int offset = bytes.length - unscaled.length;
+      int unscaledLength = unscaled.length;
+      int offset = bytes.length - unscaledLength;
 
-      // Fill the front of the array and copy remaining with unscaled values
+      // Fill the front with the filler and copy the unscaled value into the remainder
       Arrays.fill(bytes, 0, offset, fillByte);
-      System.arraycopy(unscaled, 0, bytes, offset, bytes.length - offset);
+      System.arraycopy(unscaled, 0, bytes, offset, unscaledLength);
 
       return new GenericData.Fixed(schema, bytes);
     }
@@ -147,7 +148,7 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima
   }
 
   /**
-   * Convert a underlying representation of a logical type (such as a ByteBuffer)
+   * Convert an underlying representation of a logical type (such as a ByteBuffer)
    * to a higher level object (such as a BigDecimal).
    *
    * @param datum      The object to be converted.
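The ServiceLoader registration documented in the Conversion javadoc above is easiest to see with a concrete sketch. The example below is illustrative only and not part of this diff: the `FixedPointConversion` class, its `fixed-point` logical type name, and the two-decimal-place scale are all invented, while `Conversion`, `GenericData#addLogicalTypeConversion`, and the `META-INF/services` file name are the real mechanisms the javadoc describes.

```java
// Hypothetical example (not part of this diff): a Conversion mapping an
// invented "fixed-point" logical type, backed by an Avro long, to BigDecimal.
package com.example.avro;

import java.math.BigDecimal;

import org.apache.avro.Conversion;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;

public class FixedPointConversion extends Conversion<BigDecimal> {

  // A public no-arg constructor is required for ServiceLoader discovery.
  public FixedPointConversion() {
  }

  @Override
  public Class<BigDecimal> getConvertedType() {
    return BigDecimal.class;
  }

  @Override
  public String getLogicalTypeName() {
    return "fixed-point"; // invented logical type name
  }

  // Override only the base-type conversions valid for this logical type;
  // the methods inherited from Conversion throw UnsupportedOperationException.
  @Override
  public BigDecimal fromLong(Long value, Schema schema, LogicalType type) {
    return BigDecimal.valueOf(value, 2); // assumes two implied decimal places
  }

  @Override
  public Long toLong(BigDecimal value, Schema schema, LogicalType type) {
    return value.movePointRight(2).longValueExact();
  }
}
```

Automatic loading then only needs the fully qualified class name listed in `META-INF/services/org.apache.avro.Conversion` on the classpath; the manual alternative is `GenericData.get().addLogicalTypeConversion(new FixedPointConversion())`.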
@@ -157,9 +158,9 @@ private static BigDecimal validate(final LogicalTypes.Decimal decimal, BigDecima
    * @param conversion The tool used to finish the conversion. Cannot be null if
    *                   datum is not null.
    * @return The result object, which is a high level object of the logical type.
-   *         If a null datum is passed in, a null value will be returned.
-   * @throws IllegalArgumentException if a null schema, type or conversion is
-   *                                  passed in while datum is not null.
+   *         The null datum always converts to a null value.
+   * @throws IllegalArgumentException if datum is not null, but schema, type or
+   *                                  conversion is.
    */
   public static Object convertToLogicalType(Object datum, Schema schema, LogicalType type, Conversion<?> conversion) {
     if (datum == null) {
@@ -176,9 +177,9 @@ public static Object convertToLogicalTy
       case RECORD:
         return conversion.fromRecord((IndexedRecord) datum, schema, type);
       case ENUM:
-        return conversion.fromEnumSymbol((GenericEnumSymbol) datum, schema, type);
+        return conversion.fromEnumSymbol((GenericEnumSymbol<?>) datum, schema, type);
       case ARRAY:
-        return conversion.fromArray((Collection) datum, schema, type);
+        return conversion.fromArray((Collection<?>) datum, schema, type);
       case MAP:
         return conversion.fromMap((Map<?, ?>) datum, schema, type);
       case FIXED:
@@ -201,13 +202,13 @@ public static Object convertToLogicalTy
       return datum;
     } catch (ClassCastException e) {
       throw new AvroRuntimeException(
-          "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected generic type", e);
+          "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected generic type", e);
     }
   }
 
   /**
    * Convert a high level representation of a logical type (such as a BigDecimal)
-   * to the its underlying representation object (such as a ByteBuffer)
+   * to its underlying representation object (such as a ByteBuffer)
    *
    * @param datum      The object to be converted.
    * @param schema     The schema of datum. Cannot be null if datum is not null.
@@ -218,8 +219,8 @@ public static Object convertToLogicalTy
    * @return The result object, which is an underlying representation object of
    *         the logical type. If the input param datum is null, a null value will
    *         be returned.
-   * @throws IllegalArgumentException if a null schema, type or conversion is
-   *                                  passed in while datum is not null.
+   * @throws IllegalArgumentException if datum is not null, but schema, type or
+   *                                  conversion is.
*/ public static Object convertToRawType(Object datum, Schema schema, LogicalType type, Conversion conversion) { if (datum == null) { @@ -262,7 +263,7 @@ public static Object convertToRawType(Object datum, Schema schema, LogicalTy return datum; } catch (ClassCastException e) { throw new AvroRuntimeException( - "Cannot convert " + datum + ":" + datum.getClass().getSimpleName() + ": expected logical type", e); + "Cannot convert " + datum + ':' + datum.getClass().getSimpleName() + ": expected logical type", e); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java index b53bc6cb2ba..300e583b40a 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java +++ b/lang/java/avro/src/main/java/org/apache/avro/JsonProperties.java @@ -30,6 +30,7 @@ import java.util.concurrent.ConcurrentMap; import java.io.IOException; +import java.util.function.BiConsumer; import org.apache.avro.util.internal.Accessor; import org.apache.avro.util.internal.Accessor.JsonPropertiesAccessor; @@ -241,6 +242,11 @@ public Object getObjectProp(String name) { return JacksonUtils.toObject(props.get(name)); } + public Object getObjectProp(String name, Object defaultValue) { + final JsonNode json = props.get(name); + return json != null ? JacksonUtils.toObject(json) : defaultValue; + } + /** * Adds a property with the given name name and value value. * Neither name nor value can be null. It is illegal @@ -307,6 +313,17 @@ public Map getObjectProps() { return Collections.unmodifiableMap(result); } + public boolean propsContainsKey(String key) { + return this.props.containsKey(key); + } + + public void forEachProperty(BiConsumer consumer) { + for (Map.Entry entry : this.props.entrySet()) { + final Object value = JacksonUtils.toObject(entry.getValue()); + consumer.accept(entry.getKey(), value); + } + } + void writeProps(JsonGenerator gen) throws IOException { for (Map.Entry e : props.entrySet()) gen.writeObjectField(e.getKey(), e.getValue()); diff --git a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java index 7bb00f819b5..086c5d266a2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java +++ b/lang/java/avro/src/main/java/org/apache/avro/LogicalTypes.java @@ -329,7 +329,7 @@ private long maxPrecision(Schema schema) { } private boolean hasProperty(Schema schema, String name) { - return (schema.getObjectProp(name) != null); + return schema.propsContainsKey(name); } private int getInt(Schema schema, String name) { diff --git a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java index ff996889517..f99df533bd7 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Protocol.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Protocol.java @@ -174,7 +174,6 @@ public int hashCode() { public String getDoc() { return doc; } - } private class TwoWayMessage extends Message { @@ -274,15 +273,29 @@ public Protocol(Protocol p) { public Protocol(String name, String doc, String namespace) { super(PROTOCOL_RESERVED); - this.name = name; + setName(name, namespace); this.doc = doc; - this.namespace = namespace; } public Protocol(String name, String namespace) { this(name, null, namespace); } + private void setName(String name, String namespace) { + int lastDot = name.lastIndexOf('.'); + if (lastDot < 0) { + this.name = name; + this.namespace = namespace; + } else { + 
this.name = name.substring(lastDot + 1); + this.namespace = name.substring(0, lastDot); + } + if (this.namespace != null && this.namespace.isEmpty()) { + this.namespace = null; + } + types.space(this.namespace); + } + /** The name of this protocol. */ public String getName() { return name; @@ -452,7 +465,9 @@ public byte[] getMD5() { /** Read a protocol from a Json file. */ public static Protocol parse(File file) throws IOException { - return parse(Schema.FACTORY.createParser(file)); + try (JsonParser jsonParser = Schema.FACTORY.createParser(file)) { + return parse(jsonParser); + } } /** Read a protocol from a Json stream. */ @@ -488,20 +503,22 @@ private static Protocol parse(JsonParser parser) { } private void parse(JsonNode json) { - parseNamespace(json); - parseName(json); + parseNameAndNamespace(json); parseTypes(json); parseMessages(json); parseDoc(json); parseProps(json); } - private void parseNamespace(JsonNode json) { - JsonNode nameNode = json.get("namespace"); - if (nameNode == null) - return; // no namespace defined - this.namespace = nameNode.textValue(); - types.space(this.namespace); + private void parseNameAndNamespace(JsonNode json) { + JsonNode nameNode = json.get("protocol"); + if (nameNode == null) { + throw new SchemaParseException("No protocol name specified: " + json); + } + JsonNode namespaceNode = json.get("namespace"); + String namespace = namespaceNode == null ? null : namespaceNode.textValue(); + + setName(nameNode.textValue(), namespace); } private void parseDoc(JsonNode json) { @@ -515,23 +532,21 @@ private String parseDocNode(JsonNode json) { return nameNode.textValue(); } - private void parseName(JsonNode json) { - JsonNode nameNode = json.get("protocol"); - if (nameNode == null) - throw new SchemaParseException("No protocol name specified: " + json); - this.name = nameNode.textValue(); - } - private void parseTypes(JsonNode json) { JsonNode defs = json.get("types"); if (defs == null) return; // no types defined if (!defs.isArray()) throw new SchemaParseException("Types not an array: " + defs); + for (JsonNode type : defs) { if (!type.isObject()) throw new SchemaParseException("Type not an object: " + type); - Schema.parse(type, types); + Schema.parseNamesDeclared(type, types, types.space()); + + } + for (JsonNode type : defs) { + Schema.parseCompleteSchema(type, types, types.space()); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/Schema.java b/lang/java/avro/src/main/java/org/apache/avro/Schema.java index f6c3de7684e..38a6e4a9e42 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/Schema.java +++ b/lang/java/avro/src/main/java/org/apache/avro/Schema.java @@ -26,11 +26,13 @@ import com.fasterxml.jackson.databind.node.DoubleNode; import com.fasterxml.jackson.databind.node.NullNode; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.io.StringWriter; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -46,6 +48,9 @@ import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.StreamSupport; + import org.apache.avro.util.internal.Accessor; import org.apache.avro.util.internal.Accessor.FieldAccessor; import org.apache.avro.util.internal.JacksonUtils; @@ -694,6 +699,95 @@ private boolean defaultValueEquals(JsonNode thatDefaultValue) { public String toString() { return name + " type:" + 
schema.type + " pos:" + position; } + + /** + * Parse field. + * + * @param field : json field definition. + * @param names : names map. + * @param namespace : current working namespace. + * @return field. + */ + static Field parse(JsonNode field, Names names, String namespace) { + String fieldName = getRequiredText(field, "name", "No field name"); + String fieldDoc = getOptionalText(field, "doc"); + JsonNode fieldTypeNode = field.get("type"); + if (fieldTypeNode == null) { + throw new SchemaParseException("No field type: " + field); + } + + Schema fieldSchema = null; + if (fieldTypeNode.isTextual()) { + Schema schemaField = names.get(fieldTypeNode.textValue()); + if (schemaField == null) { + schemaField = names.get(namespace + "." + fieldTypeNode.textValue()); + } + if (schemaField == null) { + throw new SchemaParseException(fieldTypeNode + " is not a defined name." + " The type of the \"" + fieldName + + "\" field must be a defined name or a {\"type\": ...} expression."); + } + fieldSchema = schemaField; + } else if (fieldTypeNode.isObject()) { + fieldSchema = resolveSchema(fieldTypeNode, names, namespace); + if (fieldSchema == null) { + fieldSchema = Schema.parseCompleteSchema(fieldTypeNode, names, namespace); + } + } else if (fieldTypeNode.isArray()) { + List unionTypes = new ArrayList<>(); + + fieldTypeNode.forEach((JsonNode node) -> { + Schema subSchema = null; + if (node.isTextual()) { + subSchema = names.get(node.asText()); + if (subSchema == null) { + subSchema = names.get(namespace + "." + node.asText()); + } + } else if (node.isObject()) { + subSchema = Schema.parseCompleteSchema(node, names, namespace); + } else { + throw new SchemaParseException("Illegal type in union : " + node); + } + if (subSchema == null) { + throw new SchemaParseException("Null element in union : " + node); + } + unionTypes.add(subSchema); + }); + + fieldSchema = Schema.createUnion(unionTypes); + } + + if (fieldSchema == null) { + throw new SchemaParseException("Can't find type for field " + fieldName); + } + Field.Order order = Field.Order.ASCENDING; + JsonNode orderNode = field.get("order"); + if (orderNode != null) + order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); + JsonNode defaultValue = field.get("default"); + + if (defaultValue != null + && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) + && defaultValue.isTextual()) { + try { + defaultValue = new DoubleNode(Double.valueOf(defaultValue.textValue())); + } catch (NumberFormatException ex) { + throw new SchemaParseException( + "Can't parse number '" + defaultValue.textValue() + "' for field '" + fieldName); + } + } + + Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); + Iterator i = field.fieldNames(); + while (i.hasNext()) { // add field props + String prop = i.next(); + if (!FIELD_RESERVED.contains(prop)) + f.addProp(prop, field.get(prop)); + } + f.aliases = parseAliases(field); + + return f; + } + } static class Name { @@ -896,8 +990,8 @@ public int hashCode() { } } - private static final ThreadLocal SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); - private static final ThreadLocal SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); + private static final ThreadLocal> SEEN_EQUALS = ThreadLocalWithInitial.of(HashSet::new); + private static final ThreadLocal> SEEN_HASHCODE = ThreadLocalWithInitial.of(IdentityHashMap::new); @SuppressWarnings(value = "unchecked") private static class RecordSchema extends NamedSchema { @@ -993,7 
+1087,7 @@ public boolean equals(Object o) { @Override int computeHash() { - Map seen = SEEN_HASHCODE.get(); + Map seen = SEEN_HASHCODE.get(); if (seen.containsKey(this)) return 0; // prevent stack overflow boolean first = seen.isEmpty(); @@ -1015,8 +1109,8 @@ void toJson(Names names, JsonGenerator gen) throws IOException { gen.writeStringField("type", isError ? "error" : "record"); writeName(names, gen); names.space = name.space; // set default namespace - if (getDoc() != null) - gen.writeStringField("doc", getDoc()); + if (this.getDoc() != null) + gen.writeStringField("doc", this.getDoc()); if (fields != null) { gen.writeFieldName("fields"); @@ -1238,6 +1332,16 @@ public UnionSchema(LockableArrayList types) { } } + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return this.types.stream().anyMatch((Schema s) -> s.isValidDefault(jsonValue)); + } + @Override public List getTypes() { return types; @@ -1278,6 +1382,12 @@ void toJson(Names names, JsonGenerator gen) throws IOException { type.toJson(names, gen); gen.writeEndArray(); } + + @Override + public String getName() { + return super.getName() + + this.getTypes().stream().map(Schema::getName).collect(Collectors.joining(", ", "[", "]")); + } } private static class FixedSchema extends NamedSchema { @@ -1285,8 +1395,7 @@ private static class FixedSchema extends NamedSchema { public FixedSchema(Name name, String doc, int size) { super(Type.FIXED, name, doc); - if (size < 0) - throw new IllegalArgumentException("Invalid fixed size: " + size); + SystemLimitException.checkMaxBytesLength(size); this.size = size; } @@ -1381,15 +1490,32 @@ public NullSchema() { */ public static class Parser { private Names names = new Names(); - private boolean validate = true; + private final Schema.NameValidator validate; private boolean validateDefaults = true; + public Parser() { + this(NameValidator.UTF_VALIDATOR); + } + + public Parser(final NameValidator validate) { + this.validate = validate; + } + /** * Adds the provided types to the set of defined, named types known to this - * parser. + * parser. deprecated: use addTypes(Iterable types) */ + @Deprecated public Parser addTypes(Map types) { - for (Schema s : types.values()) + return this.addTypes(types.values()); + } + + /** + * Adds the provided types to the set of defined, named types known to this + * parser. + */ + public Parser addTypes(Iterable types) { + for (Schema s : types) names.add(s); return this; } @@ -1402,17 +1528,6 @@ public Map getTypes() { return result; } - /** Enable or disable name validation. */ - public Parser setValidate(boolean validate) { - this.validate = validate; - return this; - } - - /** True iff names are validated. True by default. */ - public boolean getValidate() { - return this.validate; - } - /** Enable or disable default value validation. */ public Parser setValidateDefaults(boolean validateDefaults) { this.validateDefaults = validateDefaults; @@ -1429,7 +1544,21 @@ public boolean getValidateDefaults() { * names known to this parser. 
*/ public Schema parse(File file) throws IOException { - return parse(FACTORY.createParser(file)); + return parse(FACTORY.createParser(file), false); + } + + public List parse(Iterable sources) throws IOException { + final List schemas = new ArrayList<>(); + for (File source : sources) { + final Schema emptySchema = parseNamesDeclared(FACTORY.createParser(source)); + schemas.add(emptySchema); + } + + for (File source : sources) { + parseFieldsOnly(FACTORY.createParser(source)); + } + + return schemas; } /** @@ -1437,7 +1566,7 @@ public Schema parse(File file) throws IOException { * names known to this parser. The input stream stays open after the parsing. */ public Schema parse(InputStream in) throws IOException { - return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE)); + return parse(FACTORY.createParser(in).disable(JsonParser.Feature.AUTO_CLOSE_SOURCE), true); } /** Read a schema from one or more json strings */ @@ -1454,19 +1583,24 @@ public Schema parse(String s, String... more) { */ public Schema parse(String s) { try { - return parse(FACTORY.createParser(s)); + return parse(FACTORY.createParser(s), false); } catch (IOException e) { throw new SchemaParseException(e); } } - private Schema parse(JsonParser parser) throws IOException { - boolean saved = validateNames.get(); + private static interface ParseFunction { + Schema parse(JsonNode node) throws IOException; + } + + private Schema runParser(JsonParser parser, ParseFunction f) throws IOException { + NameValidator saved = validateNames.get(); boolean savedValidateDefaults = VALIDATE_DEFAULTS.get(); try { validateNames.set(validate); VALIDATE_DEFAULTS.set(validateDefaults); - return Schema.parse(MAPPER.readTree(parser), names); + JsonNode jsonNode = MAPPER.readTree(parser); + return f.parse(jsonNode); } catch (JsonParseException e) { throw new SchemaParseException(e); } finally { @@ -1475,6 +1609,37 @@ private Schema parse(JsonParser parser) throws IOException { VALIDATE_DEFAULTS.set(savedValidateDefaults); } } + + private Schema parse(JsonParser parser, final boolean allowDanglingContent) throws IOException { + return this.runParser(parser, (JsonNode jsonNode) -> { + Schema schema = Schema.parse(jsonNode, names); + if (!allowDanglingContent) { + String dangling; + StringWriter danglingWriter = new StringWriter(); + int numCharsReleased = parser.releaseBuffered(danglingWriter); + if (numCharsReleased == -1) { + ByteArrayOutputStream danglingOutputStream = new ByteArrayOutputStream(); + parser.releaseBuffered(danglingOutputStream); // if input isnt chars above it must be bytes + dangling = new String(danglingOutputStream.toByteArray(), StandardCharsets.UTF_8).trim(); + } else { + dangling = danglingWriter.toString().trim(); + } + if (!dangling.isEmpty()) { + throw new SchemaParseException("dangling content after end of schema: " + dangling); + } + } + return schema; + }); + } + + private Schema parseNamesDeclared(JsonParser parser) throws IOException { + return this.runParser(parser, (JsonNode jsonNode) -> Schema.parseNamesDeclared(jsonNode, names, names.space)); + } + + private Schema parseFieldsOnly(JsonParser parser) throws IOException { + return this.runParser(parser, (JsonNode jsonNode) -> Schema.parseCompleteSchema(jsonNode, names, names.space)); + } + } /** @@ -1525,7 +1690,8 @@ public static Schema parse(String jsonSchema) { */ @Deprecated public static Schema parse(String jsonSchema, boolean validate) { - return new Parser().setValidate(validate).parse(jsonSchema); + final NameValidator 
validator = validate ? NameValidator.UTF_VALIDATOR : NameValidator.NO_VALIDATION; + return new Parser(validator).parse(jsonSchema); } static final Map PRIMITIVES = new HashMap<>(); @@ -1582,43 +1748,53 @@ public void add(Schema schema) { @Override public Schema put(Name name, Schema schema) { - if (containsKey(name)) - throw new SchemaParseException("Can't redefine: " + name); + if (containsKey(name)) { + final Schema other = super.get(name); + if (!Objects.equals(other, schema)) { + throw new SchemaParseException("Can't redefine: " + name); + } else { + return schema; + } + } return super.put(name, schema); } } - private static ThreadLocal validateNames = ThreadLocalWithInitial.of(() -> true); + private static ThreadLocal validateNames = ThreadLocalWithInitial + .of(() -> NameValidator.UTF_VALIDATOR); private static String validateName(String name) { - if (!validateNames.get()) - return name; // not validating names - if (name == null) - throw new SchemaParseException("Null name"); - int length = name.length(); - if (length == 0) - throw new SchemaParseException("Empty name"); - char first = name.charAt(0); - if (!(Character.isLetter(first) || first == '_')) - throw new SchemaParseException("Illegal initial character: " + name); - for (int i = 1; i < length; i++) { - char c = name.charAt(i); - if (!(Character.isLetterOrDigit(c) || c == '_')) - throw new SchemaParseException("Illegal character in: " + name); + NameValidator.Result result = validateNames.get().validate(name); + if (!result.isOK()) { + throw new SchemaParseException(result.errors); } return name; } + public static void setNameValidator(final Schema.NameValidator validator) { + Schema.validateNames.set(validator); + } + private static final ThreadLocal VALIDATE_DEFAULTS = ThreadLocalWithInitial.of(() -> true); private static JsonNode validateDefault(String fieldName, Schema schema, JsonNode defaultValue) { - if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !isValidDefault(schema, defaultValue)) { // invalid default + if (VALIDATE_DEFAULTS.get() && (defaultValue != null) && !schema.isValidDefault(defaultValue)) { // invalid default String message = "Invalid default for field " + fieldName + ": " + defaultValue + " not a " + schema; throw new AvroTypeException(message); // throw exception } return defaultValue; } + /** + * Checks if a JSON value matches the schema. + * + * @param jsonValue a value to check against the schema + * @return true if the value is valid according to this schema + */ + public boolean isValidDefault(JsonNode jsonValue) { + return isValidDefault(this, jsonValue); + } + private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (defaultValue == null) return false; @@ -1653,13 +1829,13 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { if (!isValidDefault(schema.getValueType(), value)) return false; return true; - case UNION: // union default: first branch - return isValidDefault(schema.getTypes().get(0), defaultValue); + case UNION: // union default: any branch + return schema.getTypes().stream().anyMatch((Schema s) -> isValidValue(s, defaultValue)); case RECORD: if (!defaultValue.isObject()) return false; for (Field field : schema.getFields()) - if (!isValidDefault(field.schema(), + if (!isValidValue(field.schema(), defaultValue.has(field.name()) ? 
defaultValue.get(field.name()) : field.defaultValue())) return false; return true; @@ -1668,78 +1844,71 @@ private static boolean isValidDefault(Schema schema, JsonNode defaultValue) { } } - /** @see #parse(String) */ - static Schema parse(JsonNode schema, Names names) { + /** + * Validate a value against the schema. + * + * @param schema : schema for value. + * @param value : value to validate. + * @return true if ok. + */ + private static boolean isValidValue(Schema schema, JsonNode value) { + if (value == null) + return false; + if (schema.isUnion()) { + // For Union, only need that one sub schema is ok. + for (Schema sub : schema.getTypes()) { + if (Schema.isValidDefault(sub, value)) { + return true; + } + } + return false; + } else { + // for other types, same as validate default. + return Schema.isValidDefault(schema, value); + } + } + + /** + * Parse named schema in order to fill names map. This method does not parse + * field of record/error schema. + * + * @param schema : json schema representation. + * @param names : map of named schema. + * @param currentNameSpace : current working name space. + * @return schema. + */ + static Schema parseNamesDeclared(JsonNode schema, Names names, String currentNameSpace) { if (schema == null) { - throw new SchemaParseException("Cannot parse schema"); + return null; } - if (schema.isTextual()) { // name - Schema result = names.get(schema.textValue()); - if (result == null) - throw new SchemaParseException("Undefined name: " + schema); - return result; - } else if (schema.isObject()) { - Schema result; - String type = getRequiredText(schema, "type", "No type"); + if (schema.isObject()) { + + String type = Schema.getOptionalText(schema, "type"); Name name = null; - String savedSpace = names.space(); + String doc = null; + Schema result = null; final boolean isTypeError = "error".equals(type); final boolean isTypeRecord = "record".equals(type); final boolean isTypeEnum = "enum".equals(type); final boolean isTypeFixed = "fixed".equals(type); + if (isTypeRecord || isTypeError || isTypeEnum || isTypeFixed) { String space = getOptionalText(schema, "namespace"); doc = getOptionalText(schema, "doc"); if (space == null) - space = savedSpace; + space = currentNameSpace; name = new Name(getRequiredText(schema, "name", "No name in schema"), space); - names.space(name.space); // set default namespace } - if (PRIMITIVES.containsKey(type)) { // primitive - result = create(PRIMITIVES.get(type)); - } else if (isTypeRecord || isTypeError) { // record - List fields = new ArrayList<>(); + if (isTypeRecord || isTypeError) { // record result = new RecordSchema(name, doc, isTypeError); - if (name != null) - names.add(result); + names.add(result); JsonNode fieldsNode = schema.get("fields"); + if (fieldsNode == null || !fieldsNode.isArray()) throw new SchemaParseException("Record has no fields: " + schema); - for (JsonNode field : fieldsNode) { - String fieldName = getRequiredText(field, "name", "No field name"); - String fieldDoc = getOptionalText(field, "doc"); - JsonNode fieldTypeNode = field.get("type"); - if (fieldTypeNode == null) - throw new SchemaParseException("No field type: " + field); - if (fieldTypeNode.isTextual() && names.get(fieldTypeNode.textValue()) == null) - throw new SchemaParseException(fieldTypeNode + " is not a defined name." 
+ " The type of the \"" + fieldName - + "\" field must be a defined name or a {\"type\": ...} expression."); - Schema fieldSchema = parse(fieldTypeNode, names); - Field.Order order = Field.Order.ASCENDING; - JsonNode orderNode = field.get("order"); - if (orderNode != null) - order = Field.Order.valueOf(orderNode.textValue().toUpperCase(Locale.ENGLISH)); - JsonNode defaultValue = field.get("default"); - if (defaultValue != null - && (Type.FLOAT.equals(fieldSchema.getType()) || Type.DOUBLE.equals(fieldSchema.getType())) - && defaultValue.isTextual()) - defaultValue = new DoubleNode(Double.valueOf(defaultValue.textValue())); - Field f = new Field(fieldName, fieldSchema, fieldDoc, defaultValue, true, order); - Iterator i = field.fieldNames(); - while (i.hasNext()) { // add field props - String prop = i.next(); - if (!FIELD_RESERVED.contains(prop)) - f.addProp(prop, field.get(prop)); - } - f.aliases = parseAliases(field); - fields.add(f); - if (fieldSchema.getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) - LOG.warn( - "Ignored the {}.{}.logicalType property (\"{}\"). It should probably be nested inside the \"type\" for the field.", - name, fieldName, getOptionalText(field, "logicalType")); - } - result.setFields(fields); + exploreFields(fieldsNode, names, name != null ? name.space : null); + } else if (isTypeEnum) { // enum JsonNode symbolsNode = schema.get("symbols"); if (symbolsNode == null || !symbolsNode.isArray()) @@ -1752,18 +1921,19 @@ static Schema parse(JsonNode schema, Names names) { if (enumDefault != null) defaultSymbol = enumDefault.textValue(); result = new EnumSchema(name, doc, symbols, defaultSymbol); - if (name != null) - names.add(result); + names.add(result); } else if (type.equals("array")) { // array JsonNode itemsNode = schema.get("items"); if (itemsNode == null) throw new SchemaParseException("Array has no items type: " + schema); - result = new ArraySchema(parse(itemsNode, names)); + final Schema items = Schema.parseNamesDeclared(itemsNode, names, currentNameSpace); + result = Schema.createArray(items); } else if (type.equals("map")) { // map JsonNode valuesNode = schema.get("values"); if (valuesNode == null) throw new SchemaParseException("Map has no values type: " + schema); - result = new MapSchema(parse(valuesNode, names)); + final Schema values = Schema.parseNamesDeclared(valuesNode, names, currentNameSpace); + result = Schema.createMap(values); } else if (isTypeFixed) { // fixed JsonNode sizeNode = schema.get("size"); if (sizeNode == null || !sizeNode.isInt()) @@ -1771,42 +1941,194 @@ static Schema parse(JsonNode schema, Names names) { result = new FixedSchema(name, doc, sizeNode.intValue()); if (name != null) names.add(result); - } else { // For unions with self reference - Name nameFromType = new Name(type, names.space); - if (names.containsKey(nameFromType)) { - return names.get(nameFromType); + } else if (PRIMITIVES.containsKey(type)) { + result = Schema.create(PRIMITIVES.get(type)); + } + if (result != null) { + Set reserved = SCHEMA_RESERVED; + if (isTypeEnum) { + reserved = ENUM_RESERVED; } - throw new SchemaParseException("Type not supported: " + type); + Schema.addProperties(schema, reserved, result); } - Iterator i = schema.fieldNames(); + return result; + } else if (schema.isArray()) { + List subs = new ArrayList<>(schema.size()); + schema.forEach((JsonNode item) -> { + Schema sub = Schema.parseNamesDeclared(item, names, currentNameSpace); + if (sub != null) { + subs.add(sub); + } + }); + return Schema.createUnion(subs); + } 
else if (schema.isTextual()) {
+      String value = schema.asText();
+      return names.get(value);
+    }
+    return null;
+  }

-      Set<String> reserved = SCHEMA_RESERVED;
-      if (isTypeEnum) {
-        reserved = ENUM_RESERVED;
+  private static void addProperties(JsonNode schema, Set<String> reserved, Schema avroSchema) {
+    Iterator<String> i = schema.fieldNames();
+    while (i.hasNext()) { // add properties
+      String prop = i.next();
+      if (!reserved.contains(prop)) // ignore reserved
+        avroSchema.addProp(prop, schema.get(prop));
+    }
+    // parse logical type if present
+    avroSchema.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(avroSchema);
+    // names.space(savedSpace); // restore space
+    if (avroSchema instanceof NamedSchema) {
+      Set<String> aliases = parseAliases(schema);
+      if (aliases != null) // add aliases
+        for (String alias : aliases)
+          avroSchema.addAlias(alias);
+    }
+  }
+
+  /**
+   * Explore record fields in order to fill the names map with named types
+   * declared inside the fields.
+   *
+   * @param fieldsNode : json node for the record's fields.
+   * @param names      : names map.
+   * @param nameSpace  : current working namespace.
+   */
+  private static void exploreFields(JsonNode fieldsNode, Names names, String nameSpace) {
+    for (JsonNode field : fieldsNode) {
+      final JsonNode fieldType = field.get("type");
+      if (fieldType != null) {
+        if (fieldType.isObject()) {
+          parseNamesDeclared(fieldType, names, nameSpace);
+        } else if (fieldType.isArray()) {
+          exploreFields(fieldType, names, nameSpace);
+        } else if (fieldType.isTextual() && field.isObject()) {
+          parseNamesDeclared(field, names, nameSpace);
+        }
       }
-    while (i.hasNext()) { // add properties
-      String prop = i.next();
-      if (!reserved.contains(prop)) // ignore reserved
-        result.addProp(prop, schema.get(prop));
+    }
+  }
+
+  /**
+   * In complement of parseNamesDeclared, this method parses the schema in
+   * detail.
+   *
+   * @param schema       : json schema.
+   * @param names        : names map.
+   * @param currentSpace : current working namespace.
+   * @return the complete schema.
+   */
+  static Schema parseCompleteSchema(JsonNode schema, Names names, String currentSpace) {
+    if (schema == null) {
+      throw new SchemaParseException("Cannot parse schema");
+    }
+    if (schema.isTextual()) {
+      String type = schema.asText();
+      Schema avroSchema = names.get(type);
+      if (avroSchema == null) {
+        avroSchema = names.get(currentSpace + "."
+ type); + } + return avroSchema; + } + if (schema.isArray()) { + List schemas = StreamSupport.stream(schema.spliterator(), false) + .map((JsonNode sub) -> parseCompleteSchema(sub, names, currentSpace)).collect(Collectors.toList()); + return Schema.createUnion(schemas); + } + if (schema.isObject()) { + Schema result = null; + String type = getRequiredText(schema, "type", "No type"); + Name name = null; + + final boolean isTypeError = "error".equals(type); + final boolean isTypeRecord = "record".equals(type); + final boolean isTypeArray = "array".equals(type); + + if (isTypeRecord || isTypeError || "enum".equals(type) || "fixed".equals(type)) { + // named schema + String space = getOptionalText(schema, "namespace"); + + if (space == null) + space = currentSpace; + name = new Name(getRequiredText(schema, "name", "No name in schema"), space); + + result = names.get(name); + if (result == null) { + throw new SchemaParseException("Unparsed field type " + name); + } + } + if (isTypeRecord || isTypeError) { + if (result != null && !result.hasFields()) { + final List fields = new ArrayList<>(); + JsonNode fieldsNode = schema.get("fields"); + if (fieldsNode == null || !fieldsNode.isArray()) + throw new SchemaParseException("Record has no fields: " + schema); + + for (JsonNode field : fieldsNode) { + Field f = Field.parse(field, names, name.space); + + fields.add(f); + if (f.schema.getLogicalType() == null && getOptionalText(field, LOGICAL_TYPE_PROP) != null) + LOG.warn( + "Ignored the {}.{}.logicalType property (\"{}\"). It should probably be nested inside the \"type\" for the field.", + name, f.name, getOptionalText(field, "logicalType")); + } + result.setFields(fields); + } + } else if (isTypeArray) { + JsonNode items = schema.get("items"); + Schema schemaItems = parseCompleteSchema(items, names, currentSpace); + result = Schema.createArray(schemaItems); + } else if ("map".equals(type)) { + JsonNode values = schema.get("values"); + Schema mapItems = parseCompleteSchema(values, names, currentSpace); + result = Schema.createMap(mapItems); + } else if (result == null) { + result = names.get(currentSpace + "." 
+ type); + if (result == null) { + result = names.get(type); + } } - // parse logical type if present - result.logicalType = LogicalTypes.fromSchemaIgnoreInvalid(result); - names.space(savedSpace); // restore space - if (result instanceof NamedSchema) { - Set aliases = parseAliases(schema); - if (aliases != null) // add aliases - for (String alias : aliases) - result.addAlias(alias); + + Set reserved = SCHEMA_RESERVED; + if ("enum".equals(type)) { + reserved = ENUM_RESERVED; } + Schema.addProperties(schema, reserved, result); return result; - } else if (schema.isArray()) { // union - LockableArrayList types = new LockableArrayList<>(schema.size()); - for (JsonNode typeNode : schema) - types.add(parse(typeNode, names)); - return new UnionSchema(types); - } else { - throw new SchemaParseException("Schema not yet supported: " + schema); } + return null; + } + + static Schema parse(JsonNode schema, Names names) { + if (schema == null) { + throw new SchemaParseException("Cannot parse schema"); + } + + Schema result = Schema.parseNamesDeclared(schema, names, names.space); + Schema.parseCompleteSchema(schema, names, names.space); + + return result; + } + + static Schema resolveSchema(JsonNode schema, Names names, String currentNameSpace) { + String np = currentNameSpace; + String nodeName = getOptionalText(schema, "name"); + if (nodeName != null) { + final JsonNode nameSpace = schema.get("namespace"); + StringBuilder fullName = new StringBuilder(); + if (nameSpace != null && nameSpace.isTextual()) { + fullName.append(nameSpace.asText()).append("."); + np = nameSpace.asText(); + } + fullName.append(nodeName); + Schema schema1 = names.get(fullName.toString()); + + if (schema1 != null && schema1.getType() == Type.RECORD && !schema1.hasFields()) { + Schema.parseCompleteSchema(schema, names, np); + } + return schema1; + } + return null; } static Set parseAliases(JsonNode node) { @@ -1989,6 +2311,84 @@ private static String getFieldAlias(Name record, String field, Map= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } + + private boolean isDigit(char c) { + return c >= '0' && c <= '9'; + } + + }; + + } + /** * No change is permitted on LockableArrayList once lock() has been called on * it. diff --git a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java index 3e5628d9b3b..8b6a2839ad6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java +++ b/lang/java/avro/src/main/java/org/apache/avro/SchemaCompatibility.java @@ -324,8 +324,10 @@ private SchemaCompatibilityResult calculateCompatibility(final Schema reader, fi // Reader compatible with all branches of a writer union is compatible if (writer.getType() == Schema.Type.UNION) { + int index = 0; for (Schema s : writer.getTypes()) { - result = result.mergedWith(getCompatibility(reader, s)); + result = result.mergedWith(getCompatibility(Integer.toString(index), reader, s, location)); + index++; } return result; } diff --git a/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java new file mode 100644 index 00000000000..a96f812d84d --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/SystemLimitException.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import org.slf4j.LoggerFactory; + +/** + * Thrown to prevent making large allocations when reading potentially + * pathological input data from an untrusted source. + *
+ * <p>
+ * The following system properties can be set to limit the size of bytes,
+ * strings and collection types to be allocated:
+ * <ul>
+ * <li>{@code org.apache.avro.limits.bytes.maxLength} limits the maximum
+ * size of byte types.</li>
+ * <li>{@code org.apache.avro.limits.collectionItems.maxLength} limits the
+ * maximum number of map and list items that can be read in a single
+ * sequence.</li>
+ * <li>{@code org.apache.avro.limits.string.maxLength} limits the maximum
+ * size of string types.</li>
+ * </ul>
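+ *
+ * <p>
+ * For example, a JVM could be started with a 1 MiB cap on decoded strings
+ * (the limit value and main-class name are illustrative placeholders):
+ *
+ * <pre>
+ * java -Dorg.apache.avro.limits.string.maxLength=1048576 MyApp
+ * </pre>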
+ * + * The default is to permit sizes up to {@link #MAX_ARRAY_VM_LIMIT}. + */ +public class SystemLimitException extends AvroRuntimeException { + + /** + * The maximum length of array to allocate (unless necessary). Some VMs reserve + * some header words in an array. Attempts to allocate larger arrays may result + * in {@code OutOfMemoryError: Requested array size exceeds VM limit} + * + * @see JDK-8246725 + */ + // VisibleForTesting + static final int MAX_ARRAY_VM_LIMIT = Integer.MAX_VALUE - 8; + + public static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; + public static final String MAX_COLLECTION_LENGTH_PROPERTY = "org.apache.avro.limits.collectionItems.maxLength"; + public static final String MAX_STRING_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; + + private static int maxBytesLength = MAX_ARRAY_VM_LIMIT; + private static int maxCollectionLength = MAX_ARRAY_VM_LIMIT; + private static int maxStringLength = MAX_ARRAY_VM_LIMIT; + + static { + resetLimits(); + } + + public SystemLimitException(String message) { + super(message); + } + + /** + * Get an integer value stored in a system property, used to configure the + * system behaviour of decoders + * + * @param property The system property to fetch + * @param defaultValue The value to use if the system property is not present or + * parsable as an int + * @return The value from the system property + */ + private static int getLimitFromProperty(String property, int defaultValue) { + String o = System.getProperty(property); + int i = defaultValue; + if (o != null) { + try { + i = Integer.parseUnsignedInt(o); + } catch (NumberFormatException nfe) { + LoggerFactory.getLogger(SystemLimitException.class).warn("Could not parse property " + property + ": " + o, + nfe); + } + } + return i; + } + + /** + * Check to ensure that reading the bytes is within the specified limits. + * + * @param length The proposed size of the bytes to read + * @return The size of the bytes if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the datum would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxBytesLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException( + "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + " bytes in Java library"); + } + if (length > maxBytesLength) { + throw new SystemLimitException("Bytes length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the specified number of items remains within the + * specified limits. + * + * @param existing The number of elements items read in the collection + * @param items The next number of items to read. In normal usage, this is + * always a positive, permitted value. Negative and zero values + * have a special meaning in Avro decoding. + * @return The total number of items in the collection if and only if it is + * within the limit and non-negative. 
+ * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxCollectionLength(long existing, long items) { + long length = existing + items; + if (existing < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + existing); + } + if (items < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + items); + } + if (length > MAX_ARRAY_VM_LIMIT || length < existing) { + throw new UnsupportedOperationException( + "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + " items in Java library"); + } + if (length > maxCollectionLength) { + throw new SystemLimitException("Collection length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** + * Check to ensure that reading the string size is within the specified limits. + * + * @param length The proposed size of the string to read + * @return The size of the string if and only if it is within the limit and + * non-negative. + * @throws UnsupportedOperationException if reading the items would allocate a + * collection that the Java VM would be + * unable to handle + * @throws SystemLimitException if the decoding should fail because it + * would otherwise result in an allocation + * exceeding the set limit + * @throws AvroRuntimeException if the length is negative + */ + public static int checkMaxStringLength(long length) { + if (length < 0) { + throw new AvroRuntimeException("Malformed data. Length is negative: " + length); + } + if (length > MAX_ARRAY_VM_LIMIT) { + throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"); + } + if (length > maxStringLength) { + throw new SystemLimitException("String length " + length + " exceeds maximum allowed"); + } + return (int) length; + } + + /** Reread the limits from the system properties. */ + // VisibleForTesting + static void resetLimits() { + maxBytesLength = getLimitFromProperty(MAX_BYTES_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxCollectionLength = getLimitFromProperty(MAX_COLLECTION_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + maxStringLength = getLimitFromProperty(MAX_STRING_LENGTH_PROPERTY, MAX_ARRAY_VM_LIMIT); + } +} diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java index 351c036b861..1cfed238f7e 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/CodecFactory.java @@ -28,12 +28,14 @@ /** * Encapsulates the ability to specify and configure a compression codec. * - * Currently there are three codecs registered by default: + * Currently there are five codecs registered by default: *
 * <ul>
 * <li>{@code null}</li>
 * <li>{@code deflate}</li>
 * <li>{@code snappy}</li>
 * <li>{@code bzip2}</li>
+ * <li>{@code xz}</li>
+ * <li>{@code zstandard}</li>
 * </ul>
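 *
 * For example, a registered codec can be obtained by name through the
 * existing {@code CodecFactory.fromString} lookup (a usage sketch; the
 * variable name is illustrative):
 *
 * <pre>
 * CodecFactory zstd = CodecFactory.fromString("zstandard");
 * </pre>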
* * New and custom codecs can be registered using diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java index a2b5172251d..150d2ace9ba 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileStream.java @@ -139,7 +139,7 @@ void initialize(InputStream in, byte[] magic) throws IOException { // finalize the header header.metaKeyList = Collections.unmodifiableList(header.metaKeyList); - header.schema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + header.schema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse(getMetaString(DataFileConstants.SCHEMA)); this.codec = resolveCodec(); reader.setSchema(header.schema); diff --git a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java index 05e5006acbf..65a305f34fc 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java +++ b/lang/java/avro/src/main/java/org/apache/avro/file/DataFileWriter.java @@ -22,6 +22,7 @@ import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.File; +import java.io.FileOutputStream; import java.io.FilterOutputStream; import java.io.Flushable; import java.io.IOException; @@ -51,7 +52,7 @@ * blocks. A synchronization marker is written between blocks, so that * files may be split. Blocks may be compressed. Extensible metadata is stored * at the end of the file. Files may be appended to. - * + * * @see DataFileReader */ public class DataFileWriter implements Closeable, Flushable { @@ -181,7 +182,7 @@ public DataFileWriter create(Schema schema, OutputStream outs, byte[] sync) t * sync marker is written. By default, the writer will flush the buffer each * time a sync marker is written (if the block size limit is reached or the * {@linkplain #sync()} is called. - * + * * @param flushOnEveryBlock - If set to false, this writer will not flush the * block to the stream until {@linkplain #flush()} is * explicitly called. @@ -211,7 +212,7 @@ public DataFileWriter appendTo(File file) throws IOException { /** * Open a writer appending to an existing file. Since 1.9.0 this method * does not close in. - * + * * @param in reading the existing file. * @param out positioned at the end of the existing file. */ @@ -304,7 +305,7 @@ public AppendWriteException(Exception e) { /** * Append a datum to the file. - * + * * @see AppendWriteException */ public void append(D datum) throws IOException { @@ -365,7 +366,7 @@ private void writeIfBlockFull() throws IOException { * at compression level 7. If recompress is false, blocks will be copied * without changing the compression level. If true, they will be converted to * the new compression level. - * + * * @param otherFile * @param recompress * @throws IOException @@ -439,10 +440,10 @@ public void flush() throws IOException { } /** - * If this writer was instantiated using a File or using an - * {@linkplain Syncable} instance, this method flushes all buffers for this - * writer to disk. In other cases, this method behaves exactly like - * {@linkplain #flush()}. + * If this writer was instantiated using a {@linkplain File}, + * {@linkplain FileOutputStream} or {@linkplain Syncable} instance, this method + * flushes all buffers for this writer to disk. 
In other cases, this method + * behaves exactly like {@linkplain #flush()}. * * @throws IOException */ @@ -450,6 +451,8 @@ public void fSync() throws IOException { flush(); if (underlyingStream instanceof Syncable) { ((Syncable) underlyingStream).sync(); + } else if (underlyingStream instanceof FileOutputStream) { + ((FileOutputStream) underlyingStream).getFD().sync(); } } diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java index 71861ef9728..6db0a40eee6 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java @@ -32,6 +32,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.ServiceLoader; import java.util.UUID; import java.util.concurrent.ConcurrentMap; @@ -117,6 +118,7 @@ public GenericData() { /** For subclasses. GenericData does not use a ClassLoader. */ public GenericData(ClassLoader classLoader) { this.classLoader = (classLoader != null) ? classLoader : getClass().getClassLoader(); + loadConversions(); } /** Return the class loader that's used (by subclasses). */ @@ -124,6 +126,17 @@ public ClassLoader getClassLoader() { return classLoader; } + /** + * Use the Java 6 ServiceLoader to load conversions. + * + * @see #addLogicalTypeConversion(Conversion) + */ + private void loadConversions() { + for (Conversion conversion : ServiceLoader.load(Conversion.class, classLoader)) { + addLogicalTypeConversion(conversion); + } + } + private Map> conversions = new HashMap<>(); private Map, Map>> conversionsByClass = new IdentityHashMap<>(); @@ -134,19 +147,17 @@ public Collection> getConversions() { /** * Registers the given conversion to be used when reading and writing with this - * data model. + * data model. Conversions can also be registered automatically, as documented + * on the class {@link Conversion Conversion<T>}. * * @param conversion a logical type Conversion. */ public void addLogicalTypeConversion(Conversion conversion) { conversions.put(conversion.getLogicalTypeName(), conversion); Class type = conversion.getConvertedType(); - Map> conversions = conversionsByClass.get(type); - if (conversions == null) { - conversions = new LinkedHashMap<>(); - conversionsByClass.put(type, conversions); - } - conversions.put(conversion.getLogicalTypeName(), conversion); + Map> conversionsForClass = conversionsByClass.computeIfAbsent(type, + k -> new LinkedHashMap<>()); + conversionsForClass.put(conversion.getLogicalTypeName(), conversion); } /** @@ -187,11 +198,11 @@ public Conversion getConversionByClass(Class datumClass, LogicalType l * @return the conversion for the logical type, or null */ @SuppressWarnings("unchecked") - public Conversion getConversionFor(LogicalType logicalType) { + public Conversion getConversionFor(LogicalType logicalType) { if (logicalType == null) { return null; } - return (Conversion) conversions.get(logicalType.getName()); + return (Conversion) conversions.get(logicalType.getName()); } public static final String FAST_READER_PROP = "org.apache.avro.fastread"; @@ -306,30 +317,16 @@ public String toString() { } } - /** Default implementation of an array. 
*/ - @SuppressWarnings(value = "unchecked") - public static class Array extends AbstractList implements GenericArray, Comparable> { - private static final Object[] EMPTY = new Object[0]; + public static abstract class AbstractArray extends AbstractList + implements GenericArray, Comparable> { private final Schema schema; - private int size; - private Object[] elements = EMPTY; - public Array(int capacity, Schema schema) { - if (schema == null || !Type.ARRAY.equals(schema.getType())) - throw new AvroRuntimeException("Not an array schema: " + schema); - this.schema = schema; - if (capacity != 0) - elements = new Object[capacity]; - } + protected int size = 0; - public Array(Schema schema, Collection c) { + public AbstractArray(Schema schema) { if (schema == null || !Type.ARRAY.equals(schema.getType())) throw new AvroRuntimeException("Not an array schema: " + schema); this.schema = schema; - if (c != null) { - elements = new Object[c.size()]; - addAll(c); - } } @Override @@ -343,22 +340,26 @@ public int size() { } @Override - public void clear() { - // Let GC do its work - Arrays.fill(elements, 0, size, null); + public void reset() { size = 0; } @Override - public void reset() { - size = 0; + public int compareTo(GenericArray that) { + return GenericData.get().compare(this, that, this.getSchema()); } @Override - public void prune() { - if (size < elements.length) { - Arrays.fill(elements, size, elements.length, null); + public boolean equals(final Object o) { + if (!(o instanceof Collection)) { + return false; } + return GenericData.get().compare(this, o, this.getSchema()) == 0; + } + + @Override + public int hashCode() { + return super.hashCode(); } @Override @@ -373,7 +374,7 @@ public boolean hasNext() { @Override public T next() { - return (T) elements[position++]; + return AbstractArray.this.get(position++); } @Override @@ -383,6 +384,57 @@ public void remove() { }; } + @Override + public void reverse() { + int left = 0; + int right = size - 1; + + while (left < right) { + this.swap(left, right); + + left++; + right--; + } + } + + protected abstract void swap(int index1, int index2); + } + + /** Default implementation of an array. 
*/ + @SuppressWarnings(value = "unchecked") + public static class Array extends AbstractArray { + private static final Object[] EMPTY = new Object[0]; + + private Object[] elements = EMPTY; + + public Array(int capacity, Schema schema) { + super(schema); + if (capacity != 0) + elements = new Object[capacity]; + } + + public Array(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new Object[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + // Let GC do its work + Arrays.fill(elements, 0, size, null); + size = 0; + } + + @Override + public void prune() { + if (size < elements.length) { + Arrays.fill(elements, size, elements.length, null); + } + } + @Override public T get(int i) { if (i >= size) @@ -431,23 +483,10 @@ public T peek() { } @Override - public int compareTo(GenericArray that) { - return GenericData.get().compare(this, that, this.getSchema()); - } - - @Override - public void reverse() { - int left = 0; - int right = elements.length - 1; - - while (left < right) { - Object tmp = elements[left]; - elements[left] = elements[right]; - elements[right] = tmp; - - left++; - right--; - } + protected void swap(final int index1, final int index2) { + Object tmp = elements[index1]; + elements[index1] = elements[index2]; + elements[index2] = tmp; } } @@ -1167,7 +1206,11 @@ protected int compareMaps(final Map m1, final Map m2) { return 0; } - if (m2.size() != m2.size()) { + if (m1.isEmpty() && m2.isEmpty()) { + return 0; + } + + if (m1.size() != m2.size()) { return 1; } @@ -1484,8 +1527,24 @@ public Object newArray(Object old, int size, Schema schema) { } else if (old instanceof Collection) { ((Collection) old).clear(); return old; - } else + } else { + if (schema.getElementType().getType() == Type.INT) { + return new PrimitivesArrays.IntArray(size, schema); + } + if (schema.getElementType().getType() == Type.BOOLEAN) { + return new PrimitivesArrays.BooleanArray(size, schema); + } + if (schema.getElementType().getType() == Type.LONG) { + return new PrimitivesArrays.LongArray(size, schema); + } + if (schema.getElementType().getType() == Type.FLOAT) { + return new PrimitivesArrays.FloatArray(size, schema); + } + if (schema.getElementType().getType() == Type.DOUBLE) { + return new PrimitivesArrays.DoubleArray(size, schema); + } return new GenericData.Array(size, schema); + } } /** diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java new file mode 100644 index 00000000000..d34ce0f5bcb --- /dev/null +++ b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java @@ -0,0 +1,609 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.generic; + +import org.apache.avro.AvroRuntimeException; +import org.apache.avro.Schema; + +import java.util.Arrays; +import java.util.Collection; + +public class PrimitivesArrays { + + public static class IntArray extends GenericData.AbstractArray { + private static final int[] EMPTY = new int[0]; + + private int[] elements = EMPTY; + + public IntArray(int capacity, Schema schema) { + super(schema); + if (!Schema.Type.INT.equals(schema.getElementType().getType())) + throw new AvroRuntimeException("Not a int array schema: " + schema); + if (capacity != 0) + elements = new int[capacity]; + } + + public IntArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new int[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Integer get(int i) { + return this.getInt(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public int getInt(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Integer o) { + if (o == null) { + return; + } + this.add(location, o.intValue()); + } + + public void add(int location, int o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Integer set(int i, Integer o) { + if (o == null) { + return null; + } + return this.set(i, o.intValue()); + } + + public int set(int i, int o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + int response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Integer remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + int result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Integer peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + int tmp = elements[index1]; + elements[index1] = elements[index2]; + elements[index2] = tmp; + } + } + + public static class LongArray extends GenericData.AbstractArray { + private static final long[] EMPTY = new long[0]; + + private long[] elements = EMPTY; + + public LongArray(int capacity, Schema schema) { + super(schema); + if (!Schema.Type.LONG.equals(schema.getElementType().getType())) + throw new AvroRuntimeException("Not a long array schema: " + schema); + if (capacity != 0) + elements = new long[capacity]; + } + + public LongArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new long[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Long get(int i) { + return getLong(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. 
+ */ + public long getLong(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Long o) { + if (o == null) { + return; + } + this.add(location, o.longValue()); + } + + public void add(int location, long o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Long set(int i, Long o) { + if (o == null) { + return null; + } + return this.set(i, o.longValue()); + } + + public long set(int i, long o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + long response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Long remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + long result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Long peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + long tmp = elements[index1]; + elements[index1] = elements[index2]; + elements[index2] = tmp; + } + } + + public static class BooleanArray extends GenericData.AbstractArray { + private static final byte[] EMPTY = new byte[0]; + + private byte[] elements = EMPTY; + + public BooleanArray(int capacity, Schema schema) { + super(schema); + if (!Schema.Type.BOOLEAN.equals(schema.getElementType().getType())) + throw new AvroRuntimeException("Not a boolean array schema: " + schema); + if (capacity != 0) + elements = new byte[1 + (capacity / Byte.SIZE)]; + } + + public BooleanArray(Schema schema, Collection c) { + super(schema); + + if (c != null) { + elements = new byte[1 + (c.size() / 8)]; + if (c instanceof BooleanArray) { + BooleanArray other = (BooleanArray) c; + this.size = other.size; + System.arraycopy(other.elements, 0, this.elements, 0, other.elements.length); + } else { + addAll(c); + } + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Boolean get(int i) { + return this.getBoolean(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. 
+ */ + public boolean getBoolean(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return (elements[i / 8] & (1 << (i % 8))) > 0; + } + + @Override + public boolean add(final Boolean o) { + if (o == null) { + return false; + } + return this.add(o.booleanValue()); + } + + public boolean add(final boolean o) { + if (this.size == elements.length * 8) { + final int newLength = elements.length + (elements.length >> 1) + 1; + elements = Arrays.copyOf(elements, newLength); + } + this.size++; + this.set(this.size - 1, o); + return true; + } + + @Override + public void add(int location, Boolean o) { + if (o == null) { + return; + } + this.add(location, o.booleanValue()); + } + + public void add(int location, boolean o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length * 8) { + // Increase size by 1.5x + 1 + final int newLength = elements.length + (elements.length >> 1) + 1; + elements = Arrays.copyOf(elements, newLength); + } + size++; + for (int index = this.size / 8; index > (location / 8); index--) { + elements[index] <<= 1; + if (index > 0 && (elements[index - 1] & (1 << Byte.SIZE)) > 0) { + elements[index] |= 1; + } + } + byte pos = (byte) (1 << (location % Byte.SIZE)); + byte highbits = (byte) ~(pos + (pos - 1)); + byte lowbits = (byte) (pos - 1); + byte currentHigh = (byte) ((elements[location / 8] & highbits) << 1); + byte currentLow = (byte) (elements[location / 8] & lowbits); + if (o) { + elements[location / 8] = (byte) (currentHigh | currentLow | pos); + } else { + elements[location / 8] = (byte) (currentHigh | currentLow); + } + + } + + @Override + public Boolean set(int i, Boolean o) { + if (o == null) { + return null; + } + return this.set(i, o.booleanValue()); + } + + public boolean set(int i, boolean o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + boolean response = (elements[i / 8] & (1 << (i % 8))) > 0; + if (o) { + elements[i / 8] |= 1 << (i % 8); + } else { + elements[i / 8] &= 0xFF - (1 << (i % 8)); + } + return response; + } + + @Override + public Boolean remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + boolean result = (elements[(i / 8)] & (1 << (i % 8))) > 0; + --size; + + byte memo = 0; + if ((i / 8) + 1 < elements.length) { + memo = (byte) ((1 & (elements[(i / 8) + 1])) << 7); + } + for (int index = (i / 8) + 1; index <= (size / 8); index++) { + elements[index] = (byte) ((elements[index] & 0xff) >>> 1); + if (index + 1 < elements.length && (elements[index + 1] & 1) == 1) { + elements[index] |= 1 << (Byte.SIZE - 1); + } + } + // 87654321 => 8764321 + byte start = (byte) ((1 << ((i + 1) % 8)) - 1); + byte end = (byte) ~start; + elements[i / 8] = (byte) ((((start & 0xff) >>> 1) & elements[i / 8]) // 1234 + | (end & (elements[i / 8] >> 1)) // 876 + | memo); + + return result; + } + + @Override + public Boolean peek() { + return (size < elements.length * Byte.SIZE) ? 
(elements[(size / 8)] & (1 << (size % 8))) > 0 : null; + } + + @Override + protected void swap(final int index1, final int index2) { + boolean tmp = this.get(index1); + this.set(index1, this.get(index2)); + this.set(index2, tmp); + } + } + + public static class FloatArray extends GenericData.AbstractArray { + private static final float[] EMPTY = new float[0]; + + private float[] elements = EMPTY; + + public FloatArray(int capacity, Schema schema) { + super(schema); + if (!Schema.Type.FLOAT.equals(schema.getElementType().getType())) + throw new AvroRuntimeException("Not a float array schema: " + schema); + if (capacity != 0) + elements = new float[capacity]; + } + + public FloatArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new float[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Float get(int i) { + return this.getFloat(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. + */ + public float getFloat(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + return elements[i]; + } + + @Override + public void add(int location, Float o) { + if (o == null) { + return; + } + this.add(location, o.floatValue()); + } + + public void add(int location, float o) { + if (location > size || location < 0) { + throw new IndexOutOfBoundsException("Index " + location + " out of bounds."); + } + if (size == elements.length) { + // Increase size by 1.5x + 1 + final int newSize = size + (size >> 1) + 1; + elements = Arrays.copyOf(elements, newSize); + } + System.arraycopy(elements, location, elements, location + 1, size - location); + elements[location] = o; + size++; + } + + @Override + public Float set(int i, Float o) { + if (o == null) { + return null; + } + return this.set(i, o.floatValue()); + } + + public float set(int i, float o) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + float response = elements[i]; + elements[i] = o; + return response; + } + + @Override + public Float remove(int i) { + if (i >= size) + throw new IndexOutOfBoundsException("Index " + i + " out of bounds."); + float result = elements[i]; + --size; + System.arraycopy(elements, i + 1, elements, i, (size - i)); + return result; + } + + @Override + public Float peek() { + return (size < elements.length) ? elements[size] : null; + } + + @Override + protected void swap(final int index1, final int index2) { + float tmp = this.get(index1); + this.set(index1, this.get(index2)); + this.set(index2, tmp); + } + } + + public static class DoubleArray extends GenericData.AbstractArray { + private static final double[] EMPTY = new double[0]; + + private double[] elements = EMPTY; + + public DoubleArray(int capacity, Schema schema) { + super(schema); + if (!Schema.Type.DOUBLE.equals(schema.getElementType().getType())) + throw new AvroRuntimeException("Not a double array schema: " + schema); + if (capacity != 0) + elements = new double[capacity]; + } + + public DoubleArray(Schema schema, Collection c) { + super(schema); + if (c != null) { + elements = new double[c.size()]; + addAll(c); + } + } + + @Override + public void clear() { + size = 0; + } + + @Override + public Double get(int i) { + return this.getDouble(i); + } + + /** + * Direct primitive int access. + * + * @param i : index. + * @return value at index. 
+     */
+    public double getDouble(int i) {
+      if (i >= size)
+        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
+      return elements[i];
+    }
+
+    @Override
+    public void add(int location, Double o) {
+      if (o == null) {
+        return;
+      }
+      this.add(location, o.doubleValue());
+    }
+
+    public void add(int location, double o) {
+      if (location > size || location < 0) {
+        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
+      }
+      if (size == elements.length) {
+        // Increase size by 1.5x + 1
+        final int newSize = size + (size >> 1) + 1;
+        elements = Arrays.copyOf(elements, newSize);
+      }
+      System.arraycopy(elements, location, elements, location + 1, size - location);
+      elements[location] = o;
+      size++;
+    }
+
+    @Override
+    public Double set(int i, Double o) {
+      if (o == null) {
+        return null;
+      }
+      return this.set(i, o.doubleValue());
+    }
+
+    public double set(int i, double o) {
+      if (i >= size)
+        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
+      double response = elements[i];
+      elements[i] = o;
+      return response;
+    }
+
+    @Override
+    public Double remove(int i) {
+      if (i >= size)
+        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
+      double result = elements[i];
+      --size;
+      System.arraycopy(elements, i + 1, elements, i, (size - i));
+      return result;
+    }
+
+    @Override
+    public Double peek() {
+      return (size < elements.length) ? elements[size] : null;
+    }
+
+    @Override
+    protected void swap(final int index1, final int index2) {
+      double tmp = this.get(index1);
+      this.set(index1, this.get(index2));
+      this.set(index2, tmp);
+    }
+  }
+
+}
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
index 051563abaef..3fa675d793a 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/BinaryDecoder.java
@@ -26,8 +26,8 @@
 import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.InvalidNumberEncodingException;
+import org.apache.avro.SystemLimitException;
 import org.apache.avro.util.Utf8;
-import org.slf4j.LoggerFactory;
 
 /**
  * An {@link Decoder} for binary-format data.
@@ -39,27 +39,20 @@
  * can be accessed by inputStream().remaining(), if the BinaryDecoder is not
  * 'direct'.
 * <p/>
- * To prevent this class from making large allocations when handling potentially - * pathological input data, set Java properties - * org.apache.avro.limits.string.maxLength and - * org.apache.avro.limits.bytes.maxLength before instantiating this - * class to limit the maximum sizes of string and bytes types - * handled. The default is to permit sizes up to Java's maximum array length. * * @see Encoder + * @see SystemLimitException */ public class BinaryDecoder extends Decoder { /** - * The maximum size of array to allocate. Some VMs reserve some header words in - * an array. Attempts to allocate larger arrays may result in OutOfMemoryError: - * Requested array size exceeds VM limit + * When reading a collection (MAP or ARRAY), this keeps track of the number of + * elements to ensure that the + * {@link SystemLimitException#checkMaxCollectionLength} constraint is + * respected. */ - static final long MAX_ARRAY_SIZE = (long) Integer.MAX_VALUE - 8L; - - private static final String MAX_BYTES_LENGTH_PROPERTY = "org.apache.avro.limits.bytes.maxLength"; - protected final int maxBytesLength; + private long collectionCount = 0L; private ByteSource source = null; // we keep the buffer and its state variables in this class and not in a @@ -99,17 +92,6 @@ void clearBuf() { /** protected constructor for child classes */ protected BinaryDecoder() { super(); - String o = System.getProperty(MAX_BYTES_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(BinaryDecoder.class) - .warn("Could not parse property " + MAX_BYTES_LENGTH_PROPERTY + ": " + o, nfe); - } - } - maxBytesLength = i; } BinaryDecoder(InputStream in, int bufferSize) { @@ -300,17 +282,11 @@ public double readDouble() throws IOException { @Override public Utf8 readString(Utf8 old) throws IOException { - long length = readLong(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read strings longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } + int length = SystemLimitException.checkMaxStringLength(readLong()); Utf8 result = (old != null ? old : new Utf8()); - result.setByteLength((int) length); - if (0L != length) { - doReadBytes(result.getBytes(), 0, (int) length); + result.setByteLength(length); + if (0 != length) { + doReadBytes(result.getBytes(), 0, length); } return result; } @@ -329,25 +305,16 @@ public void skipString() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. 
Length is negative: " + length); - } + int length = SystemLimitException.checkMaxBytesLength(readLong()); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; ((Buffer) result).clear(); } else { - result = ByteBuffer.allocate(length); + result = ByteBuffer.allocate((int) length); } - doReadBytes(result.array(), result.position(), length); - ((Buffer) result).limit(length); + doReadBytes(result.array(), result.position(), (int) length); + ((Buffer) result).limit((int) length); return result; } @@ -443,7 +410,6 @@ protected long doReadItemCount() throws IOException { * @return Zero if there are no more items to skip and end of array/map is * reached. Positive number if some items are found that cannot be * skipped and the client needs to skip them individually. - * * @throws IOException If the first byte cannot be read for any reason other * than the end of the file, if the input stream has been * closed, or if some other I/O error occurs. @@ -460,12 +426,15 @@ private long doSkipItems() throws IOException { @Override public long readArrayStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long arrayNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -475,12 +444,15 @@ public long skipArray() throws IOException { @Override public long readMapStart() throws IOException { - return doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(0L, doReadItemCount()); + return collectionCount; } @Override public long mapNext() throws IOException { - return doReadItemCount(); + long length = doReadItemCount(); + collectionCount = SystemLimitException.checkMaxCollectionLength(collectionCount, length); + return length; } @Override @@ -932,7 +904,6 @@ public void close() throws IOException { /** * This byte source is special. It will avoid copying data by using the source's * byte[] as a buffer in the decoder. 
- * */ private static class ByteArrayByteSource extends ByteSource { private static final int MIN_SIZE = 16; diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java index d9bbe93534c..6f07b13eee2 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/DirectBinaryDecoder.java @@ -22,8 +22,8 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import org.apache.avro.AvroRuntimeException; import org.apache.avro.InvalidNumberEncodingException; +import org.apache.avro.SystemLimitException; import org.apache.avro.util.ByteBufferInputStream; /** @@ -40,30 +40,17 @@ class DirectBinaryDecoder extends BinaryDecoder { private class ByteReader { public ByteBuffer read(ByteBuffer old, int length) throws IOException { - this.checkLength(length); final ByteBuffer result; if (old != null && length <= old.capacity()) { result = old; result.clear(); } else { - result = ByteBuffer.allocate(length); + result = ByteBuffer.allocate((int) length); } - doReadBytes(result.array(), result.position(), length); - result.limit(length); + doReadBytes(result.array(), result.position(), (int) length); + result.limit((int) length); return result; } - - protected final void checkLength(int length) { - if (length < 0L) { - throw new AvroRuntimeException("Malformed data. Length is negative: " + length); - } - if (length > MAX_ARRAY_SIZE) { - throw new UnsupportedOperationException("Cannot read arrays longer than " + MAX_ARRAY_SIZE + " bytes"); - } - if (length > maxBytesLength) { - throw new AvroRuntimeException("Bytes length " + length + " exceeds maximum allowed"); - } - } } private class ReuseByteReader extends ByteReader { @@ -75,14 +62,12 @@ public ReuseByteReader(ByteBufferInputStream bbi) { @Override public ByteBuffer read(ByteBuffer old, int length) throws IOException { - this.checkLength(length); if (old != null) { return super.read(old, length); } else { - return bbi.readBuffer(length); + return bbi.readBuffer((int) length); } } - } private ByteReader byteReader; @@ -170,8 +155,8 @@ public double readDouble() throws IOException { @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { - int length = readInt(); - return byteReader.read(old, length); + long length = readLong(); + return byteReader.read(old, SystemLimitException.checkMaxBytesLength(length)); } @Override diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java index 0188a29637d..055ef9541d9 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java +++ b/lang/java/avro/src/main/java/org/apache/avro/io/EncoderFactory.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.OutputStream; +import java.util.EnumSet; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; @@ -71,11 +72,11 @@ public static EncoderFactory get() { * likely to improve performance but may be useful for the * downstream OutputStream. * @return This factory, to enable method chaining: - * + * *
   *         <pre>
   *         EncoderFactory factory = new EncoderFactory().configureBufferSize(4096);
   *         </pre>
-   * 
+   *
   * @see #binaryEncoder(OutputStream, BinaryEncoder)
   */
  public EncoderFactory configureBufferSize(int size) {
@@ -90,7 +91,7 @@ public EncoderFactory configureBufferSize(int size) {
  /**
   * Returns this factory's configured default buffer size. Used when creating
   * Encoder instances that buffer writes.
-   * 
+   *
   * @see #configureBufferSize(int)
   * @see #binaryEncoder(OutputStream, BinaryEncoder)
   * @return The preferred buffer size, in bytes.
@@ -109,11 +110,11 @@ public int getBufferSize() {
   * outside this range are set to the nearest value in the range. The
   * encoder will require at least this amount of memory.
   * @return This factory, to enable method chaining:
-   * 
+   *
   *         <pre>
   *         EncoderFactory factory = new EncoderFactory().configureBlockSize(8000);
   *         </pre>
-   * 
+   *
   * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder)
   */
  public EncoderFactory configureBlockSize(int size) {
@@ -131,7 +132,7 @@ public EncoderFactory configureBlockSize(int size) {
   * #blockingBinaryEncoder(OutputStream, BinaryEncoder) will have block buffers
   * of this size.
   * <p/>
-   * 
+   *
   * @see #configureBlockSize(int)
   * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder)
   * @return The preferred block size, in bytes.
@@ -297,6 +298,38 @@ public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty)
     return new JsonEncoder(schema, out, pretty);
   }
 
+  /**
+   * Creates a {@link JsonEncoder} using the OutputStream provided for writing
+   * data conforming to the Schema provided with optional pretty printing.
+   * <p/>
+   * {@link JsonEncoder} buffers its output. Data may not appear on the
+   * underlying OutputStream until {@link Encoder#flush()} is called.

+   * {@link JsonEncoder} is not thread-safe.
+   *
+   * @param schema    The Schema for data written to this JsonEncoder. Cannot be
+   *                  null.
+   * @param out       The OutputStream to write to. Cannot be null.
+   * @param pretty    Pretty print encoding.
+   * @param autoflush Whether to automatically flush the data to storage; defaults
+   *                  to true. Controls the underlying FLUSH_PASSED_TO_STREAM
+   *                  feature of JsonGenerator.
+   * @return A JsonEncoder configured with out, schema and pretty
+   * @throws IOException
+   */
+  public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty, boolean autoflush)
+      throws IOException {
+    EnumSet<JsonEncoder.JsonOptions> options = EnumSet.noneOf(JsonEncoder.JsonOptions.class);
+    if (pretty) {
+      options.add(JsonEncoder.JsonOptions.Pretty);
+    }
+    if (!autoflush) {
+      options.add(JsonEncoder.JsonOptions.NoFlushStream);
+    }
+    return new JsonEncoder(schema, out, options);
+  }
+
   /**
    * Creates a {@link JsonEncoder} using the {@link JsonGenerator} provided for
    * output of data conforming to the Schema provided.
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
index c1c38511ab4..2ad496a5b87 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonDecoder.java
@@ -86,7 +86,7 @@ private static Symbol getSymbol(Schema schema) {
    * <p/>

* Otherwise, this JsonDecoder will reset its state and then reconfigure its * input. - * + * * @param in The InputStream to read from. Cannot be null. * @throws IOException * @throws NullPointerException if {@code in} is {@code null} @@ -109,7 +109,7 @@ public JsonDecoder configure(InputStream in) throws IOException { *

   * Otherwise, this JsonDecoder will reset its state and then reconfigure its
   * input.
-   * 
+   *
   * @param in The String to read from. Cannot be null.
   * @throws IOException
   * @throws NullPointerException if {@code in} is {@code null}
@@ -157,25 +157,39 @@ public boolean readBoolean() throws IOException {
   @Override
   public int readInt() throws IOException {
     advance(Symbol.INT);
-    if (in.getCurrentToken().isNumeric()) {
+    if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
       int result = in.getIntValue();
       in.nextToken();
       return result;
-    } else {
-      throw error("int");
     }
+    if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
+      float value = in.getFloatValue();
+      if (Math.abs(value - Math.round(value)) <= Float.MIN_VALUE) {
+        int result = Math.round(value);
+        in.nextToken();
+        return result;
+      }
+    }
+    throw error("int");
   }

   @Override
   public long readLong() throws IOException {
     advance(Symbol.LONG);
-    if (in.getCurrentToken().isNumeric()) {
+    if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_INT) {
       long result = in.getLongValue();
       in.nextToken();
       return result;
-    } else {
-      throw error("long");
     }
+    if (in.getCurrentToken() == JsonToken.VALUE_NUMBER_FLOAT) {
+      double value = in.getDoubleValue();
+      if (Math.abs(value - Math.round(value)) <= Double.MIN_VALUE) {
+        long result = Math.round(value);
+        in.nextToken();
+        return result;
+      }
+    }
+    throw error("long");
   }

   @Override
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java
index 71cc690b8a4..7e3a67eb6db 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/JsonEncoder.java
@@ -22,7 +22,9 @@
 import java.nio.ByteBuffer;
 import java.nio.charset.StandardCharsets;
 import java.util.BitSet;
+import java.util.EnumSet;
 import java.util.Objects;
+import java.util.Set;

 import org.apache.avro.AvroTypeException;
 import org.apache.avro.Schema;
@@ -33,6 +35,7 @@
 import com.fasterxml.jackson.core.JsonEncoding;
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.core.PrettyPrinter;
 import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
 import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;

@@ -58,11 +61,15 @@ public class JsonEncoder extends ParsingEncoder implements Parser.ActionHandler
   protected BitSet isEmpty = new BitSet();

   JsonEncoder(Schema sc, OutputStream out) throws IOException {
-    this(sc, getJsonGenerator(out, false));
+    this(sc, getJsonGenerator(out, EnumSet.noneOf(JsonOptions.class)));
   }

   JsonEncoder(Schema sc, OutputStream out, boolean pretty) throws IOException {
-    this(sc, getJsonGenerator(out, pretty));
+    this(sc, getJsonGenerator(out, pretty ? EnumSet.of(JsonOptions.Pretty) : EnumSet.noneOf(JsonOptions.class)));
+  }
+
+  JsonEncoder(Schema sc, OutputStream out, Set<JsonOptions> options) throws IOException {
+    this(sc, getJsonGenerator(out, options));
   }

   JsonEncoder(Schema sc, JsonGenerator out) throws IOException {
@@ -78,24 +85,28 @@ public void flush() throws IOException {
     }
   }

+  enum JsonOptions {
+    Pretty,
+
+    // Prevents the underlying OutputStream from being flushed, as an optimisation.
+    NoFlushStream
+  }
+
   // by default, one object per line.
   // with pretty option use default pretty printer with root line separator.
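// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): a minimal usage sketch for the
// autoflush flag exposed by the EncoderFactory.jsonEncoder(schema, out,
// pretty, autoflush) overload added above. With autoflush=false the
// generator's FLUSH_PASSED_TO_STREAM feature is disabled, so Encoder#flush()
// writes the buffered JSON to the OutputStream without also flushing that
// stream itself. Assumes the usual imports: org.apache.avro.Schema,
// org.apache.avro.generic.*, org.apache.avro.io.*, java.io.ByteArrayOutputStream.

Schema schema = new Schema.Parser()
    .parse("{\"type\":\"record\",\"name\":\"R\",\"fields\":[{\"name\":\"f\",\"type\":\"string\"}]}");
GenericRecord record = new GenericData.Record(schema);
record.put("f", "value");

ByteArrayOutputStream out = new ByteArrayOutputStream();
JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, /* pretty */ false, /* autoflush */ false);
new GenericDatumWriter<GenericRecord>(schema).write(record, encoder);
encoder.flush(); // pushes the buffered JSON to `out`; `out` itself is not flushed
// ---------------------------------------------------------------------------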
- private static JsonGenerator getJsonGenerator(OutputStream out, boolean pretty) throws IOException { + private static JsonGenerator getJsonGenerator(OutputStream out, Set options) throws IOException { Objects.requireNonNull(out, "OutputStream cannot be null"); JsonGenerator g = new JsonFactory().createGenerator(out, JsonEncoding.UTF8); - if (pretty) { - DefaultPrettyPrinter pp = new DefaultPrettyPrinter() { - @Override - public void writeRootValueSeparator(JsonGenerator jg) throws IOException { - jg.writeRaw(LINE_SEPARATOR); - } - }; - g.setPrettyPrinter(pp); + if (options.contains(JsonOptions.NoFlushStream)) { + g = g.configure(JsonGenerator.Feature.FLUSH_PASSED_TO_STREAM, false); + } + final PrettyPrinter pp; + if (options.contains(JsonOptions.Pretty)) { + pp = new DefaultPrettyPrinter(LINE_SEPARATOR); } else { - MinimalPrettyPrinter pp = new MinimalPrettyPrinter(); - pp.setRootValueSeparator(LINE_SEPARATOR); - g.setPrettyPrinter(pp); + pp = new MinimalPrettyPrinter(LINE_SEPARATOR); } + g.setPrettyPrinter(pp); return g; } @@ -122,7 +133,29 @@ public void setIncludeNamespace(final boolean includeNamespace) { * @return this JsonEncoder */ public JsonEncoder configure(OutputStream out) throws IOException { - this.configure(getJsonGenerator(out, false)); + return this.configure(out, true); + } + + /** + * Reconfigures this JsonEncoder to use the output stream provided. + *

+   * If the OutputStream provided is null, a NullPointerException is thrown.
+   * <p/>

+   * Otherwise, this JsonEncoder will flush its current output and then
+   * reconfigure its output to use a default UTF8 JsonGenerator that writes to the
+   * provided OutputStream.
+   *
+   * @param out The OutputStream to direct output to. Cannot be null.
+   * @throws IOException
+   * @throws NullPointerException if {@code out} is {@code null}
+   * @return this JsonEncoder
+   */
+  public JsonEncoder configure(OutputStream out, boolean autoflush) throws IOException {
+    EnumSet<JsonOptions> jsonOptions = EnumSet.noneOf(JsonOptions.class);
+    if (!autoflush) {
+      jsonOptions.add(JsonOptions.NoFlushStream);
+    }
+    this.configure(getJsonGenerator(out, jsonOptions));
     return this;
   }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
index 77fbe1c7ad0..f1c9d139e7d 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/io/parsing/ResolvingGrammarGenerator.java
@@ -292,8 +292,16 @@ public static void encode(Encoder e, Schema s, JsonNode n) throws IOException {
       e.writeMapEnd();
       break;
     case UNION:
-      e.writeIndex(0);
-      encode(e, s.getTypes().get(0), n);
+      int correctIndex = 0;
+      List<Schema> innerTypes = s.getTypes();
+      while (correctIndex < innerTypes.size() && !isCompatible(innerTypes.get(correctIndex).getType(), n)) {
+        correctIndex++;
+      }
+      if (correctIndex >= innerTypes.size()) {
+        throw new AvroTypeException("No compatible default value for union: " + n);
+      }
+      e.writeIndex(correctIndex);
+      encode(e, innerTypes.get(correctIndex), n);
       break;
     case FIXED:
       if (!n.isTextual())
@@ -346,4 +354,29 @@ public static void encode(Encoder e, Schema s, JsonNode n) throws IOException {
       break;
     }
   }
+
+  private static boolean isCompatible(Schema.Type stype, JsonNode value) {
+    switch (stype) {
+    case RECORD:
+    case ENUM:
+    case ARRAY:
+    case MAP:
+    case UNION:
+      return true;
+    case FIXED:
+    case STRING:
+    case BYTES:
+      return value.isTextual();
+    case INT:
+    case LONG:
+    case FLOAT:
+    case DOUBLE:
+      return value.isNumber();
+    case BOOLEAN:
+      return value.isBoolean();
+    case NULL:
+      return value.isNull();
+    }
+    return true;
+  }
 }
diff --git a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
index ec490979477..347490679ee 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/reflect/ReflectData.java
@@ -617,11 +617,8 @@ protected Object createSchemaDefaultValue(Type type, Field field, Schema fieldSc
     AvroDefault defaultAnnotation = field.getAnnotation(AvroDefault.class);
     defaultValue = (defaultAnnotation == null) ?
null : Schema.parseJsonToObject(defaultAnnotation.value()); - if (defaultValue == null && fieldSchema.getType() == Schema.Type.UNION) { - Schema defaultType = fieldSchema.getTypes().get(0); - if (defaultType.getType() == Schema.Type.NULL) { - defaultValue = JsonProperties.NULL_VALUE; - } + if (defaultValue == null && fieldSchema.isNullable()) { + defaultValue = JsonProperties.NULL_VALUE; } return defaultValue; } @@ -756,7 +753,7 @@ protected Schema createSchema(Type type, Map names) { AvroMeta[] metadata = field.getAnnotationsByType(AvroMeta.class); // add metadata for (AvroMeta meta : metadata) { - if (recordField.getObjectProps().containsKey(meta.key())) { + if (recordField.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate field prop key: " + meta.key()); } recordField.addProp(meta.key(), meta.value()); @@ -775,7 +772,7 @@ protected Schema createSchema(Type type, Map names) { schema.setFields(fields); AvroMeta[] metadata = c.getAnnotationsByType(AvroMeta.class); for (AvroMeta meta : metadata) { - if (schema.getObjectProps().containsKey(meta.key())) { + if (schema.propsContainsKey(meta.key())) { throw new AvroTypeException("Duplicate type prop key: " + meta.key()); } schema.addProp(meta.key(), meta.value()); diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java index f54b6e2062b..9238fd78c65 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java @@ -24,9 +24,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; -import org.apache.avro.AvroRuntimeException; +import org.apache.avro.SystemLimitException; import org.apache.avro.io.BinaryData; -import org.slf4j.LoggerFactory; /** * A Utf8 string. Unlike {@link String}, instances are mutable. This is more @@ -34,22 +33,8 @@ * as a single instance may be reused. */ public class Utf8 implements Comparable, CharSequence, Externalizable { - private static final String MAX_LENGTH_PROPERTY = "org.apache.avro.limits.string.maxLength"; - private static final int MAX_LENGTH; - private static final byte[] EMPTY = new byte[0]; - static { - String o = System.getProperty(MAX_LENGTH_PROPERTY); - int i = Integer.MAX_VALUE; - if (o != null) { - try { - i = Integer.parseUnsignedInt(o); - } catch (NumberFormatException nfe) { - LoggerFactory.getLogger(Utf8.class).warn("Could not parse property " + MAX_LENGTH_PROPERTY + ": " + o, nfe); - } - } - MAX_LENGTH = i; - } + private static final byte[] EMPTY = new byte[0]; private byte[] bytes; private int hash; @@ -63,7 +48,7 @@ public Utf8() { public Utf8(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -78,7 +63,7 @@ public Utf8(Utf8 other) { public Utf8(byte[] bytes) { int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; } @@ -121,7 +106,7 @@ public Utf8 setLength(int newLength) { * length does not change, as this also clears the cached String. 
*/ public Utf8 setByteLength(int newLength) { - checkLength(newLength); + SystemLimitException.checkMaxStringLength(newLength); if (this.bytes.length < newLength) { this.bytes = Arrays.copyOf(this.bytes, newLength); } @@ -135,7 +120,7 @@ public Utf8 setByteLength(int newLength) { public Utf8 set(String string) { byte[] bytes = getBytesFor(string); int length = bytes.length; - checkLength(length); + SystemLimitException.checkMaxStringLength(length); this.bytes = bytes; this.length = length; this.string = string; @@ -215,12 +200,6 @@ public CharSequence subSequence(int start, int end) { return toString().subSequence(start, end); } - private static void checkLength(int length) { - if (length > MAX_LENGTH) { - throw new AvroRuntimeException("String length " + length + " exceeds maximum allowed"); - } - } - /** Gets the UTF-8 bytes for a String */ public static byte[] getBytesFor(String str) { return str.getBytes(StandardCharsets.UTF_8); diff --git a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java index a57cb49ac13..565d8e7ed36 100644 --- a/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java +++ b/lang/java/avro/src/main/java/org/apache/avro/util/WeakIdentityHashMap.java @@ -22,10 +22,10 @@ import java.lang.ref.WeakReference; import java.util.Collection; import java.util.Collections; -import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; /** * Implements a combination of WeakHashMap and IdentityHashMap. Useful for @@ -41,7 +41,7 @@ */ public class WeakIdentityHashMap implements Map { private final ReferenceQueue queue = new ReferenceQueue<>(); - private Map backingStore = new HashMap<>(); + private Map backingStore = new ConcurrentHashMap<>(); public WeakIdentityHashMap() { } diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomType.java b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java new file mode 100644 index 00000000000..140ac901b0b --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomType.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.avro; + +import java.util.Objects; + +public final class CustomType { + private final String name; + + public CustomType(CharSequence name) { + this.name = name.toString(); + } + + public String getName() { + return name; + } + + @Override + public int hashCode() { + return Objects.hashCode(name); + } + + @Override + public boolean equals(Object obj) { + return obj instanceof CustomType && name.equals(((CustomType) obj).name); + } + + @Override + public String toString() { + return "CustomType{name='" + name + "'}"; + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java new file mode 100644 index 00000000000..de8fea02ca4 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeConverter.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +public class CustomTypeConverter extends Conversion { + private static final CustomTypeLogicalTypeFactory logicalTypeFactory = new CustomTypeLogicalTypeFactory(); + + @Override + public Class getConvertedType() { + return CustomType.class; + } + + @Override + public String getLogicalTypeName() { + return logicalTypeFactory.getTypeName(); + } + + @Override + public Schema getRecommendedSchema() { + return Schema.create(Schema.Type.STRING); + } + + @Override + public CustomType fromCharSequence(CharSequence value, Schema schema, LogicalType type) { + return new CustomType(value); + } + + @Override + public CharSequence toCharSequence(CustomType value, Schema schema, LogicalType type) { + return value.getName(); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java similarity index 86% rename from lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java rename to lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java index 4957e376521..3e121e0242c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/DummyLogicalTypeFactory.java +++ b/lang/java/avro/src/test/java/org/apache/avro/CustomTypeLogicalTypeFactory.java @@ -17,14 +17,14 @@ */ package org.apache.avro; -public class DummyLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory { +public class CustomTypeLogicalTypeFactory implements LogicalTypes.LogicalTypeFactory { @Override public LogicalType fromSchema(Schema schema) { - return LogicalTypes.date(); + return new LogicalType(getTypeName()); } @Override public String getTypeName() { - return "service-example"; + return "custom"; } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java 
b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
new file mode 100644
index 00000000000..6846c4434cf
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/SchemaNameValidatorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.stream.Stream;
+
+class SchemaNameValidatorTest {
+
+  @ParameterizedTest
+  @MethodSource("data")
+  void validator(Schema.NameValidator validator, String input, boolean expectedResult) {
+    Schema.NameValidator.Result result = validator.validate(input);
+    Assertions.assertEquals(expectedResult, result.isOK(), result.getErrors());
+  }
+
+  static Stream<Arguments> data() {
+    return Stream.of(Arguments.of(Schema.NameValidator.UTF_VALIDATOR, null, false), // null not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, null, false), // null not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "", false), // empty not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "", false), // empty not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello world", false), // space not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello world", false), // space not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H&", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H&", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H=", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H=", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "H]", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "H]", false), // non letter or digit not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "Hello_world", true),
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "Hello_world", true),
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "éàçô", true), // accents accepted
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "éàçô", false), // accents not accepted
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "5éàçô", false), // can't start with a number
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "5éàçô", false), // can't start with a number
+        Arguments.of(Schema.NameValidator.UTF_VALIDATOR, "_Hello_world", true),
+        Arguments.of(Schema.NameValidator.STRICT_VALIDATOR, "_Hello_world", true));
+  }
+
+}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java index f1267ab9788..1aeebcddad5 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileConcat.java @@ -17,60 +17,42 @@ */ package org.apache.avro; -import static org.junit.Assert.assertEquals; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - import org.apache.avro.file.CodecFactory; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.RandomData; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.TemporaryFolder; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; + +import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) -public class TestDataFileConcat { - private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - - @Rule - public TemporaryFolder DIR = new TemporaryFolder(); +import java.io.File; +import java.io.IOException; +import java.util.stream.Stream; - CodecFactory codec; - CodecFactory codec2; - boolean recompress; +import static org.junit.Assert.assertEquals; - public TestDataFileConcat(CodecFactory codec, CodecFactory codec2, Boolean recompress) { - this.codec = codec; - this.codec2 = codec2; - this.recompress = recompress; - LOG.info("Testing concatenating files, " + codec2 + " into " + codec + " with recompress=" + recompress); - } +public class TestDataFileConcat { + private static final Logger LOG = LoggerFactory.getLogger(TestDataFileConcat.class); - @Parameters - public static List codecs() { - List r = new ArrayList<>(); - r.add(new Object[] { null, null, false }); - r.add(new Object[] { null, null, true }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true }); - r.add(new Object[] { CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false }); - r.add(new Object[] { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true }); - r.add(new Object[] { CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false }); - r.add(new Object[] { CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false }); - return r; + @TempDir + public File DIR; + + public static Stream codecs() { + return Stream.of(Arguments.of(null, null, false), Arguments.of(null, null, true), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true), + Arguments.of(CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false), + Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false), + 
Arguments.of(CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true), + Arguments.of(CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false), + Arguments.of(CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false)); } private static final int COUNT = Integer.parseInt(System.getProperty("test.count", "200")); @@ -83,11 +65,12 @@ public static List codecs() { private static final Schema SCHEMA = new Schema.Parser().parse(SCHEMA_JSON); private File makeFile(String name) { - return new File(DIR.getRoot().getPath(), "test-" + name + ".avro"); + return new File(DIR, "test-" + name + ".avro"); } - @Test - public void testConcatenateFiles() throws IOException { + @ParameterizedTest + @MethodSource("codecs") + void concatenateFiles(CodecFactory codec, CodecFactory codec2, boolean recompress) throws IOException { System.out.println("SEED = " + SEED); System.out.println("COUNT = " + COUNT); for (int k = 0; k < 5; k++) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java index fb42d639ecb..a85b966409b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestDataFileReader.java @@ -87,7 +87,7 @@ void throttledInputStream() throws IOException { // magic header check. This happens with throttled input stream, // where we read into buffer less bytes than requested. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = Files.createTempFile("testThrottledInputStream", ".avro").toFile(); @@ -146,7 +146,7 @@ void inputStreamEOF() throws IOException { // AVRO-2944 describes hanging/failure in reading Avro file with performing // magic header check. This potentially happens with a defective input stream // where a -1 value is unexpectedly returned from a read. - Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"TestSchema\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); File f = Files.createTempFile("testInputStreamEOF", ".avro").toFile(); @@ -195,7 +195,7 @@ void ignoreSchemaValidationOnRead() throws IOException { // This schema has an accent in the name and the default for the field doesn't // match the first type in the union. A Java SDK in the past could create a file // containing this schema. 
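// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): the tests in this file swap the
// removed Schema.Parser().setValidate(false) for the new name-validator
// constructor. A minimal sketch of the lenient-parse pattern, assuming a
// record name that the strict validator would reject:

Schema lenient = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false)
    .parse("{\"type\": \"record\", \"name\": \"Invalid-Nämé\", \"fields\": []}");

// Passing Schema.NameValidator.UTF_VALIDATOR or STRICT_VALIDATOR instead
// restores name checking at the corresponding strictness.
// ---------------------------------------------------------------------------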
- Schema legacySchema = new Schema.Parser().setValidate(false).setValidateDefaults(false) + Schema legacySchema = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).setValidateDefaults(false) .parse("{\"type\": \"record\", \"name\": \"InvalidAccëntWithInvalidNull\", \"fields\": " + "[ {\"name\": \"id\", \"type\": [\"long\", \"null\"], \"default\": null}]}"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java index 4741e4c08c5..f35c62d7a2e 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestFixed.java @@ -18,11 +18,10 @@ package org.apache.avro; -import static org.junit.jupiter.api.Assertions.assertArrayEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; - import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + public class TestFixed { @Test @@ -35,4 +34,16 @@ void fixedDefaultValueDrop() { assertArrayEquals(new byte[16], (byte[]) field.defaultVal()); } + @Test + void fixedLengthOutOfLimit() { + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> Schema.createFixed("oversize", "doc", "space", Integer.MAX_VALUE)); + assertEquals(TestSystemLimitException.ERROR_VM_LIMIT_BYTES, ex.getMessage()); + } + + @Test + void fixedNegativeLength() { + Exception ex = assertThrows(AvroRuntimeException.class, () -> Schema.createFixed("negative", "doc", "space", -1)); + assertEquals(TestSystemLimitException.ERROR_NEGATIVE, ex.getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java index 800772e7f1d..acc8899b21c 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestLogicalType.java @@ -288,9 +288,9 @@ void registerLogicalTypeWithFactoryNameNotProvided() { } @Test - void registerLogicalTypeFactoryByServiceLoader() { + public void testRegisterLogicalTypeFactoryByServiceLoader() { assertThat(LogicalTypes.getCustomRegisteredTypes(), - IsMapContaining.hasEntry(equalTo("service-example"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); + IsMapContaining.hasEntry(equalTo("custom"), instanceOf(LogicalTypes.LogicalTypeFactory.class))); } public static void assertEqualsTrue(String message, Object o1, Object o2) { diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java index af7c6a0f6c3..f7859e1c8e3 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestProtocol.java @@ -17,16 +17,104 @@ */ package org.apache.avro; +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.IndexedRecord; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.io.JsonEncoder; + +import com.fasterxml.jackson.databind.JsonNode; + import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.junit.jupiter.api.Assertions.*; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.util.Collections; + import org.junit.jupiter.api.Test; public class TestProtocol { + @Test + public void 
parse() throws IOException {
+    File fic = new File("../../../share/test/schemas/namespace.avpr");
+    Protocol protocol = Protocol.parse(fic);
+    assertNotNull(protocol);
+    assertEquals("TestNamespace", protocol.getName());
+  }
+
+  /**
+   * Record type 'User' contains a field of type 'Status', which contains a field
+   * of type 'User'.
+   */
+  @Test
+  public void crossProtocol() {
+    String userStatus = "{ \"protocol\" : \"p1\", " + "\"types\": ["
+        + "{\"name\": \"User\", \"type\": \"record\", \"fields\": [{\"name\": \"current_status\", \"type\": \"Status\"}]},\n"
+        + "\n"
+        + "{\"name\": \"Status\", \"type\": \"record\", \"fields\": [{\"name\": \"author\", \"type\": \"User\"}]}"
+        + "]}";
+
+    Protocol protocol = Protocol.parse(userStatus);
+    Schema userSchema = protocol.getType("User");
+    Schema statusSchema = protocol.getType("Status");
+    assertSame(statusSchema, userSchema.getField("current_status").schema());
+    assertSame(userSchema, statusSchema.getField("author").schema());
+
+    String parsingFormUser = SchemaNormalization.toParsingForm(userSchema);
+    assertEquals(
+        "{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":\"User\"}]}}]}",
+        parsingFormUser);
+
+    String parsingFormStatus = SchemaNormalization.toParsingForm(statusSchema);
+    assertEquals(
+        "{\"name\":\"Status\",\"type\":\"record\",\"fields\":[{\"name\":\"author\",\"type\":{\"name\":\"User\",\"type\":\"record\",\"fields\":[{\"name\":\"current_status\",\"type\":\"Status\"}]}}]}",
+        parsingFormStatus);
+  }
+
+  /**
+   * When a schema uses a type before it is defined, test that normalization
+   * defines the schema before it is used.
+   */
+  @Test
+  void normalization() {
+    final String schema = "{\n" + " \"type\":\"record\", \"name\": \"Main\", " + " \"fields\":[\n"
+        + " { \"name\":\"f1\", \"type\":\"Sub\" },\n" // use Sub
+        + " { \"name\":\"f2\", " + " \"type\":{\n" + " \"type\":\"enum\", \"name\":\"Sub\",\n" // define Sub
+        + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}";
+    Schema s = new Schema.Parser().parse(schema);
+    assertNotNull(s);
+
+    String parsingForm = SchemaNormalization.toParsingForm(s);
+    assertEquals(
+        "{\"name\":\"Main\",\"type\":\"record\",\"fields\":[{\"name\":\"f1\",\"type\":{\"name\":\"Sub\",\"type\":\"enum\",\"symbols\":[\"OPEN\",\"CLOSE\"]}},{\"name\":\"f2\",\"type\":\"Sub\"}]}",
+        parsingForm);
+  }
+
+  @Test
+  void namespaceAndNameRules() {
+    Protocol p1 = new Protocol("P", null, "foo");
+    Protocol p2 = new Protocol("foo.P", null, null);
+    Protocol p3 = new Protocol("foo.P", null, "bar");
+    assertEquals(p1.getName(), p2.getName());
+    assertEquals(p1.getNamespace(), p2.getNamespace());
+    assertEquals(p1.getName(), p3.getName());
+    assertEquals(p1.getNamespace(), p3.getNamespace());
+
+    // The following situation is allowed, even if confusing, because the
+    // specification describes this algorithm without specifying that the resulting
+    // namespace must be non-empty.
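// ---------------------------------------------------------------------------
// Illustrative aside (not part of this patch): the name/namespace derivation
// the assertions above check, assuming Protocol splits a dotted full name on
// its last dot and lets the full name win over an explicit namespace.

Protocol qualified = new Protocol("foo.P", null, null); // name "P", namespace "foo"
Protocol explicit = new Protocol("P", null, "foo");     // name "P", namespace "foo"
Protocol clashing = new Protocol("foo.P", null, "bar"); // full name wins: namespace "foo"
// The degenerate case exercised next, ".P", therefore ends up with a null namespace.
// ---------------------------------------------------------------------------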
+ Protocol invalidName = new Protocol(".P", null, "ignored"); + assertNull(invalidName.getNamespace()); + assertEquals("P", invalidName.getName()); + } + @Test void propEquals() { Protocol p1 = new Protocol("P", null, "foo"); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java index 47cafcec189..89fedc75ca7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestReadingWritingDataInEvolvedSchemas.java @@ -27,6 +27,8 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Stream; + import org.apache.avro.generic.GenericData; import org.apache.avro.generic.GenericData.EnumSymbol; import org.apache.avro.generic.GenericData.Record; @@ -38,24 +40,17 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + public class TestReadingWritingDataInEvolvedSchemas { private static final String RECORD_A = "RecordA"; private static final String FIELD_A = "fieldA"; private static final char LATIN_SMALL_LETTER_O_WITH_DIARESIS = '\u00F6'; - @Rule - public ExpectedException expectedException = ExpectedException.none(); - private static final Schema DOUBLE_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // .name(FIELD_A).type().doubleType().noDefault() // @@ -89,13 +84,18 @@ public class TestReadingWritingDataInEvolvedSchemas { .fields() // .name(FIELD_A).type().unionOf().stringType().and().bytesType().endUnion().noDefault() // .endRecord(); + + private static final Schema ENUM_AB = SchemaBuilder.enumeration("Enum1").symbols("A", "B"); + private static final Schema ENUM_AB_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B").noDefault() // + .name(FIELD_A).type(ENUM_AB).noDefault() // .endRecord(); + + private static final Schema ENUM_ABC = SchemaBuilder.enumeration("Enum1").symbols("A", "B", "C"); private static final Schema ENUM_ABC_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // - .name(FIELD_A).type().enumeration("Enum1").symbols("A", "B", "C").noDefault() // + .name(FIELD_A).type(ENUM_ABC).noDefault() // .endRecord(); private static final Schema UNION_INT_RECORD = SchemaBuilder.record(RECORD_A) // .fields() // @@ -122,221 +122,235 @@ public class TestReadingWritingDataInEvolvedSchemas { .name(FIELD_A).type().unionOf().floatType().and().doubleType().endUnion().noDefault() // .endRecord(); - @Parameters(name = "encoder = {0}") - public static Collection data() { - return Arrays.asList(new EncoderType[][] { { EncoderType.BINARY }, { EncoderType.JSON } }); - } - - public TestReadingWritingDataInEvolvedSchemas(EncoderType encoderType) { - this.encoderType = encoderType; - } - - private final EncoderType encoderType; - enum EncoderType { BINARY, JSON } - @Test - public void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws 
Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToUnionLongFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsConvertedToDoubleSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToDoubleSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_RECORD; Record record = 
defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsReadableByFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42.0f, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToLongFloatUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_LONG_FLOAT_RECORD, writer, encoded, encoderType); assertEquals(42L, decoded.get(FIELD_A)); } - @Test - public void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsConvertedToFloatDoubleUnionSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_LONG_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(UNION_FLOAT_DOUBLE_RECORD, writer, encoded, encoderType); assertEquals(42.0F, decoded.get(FIELD_A)); } - @Test - public void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found double, expecting float"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void doubleWrittenWithUnionSchemaIsNotConvertedToFloatSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 
42.0); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(FLOAT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found double, expecting float", exception.getMessage()); } - @Test - public void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found float, expecting long"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void floatWrittenWithUnionSchemaIsNotConvertedToLongSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42.0f); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(LONG_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found float, expecting long", exception.getMessage()); } - @Test - public void longWrittenWithUnionSchemaIsNotConvertedToIntSchema() throws Exception { - expectedException.expect(AvroTypeException.class); - expectedException.expectMessage("Found long, expecting int"); + @ParameterizedTest + @EnumSource(EncoderType.class) + void longWrittenWithUnionSchemaIsNotConvertedToIntSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42L); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(INT_RECORD, writer, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(INT_RECORD, writer, encoded, encoderType)); + Assertions.assertEquals("Found long, expecting int", exception.getMessage()); } - @Test - public void intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void intWrittenWithUnionSchemaIsConvertedToAllNumberSchemas(EncoderType encoderType) throws Exception { Schema writer = UNION_INT_LONG_FLOAT_DOUBLE_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded).get(FIELD_A)); - assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded).get(FIELD_A)); + byte[] encoded = encodeGenericBlob(record, encoderType); + assertEquals(42.0, decodeGenericBlob(DOUBLE_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42.0f, decodeGenericBlob(FLOAT_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42L, decodeGenericBlob(LONG_RECORD, writer, encoded, encoderType).get(FIELD_A)); + assertEquals(42, decodeGenericBlob(INT_RECORD, writer, encoded, encoderType).get(FIELD_A)); } - @Test - public void asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void 
asciiStringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, "42"); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals("42".getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8StringWrittenWithUnionSchemaIsConvertedToBytesSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + ByteBuffer actual = (ByteBuffer) decodeGenericBlob(BYTES_RECORD, writer, encoded, encoderType).get(FIELD_A); assertArrayEquals(goeran.getBytes(StandardCharsets.UTF_8), actual.array()); } - @Test - public void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void asciiBytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { Schema writer = UNION_STRING_BYTES_RECORD; ByteBuffer buf = ByteBuffer.wrap("42".getBytes(StandardCharsets.UTF_8)); Record record = defaultRecordWithSchema(writer, FIELD_A, buf); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals("42", read.toString()); } - @Test - public void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void utf8BytesWrittenWithUnionSchemaIsConvertedToStringSchema(EncoderType encoderType) throws Exception { String goeran = String.format("G%sran", LATIN_SMALL_LETTER_O_WITH_DIARESIS); Schema writer = UNION_STRING_BYTES_RECORD; Record record = defaultRecordWithSchema(writer, FIELD_A, goeran); - byte[] encoded = encodeGenericBlob(record); - CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded).get(FIELD_A); + byte[] encoded = encodeGenericBlob(record, encoderType); + CharSequence read = (CharSequence) decodeGenericBlob(STRING_RECORD, writer, encoded, encoderType).get(FIELD_A); assertEquals(goeran, read.toString()); } - @Test - public void enumRecordCanBeReadWithExtendedEnumSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void enumRecordCanBeReadWithExtendedEnumSchema(EncoderType encoderType) throws Exception { Schema writer = ENUM_AB_RECORD; - Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A")); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded); + Record record = 
defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_AB, "A"));
+    byte[] encoded = encodeGenericBlob(record, encoderType);
+    Record decoded = decodeGenericBlob(ENUM_ABC_RECORD, writer, encoded, encoderType);
     assertEquals("A", decoded.get(FIELD_A).toString());
   }

-  @Test
-  public void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues() throws Exception {
+  @ParameterizedTest
+  @EnumSource(EncoderType.class)
+  void enumRecordWithExtendedSchemaCanBeReadWithOriginalEnumSchemaIfOnlyOldValues(EncoderType encoderType)
+      throws Exception {
     Schema writer = ENUM_ABC_RECORD;
-    Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "A"));
-    byte[] encoded = encodeGenericBlob(record);
-    Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded);
+    Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "A"));
+    byte[] encoded = encodeGenericBlob(record, encoderType);
+    Record decoded = decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType);
     assertEquals("A", decoded.get(FIELD_A).toString());
   }

-  @Test
-  public void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed() throws Exception {
-    expectedException.expect(AvroTypeException.class);
-    expectedException.expectMessage("No match for C");
+  @ParameterizedTest
+  @EnumSource(EncoderType.class)
+  void enumRecordWithExtendedSchemaCanNotBeReadIfNewValuesAreUsed(EncoderType encoderType) throws Exception {
     Schema writer = ENUM_ABC_RECORD;
-    Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(writer, "C"));
-    byte[] encoded = encodeGenericBlob(record);
-    decodeGenericBlob(ENUM_AB_RECORD, writer, encoded);
+    Record record = defaultRecordWithSchema(writer, FIELD_A, new EnumSymbol(ENUM_ABC, "C"));
+    byte[] encoded = encodeGenericBlob(record, encoderType);
+
+    AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class,
+        () -> decodeGenericBlob(ENUM_AB_RECORD, writer, encoded, encoderType));
+    Assertions.assertEquals("No match for C", exception.getMessage());
   }

-  @Test
-  public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData() throws Exception {
+  @ParameterizedTest
+  @EnumSource(EncoderType.class)
+  void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfData(EncoderType encoderType)
+      throws Exception {
     Schema writer = SchemaBuilder.record(RECORD_A) //
         .fields() //
         .name("newTopField").type().stringType().noDefault() //
@@ -344,47 +358,50 @@ public void recordWrittenWithExtendedSchemaCanBeReadWithOriginalSchemaButLossOfD
         .endRecord();
     Record record = defaultRecordWithSchema(writer, FIELD_A, 42);
     record.put("newTopField", "not decoded");
-    byte[] encoded = encodeGenericBlob(record);
-    Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded);
+    byte[] encoded = encodeGenericBlob(record, encoderType);
+    Record decoded = decodeGenericBlob(INT_RECORD, writer, encoded, encoderType);
     assertEquals(42, decoded.get(FIELD_A));
     try {
       decoded.get("newTopField");
-      Assert.fail("get should throw a exception");
+      Assertions.fail("get should throw an exception");
     } catch (AvroRuntimeException ex) {
-      Assert.assertEquals("Not a valid schema field: newTopField", ex.getMessage());
+      Assertions.assertEquals("Not a valid schema field: newTopField", ex.getMessage());
     }
   }

-  @Test
-  public void readerWithoutDefaultValueThrowsException() throws Exception {
-    expectedException.expect(AvroTypeException.class);
-    expectedException.expectMessage("missing required field newField");
+
@ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithoutDefaultValueThrowsException(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newField").type().intType().noDefault() // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + AvroTypeException exception = Assertions.assertThrows(AvroTypeException.class, + () -> decodeGenericBlob(reader, INT_RECORD, encoded, encoderType)); + Assertions.assertTrue(exception.getMessage().contains("missing required field newField"), exception.getMessage()); } - @Test - public void readerWithDefaultValueIsApplied() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void readerWithDefaultValueIsApplied(EncoderType encoderType) throws Exception { Schema reader = SchemaBuilder.record(RECORD_A) // .fields() // .name("newFieldWithDefault").type().intType().intDefault(314) // .name(FIELD_A).type().intType().noDefault() // .endRecord(); Record record = defaultRecordWithSchema(INT_RECORD, FIELD_A, 42); - byte[] encoded = encodeGenericBlob(record); - Record decoded = decodeGenericBlob(reader, INT_RECORD, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + Record decoded = decodeGenericBlob(reader, INT_RECORD, encoded, encoderType); assertEquals(42, decoded.get(FIELD_A)); assertEquals(314, decoded.get("newFieldWithDefault")); } - @Test - public void aliasesInSchema() throws Exception { + @ParameterizedTest + @EnumSource(EncoderType.class) + void aliasesInSchema(EncoderType encoderType) throws Exception { Schema writer = new Schema.Parser() .parse("{\"namespace\": \"example.avro\", \"type\": \"record\", \"name\": \"User\", \"fields\": [" + "{\"name\": \"name\", \"type\": \"int\"}\n" + "]}\n"); @@ -393,8 +410,8 @@ public void aliasesInSchema() throws Exception { + "{\"name\": \"fname\", \"type\": \"int\", \"aliases\" : [ \"name\" ]}\n" + "]}\n"); GenericData.Record record = defaultRecordWithSchema(writer, "name", 1); - byte[] encoded = encodeGenericBlob(record); - GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded); + byte[] encoded = encodeGenericBlob(record, encoderType); + GenericData.Record decoded = decodeGenericBlob(reader, reader, encoded, encoderType); assertEquals(1, decoded.get("fname")); } @@ -405,7 +422,7 @@ private Record defaultRecordWithSchema(Schema schema, String key, T value) { return data; } - private byte[] encodeGenericBlob(GenericRecord data) throws IOException { + private byte[] encodeGenericBlob(GenericRecord data, EncoderType encoderType) throws IOException { DatumWriter writer = new GenericDatumWriter<>(data.getSchema()); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); Encoder encoder = encoderType == EncoderType.BINARY ? 
EncoderFactory.get().binaryEncoder(outStream, null) @@ -416,7 +433,8 @@ private byte[] encodeGenericBlob(GenericRecord data) throws IOException { return outStream.toByteArray(); } - private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException { + private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob, EncoderType encoderType) + throws IOException { if (blob == null) { return null; } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java index 77adac28e8a..64748da1364 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchema.java @@ -21,18 +21,35 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Set; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.IntNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.TextNode; import org.apache.avro.Schema.Field; import org.apache.avro.Schema.Type; import org.apache.avro.generic.GenericData; + +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; public class TestSchema { @@ -409,4 +426,199 @@ void qualifiedName() { assertEquals("Int", nameInt.getQualified("space")); } + @Test + void validValue() { + // Valid null value + final Schema nullSchema = Schema.create(Type.NULL); + assertTrue(nullSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + // Valid int value + final Schema intSchema = Schema.create(Type.INT); + assertTrue(intSchema.isValidDefault(JsonNodeFactory.instance.numberNode(12))); + + // Valid Text value + final Schema strSchema = Schema.create(Type.STRING); + assertTrue(strSchema.isValidDefault(new TextNode("textNode"))); + + // Valid Array value + final Schema arraySchema = Schema.createArray(Schema.create(Type.STRING)); + final ArrayNode arrayValue = JsonNodeFactory.instance.arrayNode(); + assertTrue(arraySchema.isValidDefault(arrayValue)); // empty array + + arrayValue.add("Hello"); + arrayValue.add("World"); + assertTrue(arraySchema.isValidDefault(arrayValue)); + + arrayValue.add(5); + assertFalse(arraySchema.isValidDefault(arrayValue)); + + // Valid Union type + final Schema unionSchema = Schema.createUnion(strSchema, intSchema, nullSchema); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertTrue(unionSchema.isValidDefault(new IntNode(23))); + assertTrue(unionSchema.isValidDefault(JsonNodeFactory.instance.nullNode())); + + assertFalse(unionSchema.isValidDefault(arrayValue)); + + // Array of union + final Schema arrayUnion = Schema.createArray(unionSchema); + final ArrayNode arrayUnionValue = JsonNodeFactory.instance.arrayNode(); + arrayUnionValue.add("Hello"); + 
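// In an array of unions, each element of the default value may match any branch of the union.
+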
arrayUnionValue.add(NullNode.getInstance()); + assertTrue(arrayUnion.isValidDefault(arrayUnionValue)); + + // Union String, bytes + final Schema unionStrBytes = Schema.createUnion(strSchema, Schema.create(Type.BYTES)); + assertTrue(unionStrBytes.isValidDefault(JsonNodeFactory.instance.textNode("Hello"))); + assertFalse(unionStrBytes.isValidDefault(JsonNodeFactory.instance.numberNode(123))); + } + + @Test + void enumLateDefine() { + String schemaString = "{\n" + " \"type\":\"record\",\n" + " \"name\": \"Main\",\n" + " \"fields\":[\n" + + " {\n" + " \"name\":\"f1\",\n" + " \"type\":\"Sub\"\n" + " },\n" + + " {\n" + " \"name\":\"f2\",\n" + " \"type\":{\n" + + " \"type\":\"enum\",\n" + " \"name\":\"Sub\",\n" + + " \"symbols\":[\"OPEN\",\"CLOSE\"]\n" + " }\n" + " }\n" + " ]\n" + "}"; + + final Schema schema = new Schema.Parser().parse(schemaString); + Schema f1Schema = schema.getField("f1").schema(); + Schema f2Schema = schema.getField("f2").schema(); + assertSame(f1Schema, f2Schema); + assertEquals(Type.ENUM, f1Schema.getType()); + String stringSchema = schema.toString(); + int definitionIndex = stringSchema.indexOf("\"symbols\":[\"OPEN\",\"CLOSE\"]"); + int usageIndex = stringSchema.indexOf("\"type\":\"Sub\""); + assertTrue(definitionIndex < usageIndex, "usage is before definition"); + } + + @Test + public void testRecordInArray() { + String schemaString = "{\n" + " \"type\": \"record\",\n" + " \"name\": \"TestRecord\",\n" + " \"fields\": [\n" + + " {\n" + " \"name\": \"value\",\n" + " \"type\": {\n" + " \"type\": \"record\",\n" + + " \"name\": \"Container\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"Optional\",\n" + " \"type\": {\n" + " \"type\": \"array\",\n" + + " \"items\": [\n" + " {\n" + " \"type\": \"record\",\n" + + " \"name\": \"optional_field_0\",\n" + " \"namespace\": \"\",\n" + + " \"doc\": \"\",\n" + " \"fields\": [\n" + " {\n" + + " \"name\": \"optional_field_1\",\n" + " \"type\": \"long\",\n" + + " \"doc\": \"\",\n" + " \"default\": 0\n" + + " }\n" + " ]\n" + " }\n" + " ]\n" + + " }\n" + " }\n" + " ]\n" + " }\n" + " }\n" + " ]\n" + "}"; + final Schema schema = new Schema.Parser().parse(schemaString); + assertNotNull(schema); + } + + /* + * @Test public void testRec() { String schemaString = + * "[{\"name\":\"employees\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"Pair1081149ea1d6eb80\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":{\"type\":\"record\",\"name\":\"EmployeeInfo2\",\"fields\":[{\"name\":\"companyMap\",\"type\":[\"null\",{\"type\":\"array\",\"items\":{\"type\":\"record\",\"name\":\"PairIntegerString\",\"fields\":[{\"name\":\"key\",\"type\":\"int\"},{\"name\":\"value\",\"type\":\"string\"}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null},{\"name\":\"name\",\"type\":[\"null\",\"string\"],\"default\":null}]}}]},\"java-class\":\"java.util.HashMap\"}],\"default\":null}]"; + * final Schema schema = new Schema.Parser().parse(schemaString); + * Assert.assertNotNull(schema); + * + * } + */ + + @Test + public void testUnionFieldType() { + String schemaString = "{\"type\": \"record\", \"name\": \"Lisp\", \"fields\": [{\"name\":\"value\", \"type\":[\"null\", \"string\",{\"type\": \"record\", \"name\": \"Cons\", \"fields\": [{\"name\":\"car\", \"type\":\"Lisp\"},{\"name\":\"cdr\", \"type\":\"Lisp\"}]}]}]}"; + final Schema schema = new Schema.Parser().parse(schemaString); + Field value = schema.getField("value"); + Schema fieldSchema = value.schema(); + Schema subSchema = 
fieldSchema.getTypes().stream().filter((Schema s) -> s.getType() == Type.RECORD).findFirst()
+ .get();
+ assertTrue(subSchema.hasFields());
+ }
+
+ @Test
+ public void parseAliases() throws JsonProcessingException {
+ String s1 = "{ \"aliases\" : [\"a1\", \"b1\"]}";
+ ObjectMapper mapper = new ObjectMapper();
+ JsonNode j1 = mapper.readTree(s1);
+ Set<String> aliases = Schema.parseAliases(j1);
+ assertEquals(2, aliases.size());
+ assertTrue(aliases.contains("a1"));
+ assertTrue(aliases.contains("b1"));
+
+ String s2 = "{ \"aliases\" : {\"a1\": \"b1\"}}";
+ JsonNode j2 = mapper.readTree(s2);
+
+ SchemaParseException ex = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j2));
+ assertTrue(ex.getMessage().contains("aliases not an array"));
+
+ String s3 = "{ \"aliases\" : [11, \"b1\"]}";
+ JsonNode j3 = mapper.readTree(s3);
+ SchemaParseException ex3 = assertThrows(SchemaParseException.class, () -> Schema.parseAliases(j3));
+ assertTrue(ex3.getMessage().contains("alias not a string"));
+ }
+
+ @Test
+ void testContentAfterAvsc() {
+ Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
+ parser.setValidateDefaults(true);
+ assertThrows(SchemaParseException.class, () -> parser.parse("{\"type\": \"string\"}; DROP TABLE STUDENTS"));
+ }
+
+ @Test
+ void testContentAfterAvscInInputStream() throws Exception {
+ Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
+ parser.setValidateDefaults(true);
+ String avsc = "{\"type\": \"string\"}; DROP TABLE STUDENTS";
+ ByteArrayInputStream is = new ByteArrayInputStream(avsc.getBytes(StandardCharsets.UTF_8));
+ Schema schema = parser.parse(is);
+ assertNotNull(schema);
+ }
+
+ @Test
+ void testContentAfterAvscInFile() throws Exception {
+ File avscFile = Files.createTempFile("testContentAfterAvscInFile", null).toFile();
+ try (FileWriter writer = new FileWriter(avscFile)) {
+ writer.write("{\"type\": \"string\"}; DROP TABLE STUDENTS");
+ writer.flush();
+ }
+
+ Schema.Parser parser = new Schema.Parser(Schema.NameValidator.UTF_VALIDATOR);
+ parser.setValidateDefaults(true);
+ assertThrows(SchemaParseException.class, () -> parser.parse(avscFile));
+ }
+
+ @Test
+ void testParseMultipleFile() throws IOException {
+ URL directory = Thread.currentThread().getContextClassLoader().getResource("multipleFile");
+ File f1 = new File(directory.getPath(), "ApplicationEvent.avsc");
+ File f2 = new File(directory.getPath(), "DocumentInfo.avsc");
+ File f3 = new File(directory.getPath(), "MyResponse.avsc");
+ Assertions.assertTrue(f1.exists(), "File does not exist for test " + f1.getPath());
+ Assertions.assertTrue(f2.exists(), "File does not exist for test " + f2.getPath());
+ Assertions.assertTrue(f3.exists(), "File does not exist for test " + f3.getPath());
+
+ final List<Schema> schemas = new Schema.Parser().parse(Arrays.asList(f1, f2, f3));
+ Assertions.assertEquals(3, schemas.size());
+ Schema schemaAppEvent = schemas.get(0);
+ Schema schemaDocInfo = schemas.get(1);
+ Schema schemaResponse = schemas.get(2);
+
+ Assertions.assertNotNull(schemaAppEvent);
+ Assertions.assertEquals(3, schemaAppEvent.getFields().size());
+ Field documents = schemaAppEvent.getField("documents");
+ Schema docSchema = documents.schema().getTypes().get(1).getElementType();
+ Assertions.assertEquals(docSchema, schemaDocInfo);
+
+ Assertions.assertNotNull(schemaDocInfo);
+ Assertions.assertNotNull(schemaResponse);
+ }
+
+ @Test
+ void add_types() {
+ String schemaRecord2 = "{\"type\":\"record\", \"name\":\"record2\", \"fields\": ["
+ + " {\"name\":\"f1\", \"type\":\"record1\" }" + "]}";
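+ // record2 references "record1", which is not defined anywhere inside the schema string itself.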
\"type\":\"record1\" }" + "]}"; + // register schema1 in schema. + Schema schemaRecord1 = Schema.createRecord("record1", "doc", "", false); + Schema.Parser parser = new Schema.Parser().addTypes(Collections.singleton(schemaRecord1)); + + // parse schema for record2 that contains field for schema1. + final Schema schema = parser.parse(schemaRecord2); + final Field f1 = schema.getField("f1"); + assertNotNull(f1); + assertEquals(schemaRecord1, f1.schema()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java index ddfbe7229c9..fcbaae65570 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaBuilder.java @@ -38,6 +38,8 @@ import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.generic.GenericRecordBuilder; + +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -149,6 +151,9 @@ void fieldObjectProps() { assertEquals("ABC", f.getObjectProp("byteProp")); assertTrue(f.getObjectProp("stringProp") instanceof String); assertEquals("abc", f.getObjectProp("stringProp")); + + assertEquals("abc", f.getObjectProp("stringProp", "default")); + assertEquals("default", f.getObjectProp("unknwon", "default")); } @Test @@ -871,4 +876,25 @@ void namespaceDefaulting() { assertEquals(a2, a1); } + + @Test + void namesAcceptAll() throws InterruptedException { + // Ensure that Schema.setNameValidator won't interfere with others unit tests. + Runnable r = () -> { + Schema.setNameValidator(Schema.NameValidator.NO_VALIDATION); + final Schema schema = SchemaBuilder.record("7name").fields().name("123").type(Schema.create(Schema.Type.INT)) + .noDefault().endRecord(); + Assertions.assertNotNull(schema); + Assertions.assertEquals("7name", schema.getName()); + final Schema.Field field = schema.getField("123"); + Assertions.assertEquals("123", field.name()); + }; + + final Throwable[] exception = new Throwable[] { null }; + Thread t = new Thread(r); + t.setUncaughtExceptionHandler((Thread th, Throwable e) -> exception[0] = e); + t.start(); + t.join(); + Assertions.assertNull(exception[0], () -> exception[0].getMessage()); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java index cc5ada76e90..275bcfafede 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibility.java @@ -120,7 +120,7 @@ public class TestSchemaCompatibility { @Test void validateSchemaPairMissingField() { final List readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null)); - final Schema reader = Schema.createRecord(readerFields); + final Schema reader = Schema.createRecord(null, null, null, false, readerFields); final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility( SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA, SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE); @@ -132,7 +132,7 @@ void validateSchemaPairMissingField() { @Test void validateSchemaPairMissingSecondField() { final List readerFields = list(new Schema.Field("oldfield2", STRING_SCHEMA, null, null)); - final Schema reader = 
+ final Schema reader = Schema.createRecord(null, null, null, false, readerFields);
final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility(
SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA,
SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE);
@@ -145,7 +145,7 @@ void validateSchemaPairMissingSecondField() {
void validateSchemaPairAllFields() {
final List<Schema.Field> readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("oldfield2", STRING_SCHEMA, null, null));
- final Schema reader = Schema.createRecord(readerFields);
+ final Schema reader = Schema.createRecord(null, null, null, false, readerFields);
final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility(
SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA,
SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE);
@@ -158,7 +158,7 @@ void validateSchemaPairAllFields() {
void validateSchemaNewFieldWithDefault() {
final List<Schema.Field> readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, 42));
- final Schema reader = Schema.createRecord(readerFields);
+ final Schema reader = Schema.createRecord(null, null, null, false, readerFields);
final SchemaCompatibility.SchemaPairCompatibility expectedResult = new SchemaCompatibility.SchemaPairCompatibility(
SchemaCompatibility.SchemaCompatibilityResult.compatible(), reader, WRITER_SCHEMA,
SchemaCompatibility.READER_WRITER_COMPATIBLE_MESSAGE);
@@ -171,7 +171,7 @@ void validateSchemaNewFieldWithDefault() {
void validateSchemaNewField() {
final List<Schema.Field> readerFields = list(new Schema.Field("oldfield1", INT_SCHEMA, null, null),
new Schema.Field("newfield1", INT_SCHEMA, null, null));
- final Schema reader = Schema.createRecord(readerFields);
+ final Schema reader = Schema.createRecord(null, null, null, false, readerFields);
SchemaPairCompatibility compatibility = checkReaderWriterCompatibility(reader, WRITER_SCHEMA);
// Test new field without default value.
@@ -233,6 +233,22 @@ void unionReaderWriterSubsetIncompatibility() { final Schema unionReader = Schema.createUnion(list(INT_SCHEMA, STRING_SCHEMA)); final SchemaPairCompatibility result = checkReaderWriterCompatibility(unionReader, unionWriter); assertEquals(SchemaCompatibilityType.INCOMPATIBLE, result.getType()); + assertEquals("/2", result.getResult().getIncompatibilities().get(0).getLocation()); + } + + @Test + void unionWriterSimpleReaderIncompatibility() { + Schema mandatorySchema = SchemaBuilder.record("Account").fields().name("age").type().intType().noDefault() + .endRecord(); + Schema optionalSchema = SchemaBuilder.record("Account").fields().optionalInt("age").endRecord(); + + SchemaPairCompatibility compatibility = checkReaderWriterCompatibility(mandatorySchema, optionalSchema); + + assertEquals(SchemaCompatibilityType.INCOMPATIBLE, compatibility.getType()); + + Incompatibility incompatibility = compatibility.getResult().getIncompatibilities().get(0); + assertEquals("reader type: INT not compatible with writer type: NULL", incompatibility.getMessage()); + assertEquals("/fields/0/type/0", incompatibility.getLocation()); } // ----------------------------------------------------------------------------------------------- diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java index 6ac3c68dc03..05321527cb4 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityFixedSizeMismatch.java @@ -17,44 +17,34 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_4_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DFIXED_8_BYTES_RECORD1; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FIXED_8_BYTES; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityFixedSizeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { FIXED_4_BYTES, FIXED_8_BYTES, "expected: 8, found: 4", "/size" }, - { FIXED_8_BYTES, FIXED_4_BYTES, "expected: 4, found: 8", "/size" }, - { A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", - "/fields/1/type/size" }, - { A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", - "/fields/1/type/size" }, }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(FIXED_4_BYTES, FIXED_8_BYTES, "expected: 8, found: 4", "/size"), + Arguments.of(FIXED_8_BYTES, FIXED_4_BYTES, 
"expected: 4, found: 8", "/size"), + Arguments.of(A_DINT_B_DFIXED_8_BYTES_RECORD1, A_DINT_B_DFIXED_4_BYTES_RECORD1, "expected: 4, found: 8", + "/fields/1/type/size"), + Arguments.of(A_DINT_B_DFIXED_4_BYTES_RECORD1, A_DINT_B_DFIXED_8_BYTES_RECORD1, "expected: 8, found: 4", + "/fields/1/type/size")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testFixedSizeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + void fixedSizeMismatchSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.FIXED_SIZE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java index 82b70fe2443..63d607cd596 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingEnumSymbols.java @@ -17,19 +17,19 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.ENUM1_ABC_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_BC_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityMissingEnumSymbols { private static final Schema RECORD1_WITH_ENUM_AB = SchemaBuilder.record("Record1").fields() // @@ -39,26 +39,15 @@ public class TestSchemaCompatibilityMissingEnumSymbols { .name("field1").type(ENUM1_ABC_SCHEMA).noDefault() // .endRecord(); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols" }, - { ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols" }, - { RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM1_ABC_SCHEMA, "[C]", "/symbols"), + Arguments.of(ENUM1_BC_SCHEMA, ENUM1_ABC_SCHEMA, "[A]", "/symbols"), + Arguments.of(RECORD1_WITH_ENUM_AB, RECORD1_WITH_ENUM_ABC, "[C]", "/fields/0/type/symbols")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, 
String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.MISSING_ENUM_SYMBOLS, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java index 4f947690009..3e84a5337c9 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityMissingUnionBranch.java @@ -17,22 +17,40 @@ */ package org.apache.avro; -import static java.util.Arrays.asList; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; -@RunWith(Parameterized.class) +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import static java.util.Arrays.asList; +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_STRING_UNION_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DINT_UNION_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.list; + public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema RECORD1_WITH_INT = SchemaBuilder.record("Record1").fields() // @@ -50,61 +68,52 @@ public class TestSchemaCompatibilityMissingUnionBranch { private static final Schema UNION_INT_MAP_INT = Schema.createUnion(list(INT_SCHEMA, INT_MAP_SCHEMA)); private static final Schema UNION_INT_NULL = Schema.createUnion(list(INT_SCHEMA, NULL_SCHEMA)); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1") }, - { STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, - Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, 
UNION_INT_RECORD1, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, + public static Stream data() { + return Stream.of( // + Arguments.of(INT_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: STRING"), Collections.singletonList("/1")), + Arguments.of(STRING_UNION_SCHEMA, INT_STRING_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: INT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD1, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), // more info in the subset schemas - { UNION_INT_RECORD1, UNION_INT_RECORD2, Collections.singletonList("reader union lacking writer type: RECORD"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, Collections.singletonList("reader union lacking writer type: ENUM"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, - Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_BOOLEAN, Collections.singletonList("reader union lacking writer type: BOOLEAN"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, LONG_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: LONG"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, FLOAT_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: FLOAT"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: DOUBLE"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, Collections.singletonList("reader union lacking writer type: BYTES"), - Collections.singletonList("/0") }, - { INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, Collections.singletonList("reader union lacking writer type: ARRAY"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_MAP_INT, Collections.singletonList("reader union lacking writer type: MAP"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, UNION_INT_NULL, Collections.singletonList("reader union lacking writer type: NULL"), - Collections.singletonList("/1") }, - { INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + Arguments.of(UNION_INT_RECORD1, UNION_INT_RECORD2, + Collections.singletonList("reader union lacking writer type: RECORD"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ENUM1_AB, + Collections.singletonList("reader union lacking writer type: ENUM"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_FIXED_4_BYTES, + Collections.singletonList("reader union lacking writer type: FIXED"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_BOOLEAN, + Collections.singletonList("reader union lacking writer type: BOOLEAN"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, LONG_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: LONG"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, 
FLOAT_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: FLOAT"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, DOUBLE_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: DOUBLE"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, BYTES_UNION_SCHEMA, + Collections.singletonList("reader union lacking writer type: BYTES"), Collections.singletonList("/0")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_ARRAY_INT, + Collections.singletonList("reader union lacking writer type: ARRAY"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_MAP_INT, + Collections.singletonList("reader union lacking writer type: MAP"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, UNION_INT_NULL, + Collections.singletonList("reader union lacking writer type: NULL"), Collections.singletonList("/1")), + Arguments.of(INT_UNION_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, asList("reader union lacking writer type: LONG", "reader union lacking writer type: FLOAT", "reader union lacking writer type: DOUBLE"), - asList("/1", "/2", "/3") }, - { A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, + asList("/1", "/2", "/3")), + Arguments.of(A_DINT_B_DINT_UNION_RECORD1, A_DINT_B_DINT_STRING_UNION_RECORD1, Collections.singletonList("reader union lacking writer type: STRING"), - Collections.singletonList("/fields/1/type/1") } }; - return Arrays.asList(fields); + Collections.singletonList("/fields/1/type/1"))); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public List details; - @Parameter(3) - public List location; - - @Test - public void testMissingUnionBranch() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testMissingUnionBranch(Schema reader, Schema writer, List details, List location) + throws Exception { List types = Collections.nCopies(details.size(), SchemaIncompatibilityType.MISSING_UNION_BRANCH); validateIncompatibleSchemas(reader, writer, types, details, location); diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java index 83c89ab7b76..d20561faae8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityNameMismatch.java @@ -17,44 +17,37 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_1_RECORD1; +import static org.apache.avro.TestSchemas.A_DINT_B_DENUM_2_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; +import static 
org.apache.avro.TestSchemas.EMPTY_RECORD2; +import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityNameMismatch { private static final Schema FIXED_4_ANOTHER_NAME = Schema.createFixed("AnotherName", null, null, 4); - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name" }, - { EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name" }, - { FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name" }, - { A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name" } }; - return Arrays.asList(fields); + public static Stream data() { + return Stream.of(Arguments.of(ENUM1_AB_SCHEMA, ENUM2_AB_SCHEMA, "expected: Enum2", "/name"), + Arguments.of(EMPTY_RECORD2, EMPTY_RECORD1, "expected: Record1", "/name"), + Arguments.of(FIXED_4_BYTES, FIXED_4_ANOTHER_NAME, "expected: AnotherName", "/name"), + Arguments.of(A_DINT_B_DENUM_1_RECORD1, A_DINT_B_DENUM_2_RECORD1, "expected: Enum2", "/fields/1/type/name")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testNameMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testNameMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.NAME_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java index d367caed941..7a21c1a5fcd 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityReaderFieldMissingDefaultValue.java @@ -17,38 +17,29 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_B_DINT_RECORD1; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.EMPTY_RECORD1; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityReaderFieldMissingDefaultValue { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0" }, { A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", 
"/fields/0" } }; - return Arrays.asList(fields); - } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; + public static Stream data() { + return Stream.of(Arguments.of(A_INT_RECORD1, EMPTY_RECORD1, "a", "/fields/0"), + Arguments.of(A_INT_B_DINT_RECORD1, EMPTY_RECORD1, "a", "/fields/0")); + } - @Test - public void testReaderFieldMissingDefaultValueSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testReaderFieldMissingDefaultValueSchemas(Schema reader, Schema writer, String details, String location) { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.READER_FIELD_MISSING_DEFAULT_VALUE, details, location); } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java index 63dd3ac11a7..247e40404ba 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaCompatibilityTypeMismatch.java @@ -17,82 +17,94 @@ */ package org.apache.avro; -import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; -import static org.apache.avro.TestSchemas.*; +import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import java.util.Arrays; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; -import org.apache.avro.SchemaCompatibility.SchemaIncompatibilityType; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameter; -import org.junit.runners.Parameterized.Parameters; +import java.util.stream.Stream; + +import static org.apache.avro.TestSchemaCompatibility.validateIncompatibleSchemas; +import static org.apache.avro.TestSchemas.A_INT_RECORD1; +import static org.apache.avro.TestSchemas.BOOLEAN_SCHEMA; +import static org.apache.avro.TestSchemas.BYTES_SCHEMA; +import static org.apache.avro.TestSchemas.DOUBLE_SCHEMA; +import static org.apache.avro.TestSchemas.ENUM2_AB_SCHEMA; +import static org.apache.avro.TestSchemas.FIXED_4_BYTES; +import static org.apache.avro.TestSchemas.FLOAT_SCHEMA; +import static org.apache.avro.TestSchemas.INT_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.INT_FLOAT_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_LIST_RECORD; +import static org.apache.avro.TestSchemas.INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA; +import static org.apache.avro.TestSchemas.INT_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.INT_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_ARRAY_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_LIST_RECORD; +import static org.apache.avro.TestSchemas.LONG_MAP_SCHEMA; +import static org.apache.avro.TestSchemas.LONG_SCHEMA; +import static org.apache.avro.TestSchemas.NULL_SCHEMA; +import static org.apache.avro.TestSchemas.STRING_SCHEMA; -@RunWith(Parameterized.class) public class TestSchemaCompatibilityTypeMismatch { - @Parameters(name = "r: {0} | w: {1}") - public static Iterable data() { - Object[][] fields = { // - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" }, - { NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/" }, - { BOOLEAN_SCHEMA, 
INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/" }, + public static Stream data() { + return Stream.of( + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/"), + Arguments.of(NULL_SCHEMA, LONG_SCHEMA, "reader type: NULL not compatible with writer type: LONG", "/"), + + Arguments.of(BOOLEAN_SCHEMA, INT_SCHEMA, "reader type: BOOLEAN not compatible with writer type: INT", "/"), - { INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/" }, - { INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: BOOLEAN", "/" }, - { INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/" }, - { INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, - { INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(INT_SCHEMA, NULL_SCHEMA, "reader type: INT not compatible with writer type: NULL", "/"), + Arguments.of(INT_SCHEMA, BOOLEAN_SCHEMA, "reader type: INT not compatible with writer type: BOOLEAN", "/"), + Arguments.of(INT_SCHEMA, LONG_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/"), + Arguments.of(INT_SCHEMA, FLOAT_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/"), + Arguments.of(INT_SCHEMA, DOUBLE_SCHEMA, "reader type: INT not compatible with writer type: DOUBLE", "/"), - { LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/" }, + Arguments.of(LONG_SCHEMA, FLOAT_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/"), + Arguments.of(LONG_SCHEMA, DOUBLE_SCHEMA, "reader type: LONG not compatible with writer type: DOUBLE", "/"), - { FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, + Arguments.of(FLOAT_SCHEMA, DOUBLE_SCHEMA, "reader type: FLOAT not compatible with writer type: DOUBLE", "/"), - { DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/" }, + Arguments.of(DOUBLE_SCHEMA, STRING_SCHEMA, "reader type: DOUBLE not compatible with writer type: STRING", "/"), - { FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/" }, + Arguments.of(FIXED_4_BYTES, STRING_SCHEMA, "reader type: FIXED not compatible with writer type: STRING", "/"), - { STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", "/" }, - { STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/" }, + Arguments.of(STRING_SCHEMA, BOOLEAN_SCHEMA, "reader type: STRING not compatible with writer type: BOOLEAN", + "/"), + Arguments.of(STRING_SCHEMA, INT_SCHEMA, "reader type: STRING not compatible with writer type: INT", "/"), - { BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/" }, - { BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with writer type: INT", "/" }, + Arguments.of(BYTES_SCHEMA, NULL_SCHEMA, "reader type: BYTES not compatible with writer type: NULL", "/"), + Arguments.of(BYTES_SCHEMA, INT_SCHEMA, "reader type: BYTES not compatible with writer type: INT", "/"), - { A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with writer type: INT", "/" }, + Arguments.of(A_INT_RECORD1, INT_SCHEMA, "reader type: RECORD not compatible with 
writer type: INT", "/"), - { INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/items" }, - { INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/" }, - { INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/" }, - { INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", "/values" }, + Arguments.of(INT_ARRAY_SCHEMA, LONG_ARRAY_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/items"), + Arguments.of(INT_MAP_SCHEMA, INT_ARRAY_SCHEMA, "reader type: MAP not compatible with writer type: ARRAY", "/"), + Arguments.of(INT_ARRAY_SCHEMA, INT_MAP_SCHEMA, "reader type: ARRAY not compatible with writer type: MAP", "/"), + Arguments.of(INT_MAP_SCHEMA, LONG_MAP_SCHEMA, "reader type: INT not compatible with writer type: LONG", + "/values"), - { INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/" }, - { ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/" }, + Arguments.of(INT_SCHEMA, ENUM2_AB_SCHEMA, "reader type: INT not compatible with writer type: ENUM", "/"), + Arguments.of(ENUM2_AB_SCHEMA, INT_SCHEMA, "reader type: ENUM not compatible with writer type: INT", "/"), - { FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, - "reader type: FLOAT not compatible with writer type: DOUBLE", "/" }, - { LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", "/" }, - { INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", "/" }, + Arguments.of(FLOAT_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA, + "reader type: FLOAT not compatible with writer type: DOUBLE", "/3"), + Arguments.of(LONG_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: LONG not compatible with writer type: FLOAT", + "/1"), + Arguments.of(INT_SCHEMA, INT_FLOAT_UNION_SCHEMA, "reader type: INT not compatible with writer type: FLOAT", + "/1"), - { INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", - "/fields/0/type" }, + Arguments.of(INT_LIST_RECORD, LONG_LIST_RECORD, "reader type: INT not compatible with writer type: LONG", + "/fields/0/type"), - { NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/" } }; - return Arrays.asList(fields); + Arguments.of(NULL_SCHEMA, INT_SCHEMA, "reader type: NULL not compatible with writer type: INT", "/")); } - @Parameter(0) - public Schema reader; - @Parameter(1) - public Schema writer; - @Parameter(2) - public String details; - @Parameter(3) - public String location; - - @Test - public void testTypeMismatchSchemas() throws Exception { + @ParameterizedTest + @MethodSource("data") + public void testTypeMismatchSchemas(Schema reader, Schema writer, String details, String location) throws Exception { validateIncompatibleSchemas(reader, writer, SchemaIncompatibilityType.TYPE_MISMATCH, details, location); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java new file mode 100644 index 00000000000..a86519c7560 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSchemaValidateDefault.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.Encoder; +import org.apache.avro.io.EncoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.reflect.ReflectDatumWriter; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Objects; +import java.util.function.Function; + +public class TestSchemaValidateDefault { + + @Test + public void valueReadWithCorrectDefaultValue() throws IOException { + + ExampleRecord writtenValue = new ExampleRecord(new ComplexValue(42L), new ComplexValue(666L)); + byte[] bytes = getSerializer(ExampleRecord.SCHEMA_WITH_ONE_FIELD).apply(writtenValue); + + ReflectDatumReader reader = new ReflectDatumReader<>(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + ExampleRecord.SCHEMA_WITH_TWO_FIELDS, ReflectData.get()); + Decoder decoder = DecoderFactory.get().jsonDecoder(ExampleRecord.SCHEMA_WITH_ONE_FIELD, + new ByteArrayInputStream(bytes)); + ExampleRecord deserializedValue = reader.read(null, decoder); + + Assertions.assertNotNull(deserializedValue.getValue2(), "Null get value2"); + Assertions.assertEquals(15L, deserializedValue.getValue2().getValue()); + } + + public static Function getSerializer(Schema writerSchema) { + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumWriter writer = new ReflectDatumWriter<>(writerSchema, new ReflectData()); + return object -> { + try { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + Encoder encoder = EncoderFactory.get().jsonEncoder(writerSchema, stream); + writer.write(object, encoder); + encoder.flush(); + return stream.toByteArray(); + } catch (IOException e) { + throw new IllegalStateException(String.format("Avro failed to encode %s to schema %s", object, writerSchema), + e); + } + }; + } + + public static Function getDeserializer(Class readClass, Schema readerSchema, Schema writerSchema) { + Objects.requireNonNull(readClass, "readClass must not be null"); + Objects.requireNonNull(readerSchema, "readerSchema must not be null"); + Objects.requireNonNull(writerSchema, "writerSchema must not be null"); + + ReflectDatumReader reader = new ReflectDatumReader<>(writerSchema, readerSchema, new ReflectData()); + return (byte[] bytes) -> { + try { + Decoder decoder = DecoderFactory.get().jsonDecoder(writerSchema, new ByteArrayInputStream(bytes)); + T readValue = reader.read(null, decoder); + return readValue; + } catch (IOException e) { + throw new IllegalStateException(String.format("Avro failed to decode %s to %s", new String(bytes), readClass), 
+ e); + } + }; + } + + static final Schema SCHEMA = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ComplexValue").fields() + .optionalLong("value").endRecord(); + + public static class ComplexValue { + + private Long value; + + public ComplexValue() { + } + + public ComplexValue(Long value) { + this.value = value; + } + + public Long getValue() { + return this.value; + } + + @Override + public String toString() { + return "{" + "\"value\": { \"long\": " + this.value + "}}"; + } + } + + public static class ExampleRecord { + public static final Schema SCHEMA_WITH_ONE_FIELD; + public static final Schema SCHEMA_WITH_TWO_FIELDS; + + static { + SCHEMA_WITH_ONE_FIELD = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + .name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().endRecord(); + + GenericData.Record record = new GenericData.Record(TestSchemaValidateDefault.SCHEMA); + record.put("value", 15L); + + SCHEMA_WITH_TWO_FIELDS = SchemaBuilder.record("org.apache.avro.TestSchemaValidateDefault.ExampleRecord").fields() + .name("value1").type(TestSchemaValidateDefault.SCHEMA).noDefault().name("value2") + .type(TestSchemaValidateDefault.SCHEMA).withDefault(record).endRecord(); + } + + private ComplexValue value1; + private ComplexValue value2; + + public ExampleRecord() { + } + + public ExampleRecord(ComplexValue value1, ComplexValue value2) { + this.value1 = value1; + this.value2 = value2; + } + + public ComplexValue getValue1() { + return this.value1; + } + + public ComplexValue getValue2() { + return this.value2; + } + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java new file mode 100644 index 00000000000..0da39179506 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestSystemLimitException.java @@ -0,0 +1,164 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro; + +import static org.apache.avro.SystemLimitException.*; +import static org.junit.jupiter.api.Assertions.*; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; + +import java.util.function.Function; + +public class TestSystemLimitException { + + /** Delegated here for package visibility. */ + public static final int MAX_ARRAY_VM_LIMIT = SystemLimitException.MAX_ARRAY_VM_LIMIT; + + public static final String ERROR_NEGATIVE = "Malformed data. Length is negative: -1"; + public static final String ERROR_VM_LIMIT_BYTES = "Cannot read arrays longer than " + MAX_ARRAY_VM_LIMIT + + " bytes in Java library"; + public static final String ERROR_VM_LIMIT_COLLECTION = "Cannot read collections larger than " + MAX_ARRAY_VM_LIMIT + + " items in Java library"; + public static final String ERROR_VM_LIMIT_STRING = "Cannot read strings longer than " + MAX_ARRAY_VM_LIMIT + " bytes"; + + /** Delegated here for package visibility. 
*/ + public static void resetLimits() { + SystemLimitException.resetLimits(); + } + + @AfterEach + void reset() { + System.clearProperty(MAX_BYTES_LENGTH_PROPERTY); + System.clearProperty(MAX_COLLECTION_LENGTH_PROPERTY); + System.clearProperty(MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + + /** + * A helper method that tests the consistent limit handling from system + * properties. + * + * @param f The function to be tested. + * @param sysProperty The system property used to control the custom limit. + * @param errorVmLimit The error message used when the requested length would be + * over the VM limit. + * @param errorCustomLimit The error message used when the requested length would be + * over the custom limit of 1000. + */ + void helpCheckSystemLimits(Function<Long, Integer> f, String sysProperty, String errorVmLimit, + String errorCustomLimit) { + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(1024, f.apply(1024L)); + assertEquals(MAX_ARRAY_VM_LIMIT, f.apply((long) MAX_ARRAY_VM_LIMIT)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, () -> f.apply(Long.MAX_VALUE)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. + System.setProperty(sysProperty, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(0, f.apply(0L)); + assertEquals(102, f.apply(102L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> f.apply((long) MAX_ARRAY_VM_LIMIT + 1)); + assertEquals(errorVmLimit, ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> f.apply(1024L)); + assertEquals(errorCustomLimit, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> f.apply(-1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } + + @Test + void testCheckMaxBytesLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxBytesLength, MAX_BYTES_LENGTH_PROPERTY, ERROR_VM_LIMIT_BYTES, + "Bytes length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromZero() { + helpCheckSystemLimits(l -> checkMaxCollectionLength(0L, l), MAX_COLLECTION_LENGTH_PROPERTY, + ERROR_VM_LIMIT_COLLECTION, "Collection length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxStringLength() { + helpCheckSystemLimits(SystemLimitException::checkMaxStringLength, MAX_STRING_LENGTH_PROPERTY, ERROR_VM_LIMIT_STRING, + "String length 1024 exceeds maximum allowed"); + } + + @Test + void testCheckMaxCollectionLengthFromNonZero() { + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 10L)); + assertEquals(MAX_ARRAY_VM_LIMIT, checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT - 10L, 10L)); + + // Values that exceed the default system limits throw exceptions + Exception ex = assertThrows(UnsupportedOperationException.class, + () -> checkMaxCollectionLength(10L, MAX_ARRAY_VM_LIMIT - 9L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, + () -> 
checkMaxCollectionLength(SystemLimitException.MAX_ARRAY_VM_LIMIT - 9L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE - 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE - 10L, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Overflow that adds to negative + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(10L, Long.MAX_VALUE)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(Long.MAX_VALUE, 10L)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(10L, -1L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + ex = assertThrows(AvroRuntimeException.class, () -> checkMaxCollectionLength(-1L, 10L)); + assertEquals(ERROR_NEGATIVE, ex.getMessage()); + + // Setting the system property to provide a custom limit. + System.setProperty(MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(1000L)); + resetLimits(); + + // Correct values pass through + assertEquals(10, checkMaxCollectionLength(10L, 0L)); + assertEquals(102, checkMaxCollectionLength(10L, 92L)); + assertEquals(102, checkMaxCollectionLength(92L, 10L)); + + // Values that exceed the custom system limits throw exceptions + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(MAX_ARRAY_VM_LIMIT, 1)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + ex = assertThrows(UnsupportedOperationException.class, () -> checkMaxCollectionLength(1, MAX_ARRAY_VM_LIMIT)); + assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(999, 25)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + ex = assertThrows(SystemLimitException.class, () -> checkMaxCollectionLength(25, 999)); + assertEquals("Collection length 1024 exceeds maximum allowed", ex.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java new file mode 100644 index 00000000000..7f5e48fb962 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/TestUnionError.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; + +import org.junit.jupiter.api.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +public class TestUnionError { + + @Test + void unionErrorMessage() throws IOException { + String writerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + + " \"name\" : \"C\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"c\",\n" + " \"type\" : [ {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"A\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " }, {\n" + + " \"type\" : \"record\",\n" + " \"name\" : \"B\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"amount1\",\n" + + " \"type\" : \"int\"\n" + " } ]\n" + " } ]\n" + + " } ]\n" + " }"; + Schema writerSchema = new Schema.Parser().parse(writerSchemaJson); + + String readerSchemaJson = " {\n" + " \"type\" : \"record\",\n" + " \"name\" : \"C1\",\n" + + " \"fields\" : [ {\n" + " \"name\" : \"c\",\n" + + " \"type\" : [ {\n" + " \"type\" : \"record\",\n" + + " \"name\" : \"A\",\n" + " \"fields\" : [ {\n" + + " \"name\" : \"amount\",\n" + " \"type\" : \"int\"\n" + + " } ]\n" + " }, \"float\" ]\n" + " } ]\n" + " }"; + Schema readerSchema = new Schema.Parser().parse(readerSchemaJson); + + List<Schema> unionSchemas = writerSchema.getField("c").schema().getTypes(); + + GenericRecord r = new GenericData.Record(writerSchema); + GenericRecord b = new GenericData.Record(unionSchemas.get(1)); + b.put("amount1", 12); + r.put("c", b); + + ByteArrayOutputStream outs = new ByteArrayOutputStream(); + GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(writerSchema); + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(outs, null); + datumWriter.write(r, encoder); + encoder.flush(); + + InputStream ins = new ByteArrayInputStream(outs.toByteArray()); + BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(ins, null); + + GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(writerSchema, readerSchema); + AvroTypeException avroException = assertThrows(AvroTypeException.class, () -> datumReader.read(null, decoder)); + assertEquals("Found B, expecting union[A, float]", avroException.getMessage()); + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java index 491a7e3f713..ef928db6f47 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java +++ b/lang/java/avro/src/test/java/org/apache/avro/file/TestAllCodecs.java @@ -18,43 +18,27 @@ package org.apache.avro.file; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.IOException; import java.nio.ByteBuffer; -import 
java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import static org.junit.Assert.assertTrue; - -@RunWith(Parameterized.class) public class TestAllCodecs { - @Parameterized.Parameters(name = "{index}: codec={0}") - public static Collection<Object[]> data() { - return Arrays.asList(new Object[][] { { "bzip2", BZip2Codec.class }, { "zstandard", ZstandardCodec.class }, - { "null", NullCodec.class }, { "xz", XZCodec.class }, { "snappy", SnappyCodec.class }, - { "deflate", DeflateCodec.class }, }); - } - - @Parameterized.Parameter(0) - public String codec; - - @Parameterized.Parameter(1) - public Class<? extends Codec> codecClass; - - @Test - public void testCodec() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codec(String codec, Class<? extends Codec> codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = CodecFactory.fromString(codec).createInstance(); - assertTrue(codecClass.isInstance(codecInstance)); - assertTrue(codecInstance.getName().equals(codec)); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); + Assertions.assertTrue(codecInstance.getName().equals(codec)); ByteBuffer inputByteBuffer = ByteBuffer.wrap(input); ByteBuffer compressedBuffer = codecInstance.compress(inputByteBuffer); @@ -62,28 +46,30 @@ public void testCodec() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // While the compressed size could in many real cases // *increase* compared to the input size, our input data // is extremely easy to compress and all Avro's compression algorithms // should have a compression ratio greater than 1 (except 'null'). - assertTrue(compressedSize < inputSize || codec.equals("null")); + Assertions.assertTrue(compressedSize < inputSize || codec.equals("null")); // Decompress the data ByteBuffer decompressedBuffer = codecInstance.decompress(compressedBuffer); // Validate that the input and output are equal. inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); } - @Test - public void testCodecSlice() throws IOException { + @ParameterizedTest + @MethodSource("codecTypes") + void codecSlice(String codec, Class<? extends Codec> codecClass) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); Codec codecInstance = CodecFactory.fromString(codec).createInstance(); + Assertions.assertTrue(codecClass.isInstance(codecInstance)); ByteBuffer partialBuffer = ByteBuffer.wrap(input); partialBuffer.position(17); @@ -94,7 +80,7 @@ public void testCodecSlice() throws IOException { int compressedSize = compressedBuffer.remaining(); // Make sure something returned - assertTrue(compressedSize > 0); + Assertions.assertTrue(compressedSize > 0); // Create a slice from the compressed buffer ByteBuffer sliceBuffer = ByteBuffer.allocate(compressedSize + 100); @@ -108,7 +94,13 @@ public void testCodecSlice() throws IOException { // Validate that the input and output are equal. 
inputByteBuffer.rewind(); - Assert.assertEquals(decompressedBuffer, inputByteBuffer); + Assertions.assertEquals(inputByteBuffer, decompressedBuffer); + } + + public static Stream<Arguments> codecTypes() { + return Stream.of(Arguments.of("bzip2", BZip2Codec.class), Arguments.of("zstandard", ZstandardCodec.class), + Arguments.of("null", NullCodec.class), Arguments.of("xz", XZCodec.class), + Arguments.of("snappy", SnappyCodec.class), Arguments.of("deflate", DeflateCodec.class)); } // Generate some test data that will compress easily diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java new file mode 100644 index 00000000000..a4ffebac02d --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataArrayTest.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class GenericDataArrayTest { + + @Test + void test() { + GenericData.Array<String> array = new GenericData.Array<>(10, + Schema.createArray(Schema.create(Schema.Type.STRING))); + array.add("One"); + array.add("Two"); + array.add("Two"); + array.add("Three"); + array.add(4, "Four"); + array.remove(1); + Assertions.assertEquals(4, array.size()); + Assertions.assertEquals("One", array.get(0)); + Assertions.assertEquals("Two", array.get(1)); + Assertions.assertEquals("Three", array.get(2)); + Assertions.assertEquals("Four", array.get(3)); + } + +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java new file mode 100644 index 00000000000..7d199bf92c8 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/PrimitivesArraysTest.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.generic; + +import org.apache.avro.Schema; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +class PrimitivesArraysTest { + + @Test + void booleanArray() { + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + + Assertions.assertEquals(0, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + ba.add(true); + } else { + ba.add(false); + } + } + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + Assertions.assertEquals(98, ba.size()); + for (int i = 13; i < 99; i++) { + if ((i + 1) % 3 == 0 || (i + 1) % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "After delete, Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "After delete, Error for " + i); + } + } + + ba.add(12, false); + Assertions.assertEquals(99, ba.size()); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + Assertions.assertFalse(ba.remove(12)); + ba.add(12, true); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(i - 1), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(i - 1), "Error for " + i); + } + } + ba.add(99, true); + Assertions.assertTrue(ba.get(99), "Error for 99"); + ba.remove(99); + ba.reverse(); + for (int i = 1; i < 100; i++) { + if (i % 3 == 0 || i % 5 == 0 || i == 13) { + Assertions.assertTrue(ba.get(99 - i), "Error for " + i); + } else { + Assertions.assertFalse(ba.get(99 - i), "Error for " + i); + } + } + } + + @Test + void booleanArrayIterator() { + PrimitivesArrays.BooleanArray ba = new PrimitivesArrays.BooleanArray(4, + Schema.createArray(Schema.create(Schema.Type.BOOLEAN))); + boolean[] model = new boolean[] { true, false, false, true, true, true, false, false, true, false, false }; + for (boolean x : model) { + ba.add(x); + } + Assertions.assertEquals(model.length, ba.size()); + int index = 0; + for (Boolean b : ba) { + Assertions.assertEquals(model[index], b); + index++; + } + } + + @Test + void intArray() { + final PrimitivesArrays.IntArray intArray = new PrimitivesArrays.IntArray(4, + Schema.createArray(Schema.create(Schema.Type.INT))); + for (int i = 1; i <= 100; i++) { + intArray.add(i); + } + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + + int expectedValue = 1; + for (Integer value : intArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + intArray.remove(40); + Assertions.assertEquals(99, intArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, intArray.get(i - 1)); + } + } + intArray.add(40, 41); + Assertions.assertEquals(100, intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, intArray.get(i - 1)); + } + intArray.set(40, 25); + Assertions.assertEquals(25, intArray.get(40)); + + Assertions.assertEquals(0, intArray.peek()); + intArray.set(40, 41); + intArray.reverse(); + Assertions.assertEquals(100, 
intArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, intArray.get(i - 1)); + } + } + + @Test + void longArray() { + final PrimitivesArrays.LongArray longArray = new PrimitivesArrays.LongArray(4, + Schema.createArray(Schema.create(Schema.Type.LONG))); + for (long i = 1; i <= 100; i++) { + longArray.add(i); + } + Assertions.assertEquals(100L, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } + + int expectedValue = 1; + for (Long value : longArray) { + Assertions.assertEquals(expectedValue, value); + expectedValue++; + } + + longArray.remove(40); + Assertions.assertEquals(99, longArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } else { + Assertions.assertEquals(i + 1, longArray.get(i - 1)); + } + } + longArray.add(40, 41); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i, longArray.get(i - 1)); + } + longArray.set(40, 25); + Assertions.assertEquals(25, longArray.get(40)); + + Assertions.assertEquals(0, longArray.peek()); + longArray.set(40, 41); + longArray.reverse(); + Assertions.assertEquals(100, longArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(101 - i, longArray.get(i - 1)); + } + } + + @Test + void floatArray() { + final PrimitivesArrays.FloatArray floatArray = new PrimitivesArrays.FloatArray(4, + Schema.createArray(Schema.create(Schema.Type.FLOAT))); + for (int i = 1; i <= 100; i++) { + floatArray.add(i * 3.3f); + } + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + + float expectedValue = 1.0f; + for (Float value : floatArray) { + Assertions.assertEquals(expectedValue * 3.3f, value); + expectedValue++; + } + + floatArray.remove(40); + Assertions.assertEquals(99, floatArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.3f, floatArray.get(i - 1)); + } + } + floatArray.add(40, 41 * 3.3f); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.3f, floatArray.get(i - 1)); + } + floatArray.set(40, 25.2f); + Assertions.assertEquals(25.2f, floatArray.get(40)); + + Assertions.assertEquals(0.0f, floatArray.peek()); + floatArray.set(40, 41 * 3.3f); + floatArray.reverse(); + Assertions.assertEquals(100, floatArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.3f, floatArray.get(i - 1)); + } + } + + @Test + void doubleArray() { + final PrimitivesArrays.DoubleArray doubleArray = new PrimitivesArrays.DoubleArray(4, + Schema.createArray(Schema.create(Schema.Type.DOUBLE))); + for (int i = 1; i <= 100; i++) { + doubleArray.add(i * 3.0d); + } + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + + double expectedValue = 1.0d; + for (Double value : doubleArray) { + Assertions.assertEquals(expectedValue * 3.0d, value); + expectedValue++; + } + + doubleArray.remove(40); + Assertions.assertEquals(99, doubleArray.size()); + for (int i = 1; i <= 99; i++) { + if (i <= 40) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } else { + Assertions.assertEquals((i + 1) * 3.0d, doubleArray.get(i - 1)); + } + } + 
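+    // Re-inserting the removed element (41 * 3.0d) at index 40 below restores the full i * 3.0d sequence, which the next loop verifies.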
doubleArray.add(40, 41 * 3.0d); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals(i * 3.0d, doubleArray.get(i - 1)); + } + doubleArray.set(40, 25.2d); + Assertions.assertEquals(25.2d, doubleArray.get(40)); + + Assertions.assertEquals(0.0d, doubleArray.peek()); + doubleArray.set(40, 41 * 3.0d); + doubleArray.reverse(); + Assertions.assertEquals(100, doubleArray.size()); + for (int i = 1; i <= 100; i++) { + Assertions.assertEquals((101 - i) * 3.0d, doubleArray.get(i - 1)); + } + } +} diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java index bca151c15df..20c82179561 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericData.java @@ -157,9 +157,8 @@ void testEquals() { } @Test - public void testMapKeyEquals() { - Schema mapSchema = new Schema.Parser().parse("{\"type\": \"map\", \"values\": \"string\"}"); - Field myMapField = new Field("my_map", Schema.createMap(mapSchema), null, null); + public void testMapKeyEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); schema.setFields(Arrays.asList(myMapField)); GenericRecord r0 = new GenericData.Record(schema); @@ -178,9 +177,8 @@ public void testMapKeyEquals() { } @Test - public void testMapValuesEquals() { - Schema mapSchema = new Schema.Parser().parse("{\"type\": \"map\", \"values\": \"string\"}"); - Field myMapField = new Field("my_map", Schema.createMap(mapSchema), null, null); + public void testMapValuesEqualsStringAndUtf8Compatibility() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); schema.setFields(Arrays.asList(myMapField)); GenericRecord r0 = new GenericData.Record(schema); @@ -198,6 +196,117 @@ public void testMapValuesEquals() { assertEquals(r1, r0); } + @Test + public void testEqualsEmptyMaps() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + r1.put("my_map", new HashMap<>()); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + + @Test + public void testEqualsEmptyMapAndNonEmptyMap() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + r0.put("my_map", new HashMap<>()); + GenericRecord r1 = new GenericData.Record(schema); + HashMap<String, String> pair1 = new HashMap<>(); + pair1.put("keyOne", "valueOne"); + r1.put("my_map", pair1); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSubset() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + 
schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap<String, String> m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + m1.put("keyTwo", "valueTwo"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap<String, String> m2 = new HashMap<>(); + m2.put("keyOne", "valueOne"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentKeys() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap<String, String> m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap<String, String> m2 = new HashMap<>(); + m2.put("keyTwo", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testEqualsMapAndSameSizeMapWithDifferentValues() { + Field myMapField = new Field("my_map", Schema.createMap(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myMapField)); + + GenericRecord r0 = new GenericData.Record(schema); + HashMap<String, String> m1 = new HashMap<>(); + m1.put("keyOne", "valueOne"); + r0.put("my_map", m1); + + GenericRecord r1 = new GenericData.Record(schema); + HashMap<String, String> m2 = new HashMap<>(); + m2.put("keyOne", "valueTwo"); + r1.put("my_map", m2); + + assertNotEquals(r0, r1); + assertNotEquals(r1, r0); + } + + @Test + public void testArrayValuesEqualsStringAndUtf8Compatibility() { + Field myArrayField = new Field("my_array", Schema.createArray(Schema.create(Schema.Type.STRING)), null, null); + Schema schema = Schema.createRecord("my_record", "doc", "mytest", false); + schema.setFields(Arrays.asList(myArrayField)); + GenericRecord r0 = new GenericData.Record(schema); + GenericRecord r1 = new GenericData.Record(schema); + + List<CharSequence> array1 = Arrays.asList("valueOne"); + r0.put("my_array", array1); + + List<CharSequence> array2 = Arrays.asList(new Utf8("valueOne")); + r1.put("my_array", array2); + + assertEquals(r0, r1); + assertEquals(r1, r0); + } + private Schema recordSchema() { List<Field> fields = new ArrayList<>(); fields.add(new Field("anArray", Schema.createArray(Schema.create(Type.STRING)), null, null)); diff --git a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java index 3d5e2300d9d..25a838db335 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/generic/TestGenericLogicalTypes.java @@ -18,11 +18,6 @@ package org.apache.avro.generic; -import static org.hamcrest.Matchers.is; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotSame; -import static org.hamcrest.MatcherAssert.assertThat; - import java.io.File; import java.io.IOException; import java.math.BigDecimal; @@ -35,8 +30,10 @@ import java.util.Collections; import java.util.List; import java.util.UUID; + import org.apache.avro.Conversion; import org.apache.avro.Conversions; +import org.apache.avro.CustomType; import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import 
org.apache.avro.Schema; @@ -51,6 +48,11 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; + public class TestGenericLogicalTypes { @TempDir @@ -402,4 +404,53 @@ public void writeLocalTimestampMicros() throws IOException { assertEquals(expected, read(GenericData.get().createDatumReader(timestampSchema), test), "Should read LocalDateTime as longs"); } + + @Test + public void testReadAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + List<CustomType> expected = Arrays.asList(ct1, ct2); + + Conversion<CustomType> conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + + File test = write(stringSchema, ct1String, ct2String); + assertEquals(expected, read(GENERIC.createDatumReader(customTypeSchema), test), + "Should convert string to CustomType"); + } + + @Test + public void testWriteAutomaticallyRegisteredUri() throws IOException { + Schema stringSchema = Schema.create(Schema.Type.STRING); + GenericData.setStringType(stringSchema, GenericData.StringType.String); + LogicalType customType = LogicalTypes.getCustomRegisteredTypes().get("custom").fromSchema(stringSchema); + Schema customTypeSchema = customType.addToSchema(Schema.create(Schema.Type.STRING)); + + CustomType ct1 = new CustomType("foo"); + CustomType ct2 = new CustomType("bar"); + + Conversion<CustomType> conversion = GENERIC.getConversionFor(customType); + + // use the conversion directly instead of relying on the write side + CharSequence ct1String = conversion.toCharSequence(ct1, stringSchema, customType); + CharSequence ct2String = conversion.toCharSequence(ct2, stringSchema, customType); + List<CharSequence> expected = Arrays.asList(ct1String, ct2String); + + File test = write(GENERIC, customTypeSchema, ct1, ct2); + + // Note that this test still cannot read strings using the logical type + // schema, as all GenericData instances have the logical type and the + // conversions loaded. That's why this final assert is slightly different. 
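+ // Reading with the plain string schema therefore yields the converted CharSequence values rather than CustomType instances.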
+ + assertEquals(expected, read(GenericData.get().createDatumReader(stringSchema), test), + "Should read CustomType as strings"); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java index ccd627cb944..167cd724630 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryData.java @@ -18,6 +18,7 @@ package org.apache.avro.io; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import org.junit.jupiter.api.Test; @@ -39,4 +40,24 @@ void skipLong() { assertEquals(nextIndex, 10); } + @Test + void testIntLongVleEquality() { + byte[] intResult = new byte[9]; + byte[] longResult = new byte[9]; + BinaryData.encodeInt(0, intResult, 0); + BinaryData.encodeLong(0, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(42, intResult, 0); + BinaryData.encodeLong(42, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(-24, intResult, 0); + BinaryData.encodeLong(-24, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MAX_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MAX_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + BinaryData.encodeInt(Integer.MIN_VALUE, intResult, 0); + BinaryData.encodeLong(Integer.MIN_VALUE, longResult, 0); + assertArrayEquals(intResult, longResult); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java index fe405cfb9d2..6010fc9c69f 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBinaryDecoder.java @@ -17,56 +17,49 @@ */ package org.apache.avro.io; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import org.apache.avro.AvroRuntimeException; import org.apache.avro.Schema; +import org.apache.avro.SystemLimitException; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.util.ByteBufferInputStream; import org.apache.avro.util.ByteBufferOutputStream; import org.apache.avro.util.RandomData; import org.apache.avro.util.Utf8; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; - -@RunWith(Parameterized.class) + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.apache.avro.TestSystemLimitException.*; + public class TestBinaryDecoder { // prime number buffer size so that looping tests hit the buffer edge // at 
different points in the loop. DecoderFactory factory = new DecoderFactory().configureDecoderBufferSize(521); - private boolean useDirect = false; - static EncoderFactory e_factory = EncoderFactory.get(); - public TestBinaryDecoder(boolean useDirect) { - this.useDirect = useDirect; - } - - @Parameters - public static Collection<Object[]> data() { - return Arrays.asList(new Object[][] { { true }, { false }, }); - } + static EncoderFactory e_factory = EncoderFactory.get(); - private Decoder newDecoderWithNoData() { - return newDecoder(new byte[0]); + private Decoder newDecoderWithNoData(boolean useDirect) { + return newDecoder(new byte[0], useDirect); } - private BinaryDecoder newDecoder(byte[] bytes, int start, int len) { - return this.newDecoder(bytes, start, len, null); + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, boolean useDirect) { + return this.newDecoder(bytes, start, len, null, useDirect); } - private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse) { + private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { final ByteArrayInputStream input = new ByteArrayInputStream(bytes, start, len); return factory.directBinaryDecoder(input, reuse); @@ -75,11 +68,11 @@ private BinaryDecoder newDecoder(byte[] bytes, int start, int len, BinaryDecoder } } - private BinaryDecoder newDecoder(InputStream in) { - return this.newDecoder(in, null); + private BinaryDecoder newDecoder(InputStream in, boolean useDirect) { + return this.newDecoder(in, null, useDirect); } - private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse) { + private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse, boolean useDirect) { if (useDirect) { return factory.directBinaryDecoder(in, reuse); } else { @@ -87,67 +80,93 @@ private BinaryDecoder newDecoder(InputStream in, BinaryDecoder reuse) { } } - private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse) { - if (this.useDirect) { + private BinaryDecoder newDecoder(byte[] bytes, BinaryDecoder reuse, boolean useDirect) { + if (useDirect) { return this.factory.directBinaryDecoder(new ByteArrayInputStream(bytes), reuse); } else { return factory.binaryDecoder(bytes, reuse); } } - private BinaryDecoder newDecoder(byte[] bytes) { - return this.newDecoder(bytes, null); + private BinaryDecoder newDecoder(byte[] bytes, boolean useDirect) { + return this.newDecoder(bytes, null, useDirect); + } + + /** + * Create a decoder for simulating reading corrupt, unexpected or out-of-bounds + * data. + * + * @return a {@link org.apache.avro.io.BinaryDecoder} that has been initialized + * on a byte array containing the sequence of encoded longs in order. + */ + private BinaryDecoder newDecoder(boolean useDirect, long... 
values) throws IOException { + try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null); + for (long v : values) + encoder.writeLong(v); + encoder.flush(); + return newDecoder(baos.toByteArray(), useDirect); + } } /** Verify EOFException throw at EOF */ - @Test(expected = EOFException.class) - public void testEOFBoolean() throws IOException { - newDecoderWithNoData().readBoolean(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBoolean(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBoolean()); } - @Test(expected = EOFException.class) - public void testEOFInt() throws IOException { - newDecoderWithNoData().readInt(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofInt(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testEOFLong() throws IOException { - newDecoderWithNoData().readLong(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofLong(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testEOFFloat() throws IOException { - newDecoderWithNoData().readFloat(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFloat(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFloat()); } - @Test(expected = EOFException.class) - public void testEOFDouble() throws IOException { - newDecoderWithNoData().readDouble(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofDouble(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readDouble()); } - @Test(expected = EOFException.class) - public void testEOFBytes() throws IOException { - newDecoderWithNoData().readBytes(null); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofBytes(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readBytes(null)); } - @Test(expected = EOFException.class) - public void testEOFString() throws IOException { - newDecoderWithNoData().readString(new Utf8("a")); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofString(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readString(new Utf8("a"))); } - @Test(expected = EOFException.class) - public void testEOFFixed() throws IOException { - newDecoderWithNoData().readFixed(new byte[1]); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofFixed(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readFixed(new byte[1])); } - @Test(expected = EOFException.class) - public void testEOFEnum() throws IOException { - newDecoderWithNoData().readEnum(); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eofEnum(boolean useDirect) { + Assertions.assertThrows(EOFException.class, () -> newDecoderWithNoData(useDirect).readEnum()); } @Test - public void testReuse() throws IOException { + void reuse() throws IOException { ByteBufferOutputStream bbo1 = new ByteBufferOutputStream(); ByteBufferOutputStream bbo2 = new ByteBufferOutputStream(); 
byte[] b1 = new byte[] { 1, 2 }; @@ -162,11 +181,11 @@ public void testReuse() throws IOException { DirectBinaryDecoder d = new DirectBinaryDecoder(new ByteBufferInputStream(bbo1.getBufferList())); ByteBuffer bb1 = d.readBytes(null); - Assert.assertEquals(b1.length, bb1.limit() - bb1.position()); + Assertions.assertEquals(b1.length, bb1.limit() - bb1.position()); d.configure(new ByteBufferInputStream(bbo2.getBufferList())); ByteBuffer bb2 = d.readBytes(null); - Assert.assertEquals(b1.length, bb2.limit() - bb2.position()); + Assertions.assertEquals(b1.length, bb2.limit() - bb2.position()); } @@ -175,7 +194,7 @@ public void testReuse() throws IOException { private static final int count = 200; private static final ArrayList<Object> records = new ArrayList<>(count); - @BeforeClass + @BeforeAll public static void generateData() throws IOException { int seed = (int) System.currentTimeMillis(); // note some tests (testSkipping) rely on this explicitly @@ -199,8 +218,9 @@ public static void generateData() throws IOException { data = baos.toByteArray(); } - @Test - public void testDecodeFromSources() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void decodeFromSources(boolean useDirect) throws IOException { GenericDatumReader<Object> reader = new GenericDatumReader<>(); reader.setSchema(schema); @@ -208,81 +228,82 @@ ByteArrayInputStream is2 = new ByteArrayInputStream(data); ByteArrayInputStream is3 = new ByteArrayInputStream(data); - Decoder fromInputStream = newDecoder(is); - Decoder fromArray = newDecoder(data); + Decoder fromInputStream = newDecoder(is, useDirect); + Decoder fromArray = newDecoder(data, useDirect); byte[] data2 = new byte[data.length + 30]; Arrays.fill(data2, (byte) 0xff); System.arraycopy(data, 0, data2, 15, data.length); - Decoder fromOffsetArray = newDecoder(data2, 15, data.length); + Decoder fromOffsetArray = newDecoder(data2, 15, data.length, useDirect); - BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30); - initOnInputStream = newDecoder(is2, initOnInputStream); - BinaryDecoder initOnArray = this.newDecoder(is3, null); - initOnArray = this.newDecoder(data, initOnArray); + BinaryDecoder initOnInputStream = newDecoder(new byte[50], 0, 30, useDirect); + initOnInputStream = newDecoder(is2, initOnInputStream, useDirect); + BinaryDecoder initOnArray = this.newDecoder(is3, null, useDirect); + initOnArray = this.newDecoder(data, initOnArray, useDirect); for (Object datum : records) { - Assert.assertEquals("InputStream based BinaryDecoder result does not match", datum, - reader.read(null, fromInputStream)); - Assert.assertEquals("Array based BinaryDecoder result does not match", datum, reader.read(null, fromArray)); - Assert.assertEquals("offset Array based BinaryDecoder result does not match", datum, - reader.read(null, fromOffsetArray)); - Assert.assertEquals("InputStream initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnInputStream)); - Assert.assertEquals("Array initialized BinaryDecoder result does not match", datum, - reader.read(null, initOnArray)); + Assertions.assertEquals(datum, reader.read(null, fromInputStream), + "InputStream based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromArray), "Array based BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, fromOffsetArray), + "offset Array based BinaryDecoder result does not match"); + 
Assertions.assertEquals(datum, reader.read(null, initOnInputStream), + "InputStream initialized BinaryDecoder result does not match"); + Assertions.assertEquals(datum, reader.read(null, initOnArray), + "Array initialized BinaryDecoder result does not match"); } } - @Test - public void testInputStreamProxy() throws IOException { - BinaryDecoder d = newDecoder(data); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxy(boolean useDirect) throws IOException { + BinaryDecoder d = newDecoder(data, useDirect); if (d != null) { BinaryDecoder bd = d; InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = this.newDecoder(data, bd); + bd = this.newDecoder(data, bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); // with input stream sources - bd = newDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamReads(test, check); - bd = newDecoder(new ByteArrayInputStream(data), bd); + bd = newDecoder(new ByteArrayInputStream(data), bd, useDirect); test = bd.inputStream(); check = new ByteArrayInputStream(data); validateInputStreamSkips(test, check); } } - @Test - public void testInputStreamProxyDetached() throws IOException { - Decoder d = newDecoder(data); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - InputStream test = bd.inputStream(); - InputStream check = new ByteArrayInputStream(data); - // detach input stream and decoder from old source - this.newDecoder(new byte[56]); - try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { - validateInputStreamReads(test, check); - Assert.assertNotEquals(bad.read(), check2.read()); - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamProxyDetached(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + + InputStream test = bd.inputStream(); + InputStream check = new ByteArrayInputStream(data); + // detach input stream and decoder from old source + this.newDecoder(new byte[56], useDirect); + try (InputStream bad = bd.inputStream(); InputStream check2 = new ByteArrayInputStream(data)) { + validateInputStreamReads(test, check); + Assertions.assertNotEquals(bad.read(), check2.read()); } } - @Test - public void testInputStreamPartiallyUsed() throws IOException { - BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void inputStreamPartiallyUsed(boolean useDirect) throws IOException { + BinaryDecoder bd = this.newDecoder(new ByteArrayInputStream(data), useDirect); InputStream test = bd.inputStream(); InputStream check = new ByteArrayInputStream(data); // triggers buffer fill if unused and tests isEnd() try { - Assert.assertFalse(bd.isEnd()); + Assertions.assertFalse(bd.isEnd()); } catch (UnsupportedOperationException e) { // this is ok if its a DirectBinaryDecoder. 
if (bd.getClass() != DirectBinaryDecoder.class) { @@ -300,25 +321,28 @@ private void validateInputStreamReads(InputStream test, InputStream check) throw while (true) { int t = test.read(); int c = check.read(); - Assert.assertEquals(c, t); - if (-1 == t) + Assertions.assertEquals(c, t); + if (-1 == t) { break; + } t = test.read(bt); c = check.read(bc); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } t = test.read(bt, 1, 4); c = check.read(bc, 1, 4); - Assert.assertEquals(c, t); - Assert.assertArrayEquals(bt, bc); - if (-1 == t) + Assertions.assertEquals(c, t); + Assertions.assertArrayEquals(bt, bc); + if (-1 == t) { break; + } } - Assert.assertEquals(0, test.skip(5)); - Assert.assertEquals(0, test.available()); - Assert.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); + Assertions.assertEquals(0, test.skip(5)); + Assertions.assertEquals(0, test.available()); + Assertions.assertFalse(test.getClass() != ByteArrayInputStream.class && test.markSupported()); test.close(); } @@ -326,154 +350,300 @@ private void validateInputStreamSkips(InputStream test, InputStream check) throw while (true) { long t2 = test.skip(19); long c2 = check.skip(19); - Assert.assertEquals(c2, t2); - if (0 == t2) + Assertions.assertEquals(c2, t2); + if (0 == t2) { break; + } } - Assert.assertEquals(-1, test.read()); + Assertions.assertEquals(-1, test.read()); } - @Test - public void testBadIntEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badIntEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[5]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = this.newDecoder(badint); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readInt(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid int encoding", message); + Assertions.assertEquals("Invalid int encoding", message); } - @Test - public void testBadLongEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void badLongEncoding(boolean useDirect) throws IOException { byte[] badint = new byte[10]; Arrays.fill(badint, (byte) 0xff); - Decoder bd = this.newDecoder(badint); + Decoder bd = this.newDecoder(badint, useDirect); String message = ""; try { bd.readLong(); } catch (IOException ioe) { message = ioe.getMessage(); } - Assert.assertEquals("Invalid long encoding", message); + Assertions.assertEquals("Invalid long encoding", message); } - @Test - public void testNegativeStringLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, this.newDecoder(useDirect, -1L)::readString); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); + } - Assert.assertThrows("Malformed data. 
Length is negative: -1", AvroRuntimeException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1L)::readString); + Assertions.assertEquals(ERROR_VM_LIMIT_STRING, ex.getMessage()); } - @Test - public void testStringMaxArraySize() throws IOException { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testStringMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, newDecoder(useDirect, 129)::readString); + Assertions.assertEquals("String length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + resetLimits(); + } + } - Assert.assertThrows("Cannot read strings longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, bd::readString); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesNegativeLength(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(AvroRuntimeException.class, + () -> this.newDecoder(useDirect, -1).readBytes(null)); + Assertions.assertEquals(ERROR_NEGATIVE, ex.getMessage()); } - @Test - public void testNegativeBytesLength() throws IOException { - byte[] bad = new byte[] { (byte) 1 }; - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesVmMaxSize(boolean useDirect) throws IOException { + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readBytes(null)); + Assertions.assertEquals(ERROR_VM_LIMIT_BYTES, ex.getMessage()); + } - Assert.assertThrows("Malformed data. 
Length is negative: -1", AvroRuntimeException.class, () -> bd.readBytes(null)); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testBytesMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(SystemLimitException.class, + () -> newDecoder(useDirect, 129).readBytes(null)); + Assertions.assertEquals("Bytes length 129 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_BYTES_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxArraySize() { - byte[] bad = new byte[10]; - BinaryData.encodeLong(BinaryDecoder.MAX_ARRAY_SIZE + 1, bad, 0); - Decoder bd = this.newDecoder(bad); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).arrayNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // An OK read followed by an overflow + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, Long.MAX_VALUE); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readArrayStart()); + Assertions.assertEquals(100, bd.arrayNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::arrayNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testArrayMaxCustom(boolean useDirect) throws IOException { + try { + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readArrayStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit. 
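+      // Block counts 118 + 10 + 1 add up to 129 items, one over the custom limit of 128.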
+ Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the custom limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readArrayStart()); + Assertions.assertEquals(10, bd.arrayNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::arrayNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Cannot read arrays longer than " + BinaryDecoder.MAX_ARRAY_SIZE + " bytes", - UnsupportedOperationException.class, () -> bd.readBytes(null)); + } finally { + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); + } } - @Test - public void testBytesMaxLengthProperty() { - int maxLength = 128; - byte[] bad = new byte[10]; - BinaryData.encodeLong(maxLength + 1, bad, 0); + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapVmMaxSize(boolean useDirect) throws IOException { + // At start + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Next + ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> this.newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).mapNext()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit. + Decoder bd = newDecoder(useDirect, MAX_ARRAY_VM_LIMIT - 100, 100, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the VM limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, 100 - MAX_ARRAY_VM_LIMIT, 999, -100, 999, 1); + Assertions.assertEquals(MAX_ARRAY_VM_LIMIT - 100, bd.readMapStart()); + Assertions.assertEquals(100, bd.mapNext()); + ex = Assertions.assertThrows(UnsupportedOperationException.class, bd::mapNext); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + } + + @ParameterizedTest + @ValueSource(booleans = { true, false }) + public void testMapMaxCustom(boolean useDirect) throws IOException { try { - System.setProperty("org.apache.avro.limits.bytes.maxLength", Long.toString(maxLength)); - Decoder bd = this.newDecoder(bad); + System.setProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY, Long.toString(128)); + resetLimits(); + Exception ex = Assertions.assertThrows(UnsupportedOperationException.class, + () -> newDecoder(useDirect, MAX_ARRAY_VM_LIMIT + 1).readMapStart()); + Assertions.assertEquals(ERROR_VM_LIMIT_COLLECTION, ex.getMessage()); + + // Two OK reads followed by going over the custom limit.
+ Decoder bd = newDecoder(useDirect, 118, 10, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); + + // Two OK reads followed by going over the custom limit, where negative numbers are + // followed by the byte length of the items. For testing, the 999 values are + // read but ignored. + bd = newDecoder(useDirect, -118, 999, -10, 999, 1); + Assertions.assertEquals(118, bd.readMapStart()); + Assertions.assertEquals(10, bd.mapNext()); + ex = Assertions.assertThrows(SystemLimitException.class, bd::mapNext); + Assertions.assertEquals("Collection length 129 exceeds maximum allowed", ex.getMessage()); - Assert.assertThrows("Bytes length " + (maxLength + 1) + " exceeds maximum allowed", AvroRuntimeException.class, - () -> bd.readBytes(null)); } finally { - System.clearProperty("org.apache.avro.limits.bytes.maxLength"); + System.clearProperty(SystemLimitException.MAX_COLLECTION_LENGTH_PROPERTY); + resetLimits(); } } - @Test(expected = UnsupportedOperationException.class) - public void testLongLengthEncoding() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longLengthEncoding(boolean useDirect) { // Size equivalent to Integer.MAX_VALUE + 1 byte[] bad = new byte[] { (byte) -128, (byte) -128, (byte) -128, (byte) -128, (byte) 16 }; - Decoder bd = this.newDecoder(bad); - bd.readString(); + Decoder bd = this.newDecoder(bad, useDirect); + Assertions.assertThrows(UnsupportedOperationException.class, bd::readString); } - @Test(expected = EOFException.class) - public void testIntTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void intTooShort(boolean useDirect) { byte[] badint = new byte[4]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testLongTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void longTooShort(boolean useDirect) { byte[] badint = new byte[9]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test(expected = EOFException.class) - public void testFloatTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void floatTooShort(boolean useDirect) { byte[] badint = new byte[3]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readInt(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readInt()); } - @Test(expected = EOFException.class) - public void testDoubleTooShort() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void doubleTooShort(boolean useDirect) { byte[] badint = new byte[7]; Arrays.fill(badint, (byte) 0xff); - newDecoder(badint).readLong(); + Assertions.assertThrows(EOFException.class, () -> newDecoder(badint, useDirect).readLong()); } - @Test - public void testSkipping() throws IOException { - Decoder d = newDecoder(data); - skipGenerated(d); - if (d instanceof BinaryDecoder) { - BinaryDecoder bd = (BinaryDecoder) d; - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a
DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void skipping(boolean useDirect) throws IOException { + BinaryDecoder bd = newDecoder(data, useDirect); + skipGenerated(bd); + + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if it's a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } - bd = this.newDecoder(new ByteArrayInputStream(data), bd); - skipGenerated(bd); - try { - Assert.assertTrue(bd.isEnd()); - } catch (UnsupportedOperationException e) { - // this is ok if its a DirectBinaryDecoder. - if (bd.getClass() != DirectBinaryDecoder.class) { - throw e; - } + } + bd = this.newDecoder(new ByteArrayInputStream(data), bd, useDirect); + skipGenerated(bd); + try { + Assertions.assertTrue(bd.isEnd()); + } catch (UnsupportedOperationException e) { + // this is ok if it's a DirectBinaryDecoder. + if (bd.getClass() != DirectBinaryDecoder.class) { + throw e; } } + } private void skipGenerated(Decoder bd) throws IOException { @@ -496,19 +666,20 @@ private void skipGenerated(Decoder bd) throws IOException { } catch (EOFException e) { eof = e; } - Assert.assertNotNull(eof); + Assertions.assertNotNull(eof); } - @Test(expected = EOFException.class) - public void testEOF() throws IOException { + @ParameterizedTest + @ValueSource(booleans = { true, false }) + void eof(boolean useDirect) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); Encoder e = EncoderFactory.get().binaryEncoder(baos, null); e.writeLong(0x10000000000000L); e.flush(); - Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray())); - Assert.assertEquals(0x10000000000000L, d.readLong()); - d.readInt(); + Decoder d = newDecoder(new ByteArrayInputStream(baos.toByteArray()), useDirect); + Assertions.assertEquals(0x10000000000000L, d.readLong()); + Assertions.assertThrows(EOFException.class, () -> d.readInt()); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java index 6beda2ae66e..d107b9d82d7 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO.java @@ -17,7 +17,12 @@ */ package org.apache.avro.io; -import static org.junit.Assert.*; +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonParser; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -25,28 +30,14 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.ArrayDeque; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.core.JsonParser; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -@RunWith(Parameterized.class) public class TestBlockingIO { - private final int iSize; - private final int iDepth; - private final String sInput; - - public TestBlockingIO(int sz, int
dp, String inp) { - this.iSize = sz; - this.iDepth = dp; - this.sInput = inp; - } - private static class Tests { private final JsonParser parser; private final Decoder input; @@ -206,25 +197,29 @@ public S(long count, boolean isArray) { } } - @Test - public void testScan() throws IOException { - Tests t = new Tests(iSize, iDepth, sInput); + @ParameterizedTest + @MethodSource("data") + public void testScan(int size, int depth, String input) throws IOException { + Tests t = new Tests(size, depth, input); t.scan(); } - @Test - public void testSkip1() throws IOException { - testSkip(iSize, iDepth, sInput, 0); + @ParameterizedTest + @MethodSource("data") + public void testSkip1(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 0); } - @Test - public void testSkip2() throws IOException { - testSkip(iSize, iDepth, sInput, 1); + @ParameterizedTest + @MethodSource("data") + public void testSkip2(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 1); } - @Test - public void testSkip3() throws IOException { - testSkip(iSize, iDepth, sInput, 2); + @ParameterizedTest + @MethodSource("data") + public void testSkip3(int size, int depth, String input) throws IOException { + testSkip(size, depth, input, 2); } private void testSkip(int bufferSize, int depth, String input, int skipLevel) throws IOException { @@ -323,9 +318,8 @@ private static void serialize(Encoder cos, JsonParser p, ByteArrayOutputStream o } } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, jss(0, 'a') }, { 64, 0, jss(3, 'a') }, { 64, 0, jss(64, 'a') }, { 64, 0, jss(65, 'a') }, { 64, 0, jss(100, 'a') }, { 64, 1, "[]" }, { 64, 1, "[" + jss(0, 'a') + "]" }, { 64, 1, "[" + jss(3, 'a') + "]" }, { 64, 1, "[" + jss(61, 'a') + "]" }, { 64, 1, "[" + jss(62, 'a') + "]" }, { 64, 1, "[" + jss(64, 'a') + "]" }, { 64, 1, "[" + jss(65, 'a') + "]" }, @@ -387,7 +381,8 @@ public static Collection data() { { 100, 2, "[[\"pqr\", \"ab\", \"mnopqrstuvwx\"]]" }, { 64, 2, "[[[\"pqr\"]], [[\"ab\"], [\"mnopqrstuvwx\"]]]" }, { 64, 1, "{}" }, { 64, 1, "{\"n\": \"v\"}" }, { 64, 1, "{\"n1\": \"v\", \"n2\": []}" }, - { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }); + { 100, 1, "{\"n1\": \"v\", \"n2\": []}" }, { 100, 1, "{\"n1\": \"v\", \"n2\": [\"abc\"]}" }, }) + .map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java index 3a91bb96dea..378e17ee613 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestBlockingIO2.java @@ -17,14 +17,13 @@ */ package org.apache.avro.io; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import java.util.stream.Stream; /** * This class has more exhaustive tests for Blocking IO. The reason we have both @@ -32,38 +31,29 @@ * TestBlockingIO2, it is hard to test skip() operations. 
And with the test * infrastructure of TestBlockingIO, it is hard to test enums, unions, etc. */ -@RunWith(Parameterized.class) public class TestBlockingIO2 { - private final Decoder decoder; - private final String calls; - private Object[] values; - private String msg; - - public TestBlockingIO2(int bufferSize, int skipLevel, String calls) throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testScan(int bufferSize, int skipLevel, String calls) throws IOException { ByteArrayOutputStream os = new ByteArrayOutputStream(); EncoderFactory factory = new EncoderFactory().configureBlockSize(bufferSize); Encoder encoder = factory.blockingBinaryEncoder(os, null); - this.values = TestValidatingIO.randomValues(calls); + Object[] values = TestValidatingIO.randomValues(calls); TestValidatingIO.generate(encoder, calls, values); encoder.flush(); byte[] bb = os.toByteArray(); - decoder = DecoderFactory.get().binaryDecoder(bb, null); - this.calls = calls; - this.msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - } + Decoder decoder = DecoderFactory.get().binaryDecoder(bb, null); + String msg = "Case: { " + bufferSize + ", " + skipLevel + ", \"" + calls + "\" }"; - @Test - public void testScan() throws IOException { TestValidatingIO.check(msg, decoder, calls, values, -1); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, + public static Stream data() { + return Stream.of(new Object[][] { { 64, 0, "" }, { 64, 0, "S0" }, { 64, 0, "S3" }, { 64, 0, "S64" }, { 64, 0, "S65" }, { 64, 0, "S100" }, { 64, 1, "[]" }, { 64, 1, "[c1sS0]" }, { 64, 1, "[c1sS3]" }, { 64, 1, "[c1sS61]" }, { 64, 1, "[c1sS62]" }, { 64, 1, "[c1sS64]" }, { 64, 1, "[c1sS65]" }, { 64, 1, "[c2sS0sS0]" }, { 64, 1, "[c2sS0sS10]" }, { 64, 1, "[c2sS0sS63]" }, { 64, 1, "[c2sS0sS64]" }, @@ -99,6 +89,6 @@ public static Collection data() { { 100, 1, "{c1sK5e10}" }, { 100, 1, "{c1sK5U1S10}" }, { 100, 1, "{c1sK5f10S10}" }, { 100, 1, "{c1sK5NS10}" }, { 100, 1, "{c1sK5BS10}" }, { 100, 1, "{c1sK5IS10}" }, { 100, 1, "{c1sK5LS10}" }, { 100, 1, "{c1sK5FS10}" }, - { 100, 1, "{c1sK5DS10}" }, }); + { 100, 1, "{c1sK5DS10}" }, }).map(Arguments::of); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java index 665a0e7b6f9..dbed64d6a18 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestEncoders.java @@ -29,6 +29,8 @@ import org.apache.avro.generic.GenericDatumWriter; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; + +import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; @@ -332,4 +334,35 @@ private String fromAvroToJson(byte[] avroBytes, Schema schema, boolean includeNa return new String(output.toByteArray(), StandardCharsets.UTF_8.name()); } + + @Test + public void testJsonEncoderInitAutoFlush() throws IOException { + Schema s = new Schema.Parser().parse("\"int\""); + OutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + JsonEncoder enc = factory.jsonEncoder(s, out, false); + enc.configure(out, false); + enc.writeInt(24); + enc.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("24", baos.toString()); + } + + @Test + public void
testJsonEncoderInitAutoFlushDisabled() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStream out = new BufferedOutputStream(baos); + Schema ints = Schema.create(Type.INT); + Encoder e = factory.jsonEncoder(ints, out, false, false); + String separator = System.getProperty("line.separator"); + GenericDatumWriter writer = new GenericDatumWriter(ints); + writer.write(1, e); + writer.write(2, e); + e.flush(); + assertEquals("", baos.toString()); + out.flush(); + assertEquals("1" + separator + "2", baos.toString()); + out.close(); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java index 693fbd421e8..05057139600 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestJsonDecoder.java @@ -19,11 +19,17 @@ import static org.junit.jupiter.api.Assertions.assertEquals; +import org.apache.avro.AvroTypeException; import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; import org.apache.avro.generic.GenericDatumReader; import org.apache.avro.generic.GenericRecord; + +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.io.IOException; + public class TestJsonDecoder { @Test @@ -76,4 +82,14 @@ void reorderFields() throws Exception { assertEquals(200, in.readLong()); in.skipArray(); } + + @Test + void testIntWithError() throws IOException { + Schema schema = SchemaBuilder.builder("test").record("example").fields().requiredInt("id").endRecord(); + String record = "{ \"id\": -1.2 }"; + + GenericDatumReader reader = new GenericDatumReader<>(schema, schema); + JsonDecoder decoder = DecoderFactory.get().jsonDecoder(schema, record); + Assertions.assertThrows(AvroTypeException.class, () -> reader.read(null, decoder)); + } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java index c880d9fd55a..8a960427922 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIO.java @@ -17,48 +17,34 @@ */ package org.apache.avro.io; +import org.apache.avro.Schema; +import org.apache.avro.io.TestValidatingIO.Encoding; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.Collection; +import java.util.stream.Stream; -import org.apache.avro.Schema; -import org.apache.avro.io.TestValidatingIO.Encoding; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; - -@RunWith(Parameterized.class) public class TestResolvingIO { - protected final Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; - - public TestResolvingIO(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, - String jsonReaderSchema, String readerCalls) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - } - - @Test - public 
void testIdentical() throws IOException { - performTest(eEnc, iSkipL, sJsWrtSchm, sWrtCls, sJsWrtSchm, sWrtCls); + @ParameterizedTest + @MethodSource("data2") + public void testIdentical(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonWriterSchema, writerCalls); } private static final int COUNT = 10; - @Test - public void testCompatible() throws IOException { - performTest(eEnc, iSkipL, sJsWrtSchm, sWrtCls, sJsRdrSchm, sRdrCls); + @ParameterizedTest + @MethodSource("data2") + public void testCompatible(Encoding encoding, int skip, String jsonWriterSchema, String writerCalls, + String jsonReaderSchema, String readerCalls) throws IOException { + performTest(encoding, skip, jsonWriterSchema, writerCalls, jsonReaderSchema, readerCalls); } private void performTest(Encoding encoding, int skipLevel, String jsonWriterSchema, String writerCalls, @@ -100,9 +86,8 @@ static void check(Schema wsc, Schema rsc, byte[] bytes, String calls, Object[] v TestValidatingIO.check(msg, vi, calls, values, skipLevel); } - @Parameterized.Parameters - public static Collection data2() { - return Arrays.asList(TestValidatingIO.convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data2() { + return TestValidatingIO.convertTo2dStream(encodings, skipLevels, testSchemas()); } static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, { Encoding.JSON } }; diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java index 8e3dc8e53d7..0a55d18a742 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestResolvingIOResolving.java @@ -17,53 +17,32 @@ */ package org.apache.avro.io; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; - import org.apache.avro.Schema; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -@RunWith(Parameterized.class) -public class TestResolvingIOResolving { - protected TestValidatingIO.Encoding eEnc; - protected final int iSkipL; - protected final String sJsWrtSchm; - protected final String sWrtCls; - protected final String sJsRdrSchm; - protected final String sRdrCls; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; - protected final Object[] oaWrtVals; - protected final Object[] oaRdrVals; +import java.io.IOException; +import java.util.stream.Stream; - public TestResolvingIOResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, - String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) { - this.eEnc = encoding; - this.iSkipL = skipLevel; - this.sJsWrtSchm = jsonWriterSchema; - this.sWrtCls = writerCalls; - this.oaWrtVals = writerValues; - this.sJsRdrSchm = jsonReaderSchema; - this.sRdrCls = readerCalls; - this.oaRdrVals = readerValues; - } +public class TestResolvingIOResolving { - @Test - public void testResolving() throws IOException { - Schema writerSchema = new Schema.Parser().parse(sJsWrtSchm); - byte[] bytes = TestValidatingIO.make(writerSchema, sWrtCls, oaWrtVals, eEnc); - Schema readerSchema = new 
Schema.Parser().parse(sJsRdrSchm); - TestValidatingIO.print(eEnc, iSkipL, writerSchema, readerSchema, oaWrtVals, oaRdrVals); - TestResolvingIO.check(writerSchema, readerSchema, bytes, sRdrCls, oaRdrVals, eEnc, iSkipL); + @ParameterizedTest + @MethodSource("data3") + public void testResolving(TestValidatingIO.Encoding encoding, int skipLevel, String jsonWriterSchema, + String writerCalls, Object[] writerValues, String jsonReaderSchema, String readerCalls, Object[] readerValues) + throws IOException { + Schema writerSchema = new Schema.Parser().parse(jsonWriterSchema); + byte[] bytes = TestValidatingIO.make(writerSchema, writerCalls, writerValues, encoding); + Schema readerSchema = new Schema.Parser().parse(jsonReaderSchema); + TestValidatingIO.print(encoding, skipLevel, writerSchema, readerSchema, writerValues, readerValues); + TestResolvingIO.check(writerSchema, readerSchema, bytes, readerCalls, readerValues, encoding, skipLevel); } - @Parameterized.Parameters - public static Collection data3() { - Collection ret = Arrays.asList(TestValidatingIO.convertTo2dArray(TestResolvingIO.encodings, - TestResolvingIO.skipLevels, dataForResolvingTests())); - return ret; + public static Stream data3() { + return TestValidatingIO.convertTo2dStream(TestResolvingIO.encodings, TestResolvingIO.skipLevels, + dataForResolvingTests()); } private static Object[][] dataForResolvingTests() { @@ -101,7 +80,7 @@ private static Object[][] dataForResolvingTests() { "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g1\", " + "\"type\":{\"type\":\"record\",\"name\":\"inner\",\"fields\":[" + "{\"name\":\"f1\", \"type\":\"int\", \"default\": 101}," + "{\"name\":\"f2\", \"type\":\"int\"}]}}, " - + "{\"name\": \"g2\", \"type\": \"long\"}]}}", + + "{\"name\": \"g2\", \"type\": \"long\"}]}", "RRIIL", new Object[] { 10, 101, 11L } }, // Default value for a record. 
{ "{\"type\":\"record\",\"name\":\"outer\",\"fields\":[" + "{\"name\": \"g2\", \"type\": \"long\"}]}", "L", diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java index 3056d5430af..063414fbb43 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/TestValidatingIO.java @@ -17,9 +17,15 @@ */ package org.apache.avro.io; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; +import org.apache.avro.Schema; +import org.apache.avro.util.Utf8; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -27,20 +33,14 @@ import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Random; -import org.apache.avro.Schema; -import org.apache.avro.util.Utf8; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Spliterator; +import java.util.Spliterators; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; -@RunWith(Parameterized.class) public class TestValidatingIO { enum Encoding { BINARY, BLOCKING_BINARY, JSON, @@ -48,30 +48,19 @@ enum Encoding { private static final Logger LOG = LoggerFactory.getLogger(TestValidatingIO.class); - private Encoding eEnc; - private int iSkipL; - private String sJsSch; - private String sCl; - - public TestValidatingIO(Encoding enc, int skip, String js, String cls) { - this.eEnc = enc; - this.iSkipL = skip; - this.sJsSch = js; - this.sCl = cls; - } - private static final int COUNT = 1; - @Test - public void testMain() throws IOException { + @ParameterizedTest + @MethodSource("data") + public void testMain(Encoding enc, int skip, String js, String cls) throws IOException { for (int i = 0; i < COUNT; i++) { - testOnce(new Schema.Parser().parse(sJsSch), sCl, iSkipL, eEnc); + testOnce(new Schema.Parser().parse(js), cls, skip, enc); } } private void testOnce(Schema schema, String calls, int skipLevel, Encoding encoding) throws IOException { Object[] values = randomValues(calls); - print(eEnc, iSkipL, schema, schema, values, values); + print(encoding, skipLevel, schema, schema, values, values); byte[] bytes = make(schema, calls, values, encoding); check(schema, bytes, calls, values, skipLevel, encoding); } @@ -204,7 +193,7 @@ public static void generate(Encoder vw, String calls, Object[] values) throws IO break; } default: - fail(); + Assertions.fail(); break; } } @@ -254,7 +243,7 @@ public static Object[] randomValues(String calls) { case 's': break; default: - fail(); + Assertions.fail(); break; } } @@ -324,25 +313,25 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, vi.readNull(); break; case 'B': - assertEquals(msg, values[p++], vi.readBoolean()); + Assertions.assertEquals(values[p++], vi.readBoolean(), msg); break; case 'I': - assertEquals(msg, values[p++], vi.readInt()); + Assertions.assertEquals(values[p++], 
vi.readInt(), msg); break; case 'L': - assertEquals(msg, values[p++], vi.readLong()); + Assertions.assertEquals(values[p++], vi.readLong(), msg); break; case 'F': if (!(values[p] instanceof Float)) - fail(); + Assertions.fail(); float f = (Float) values[p++]; - assertEquals(msg, f, vi.readFloat(), Math.abs(f / 1000)); + Assertions.assertEquals(f, vi.readFloat(), Math.abs(f / 1000)); break; case 'D': if (!(values[p] instanceof Double)) - fail(); + Assertions.fail(); double d = (Double) values[p++]; - assertEquals(msg, d, vi.readDouble(), Math.abs(d / 1000)); + Assertions.assertEquals(d, vi.readDouble(), Math.abs(d / 1000), msg); break; case 'S': extractInt(cs); @@ -351,7 +340,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'K': @@ -361,7 +350,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, p++; } else { String s = (String) values[p++]; - assertEquals(msg, new Utf8(s), vi.readString(null)); + Assertions.assertEquals(new Utf8(s), vi.readString(null), msg); } break; case 'b': @@ -374,7 +363,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, ByteBuffer bb2 = vi.readBytes(null); byte[] actBytes = new byte[bb2.remaining()]; System.arraycopy(bb2.array(), bb2.position(), actBytes, 0, bb2.remaining()); - assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } break; case 'f': { @@ -386,7 +375,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, byte[] bb = (byte[]) values[p++]; byte[] actBytes = new byte[len]; vi.readFixed(actBytes); - assertArrayEquals(msg, bb, actBytes); + Assertions.assertArrayEquals(bb, actBytes, msg); } } break; @@ -395,7 +384,7 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, if (level == skipLevel) { vi.readEnum(); } else { - assertEquals(msg, e, vi.readEnum()); + Assertions.assertEquals(e, vi.readEnum(), msg); } } break; @@ -422,16 +411,16 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; } case ']': - assertEquals(msg, 0, counts[level]); + Assertions.assertEquals(0, counts[level], msg); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.arrayNext()); + Assertions.assertEquals(0, vi.arrayNext(), msg); } level--; break; case '}': - assertEquals(0, counts[level]); + Assertions.assertEquals(0, counts[level]); if (!isEmpty[level]) { - assertEquals(msg, 0, vi.mapNext()); + Assertions.assertEquals(0, vi.mapNext(), msg); } level--; break; @@ -450,28 +439,28 @@ public static void check(String msg, Decoder vi, String calls, Object[] values, continue; case 'U': { int idx = extractInt(cs); - assertEquals(msg, idx, vi.readIndex()); + Assertions.assertEquals(idx, vi.readIndex(), msg); continue; } case 'R': ((ResolvingDecoder) vi).readFieldOrder(); continue; default: - fail(msg); + Assertions.fail(msg); } } catch (RuntimeException e) { throw new RuntimeException(msg, e); } } - assertEquals(msg, values.length, p); + Assertions.assertEquals(values.length, p, msg); } private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray) throws IOException { final char end = isArray ? 
']' : '}'; if (isArray) { - assertEquals(msg, 0, vi.skipArray()); + Assertions.assertEquals(0, vi.skipArray(), msg); } else if (end == '}') { - assertEquals(msg, 0, vi.skipMap()); + Assertions.assertEquals(0, vi.skipMap(), msg); } int level = 0; int p = 0; @@ -507,9 +496,8 @@ private static int skip(String msg, InputScanner cs, Decoder vi, boolean isArray throw new RuntimeException("Don't know how to skip"); } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(convertTo2dArray(encodings, skipLevels, testSchemas())); + public static Stream data() { + return convertTo2dStream(encodings, skipLevels, testSchemas()); } private static Object[][] encodings = new Object[][] { { Encoding.BINARY }, { Encoding.BLOCKING_BINARY }, @@ -517,19 +505,11 @@ public static Collection data() { private static Object[][] skipLevels = new Object[][] { { -1 }, { 0 }, { 1 }, { 2 }, }; - public static Object[][] convertTo2dArray(final Object[][]... values) { - ArrayList ret = new ArrayList<>(); - + public static Stream convertTo2dStream(final Object[][]... values) { Iterator iter = cartesian(values); - while (iter.hasNext()) { - Object[] objects = iter.next(); - ret.add(objects); - } - Object[][] retArrays = new Object[ret.size()][]; - for (int i = 0; i < ret.size(); i++) { - retArrays[i] = ret.get(i); - } - return retArrays; + Stream stream = StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), + false); + return stream.map(Arguments::of); } /** diff --git a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java index 4eac760cec7..c6d8856733b 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java +++ b/lang/java/avro/src/test/java/org/apache/avro/io/parsing/TestResolvingGrammarGenerator.java @@ -21,8 +21,10 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.StringReader; +import java.io.UncheckedIOException; import java.util.Arrays; import java.util.Collection; +import java.util.stream.Stream; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonNode; @@ -38,29 +40,21 @@ import org.apache.avro.generic.GenericRecordBuilder; import org.apache.avro.io.Encoder; import org.apache.avro.io.EncoderFactory; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_1; import static org.apache.avro.TestSchemas.ENUM1_AB_SCHEMA_NAMESPACE_2; -@RunWith(Parameterized.class) public class TestResolvingGrammarGenerator { - private final Schema schema; - private final JsonNode data; - - public TestResolvingGrammarGenerator(String jsonSchema, String jsonData) throws IOException { - this.schema = new Schema.Parser().parse(jsonSchema); - JsonFactory factory = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(factory); - - this.data = mapper.readTree(new StringReader(jsonData)); - } - @Test - public void test() throws IOException { + @ParameterizedTest + @MethodSource("data") + void test(Schema schema, JsonNode data) throws IOException { 
ByteArrayOutputStream baos = new ByteArrayOutputStream(); EncoderFactory factory = EncoderFactory.get(); Encoder e = factory.validatingEncoder(schema, factory.binaryEncoder(baos, null)); @@ -70,7 +64,7 @@ public void test() throws IOException { } @Test - public void testRecordMissingRequiredFieldError() throws Exception { + void recordMissingRequiredFieldError() throws Exception { Schema schemaWithoutField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() .stringType().noDefault().endRecord(); Schema schemaWithField = SchemaBuilder.record("MyRecord").namespace("ns").fields().name("field1").type() @@ -79,15 +73,15 @@ public void testRecordMissingRequiredFieldError() throws Exception { byte[] data = writeRecord(schemaWithoutField, record); try { readRecord(schemaWithField, data); - Assert.fail("Expected exception not thrown"); + Assertions.fail("Expected exception not thrown"); } catch (AvroTypeException typeException) { - Assert.assertEquals("Incorrect exception message", - "Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", typeException.getMessage()); + Assertions.assertEquals("Found ns.MyRecord, expecting ns.MyRecord, missing required field field2", + typeException.getMessage(), "Incorrect exception message"); } } @Test - public void testDifferingEnumNamespaces() throws Exception { + void differingEnumNamespaces() throws Exception { Schema schema1 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_1) .noDefault().endRecord(); Schema schema2 = SchemaBuilder.record("MyRecord").fields().name("field").type(ENUM1_AB_SCHEMA_NAMESPACE_2) @@ -95,24 +89,35 @@ public void testDifferingEnumNamespaces() throws Exception { GenericData.EnumSymbol genericEnumSymbol = new GenericData.EnumSymbol(ENUM1_AB_SCHEMA_NAMESPACE_1, "A"); GenericData.Record record = new GenericRecordBuilder(schema1).set("field", genericEnumSymbol).build(); byte[] data = writeRecord(schema1, record); - Assert.assertEquals(genericEnumSymbol, readRecord(schema1, data).get("field")); - Assert.assertEquals(genericEnumSymbol, readRecord(schema2, data).get("field")); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema1, data).get("field")); + Assertions.assertEquals(genericEnumSymbol, readRecord(schema2, data).get("field")); } - @Parameterized.Parameters - public static Collection data() { - Collection ret = Arrays.asList(new Object[][] { + public static Stream data() { + Collection ret = Arrays.asList(new String[][] { { "{ \"type\": \"record\", \"name\": \"r\", \"fields\": [ " + " { \"name\" : \"f1\", \"type\": \"int\" }, " - + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] } }", "{ \"f2\": 10.4, \"f1\": 10 } " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s1\" " }, - { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] } }", " \"s2\" " }, + + " { \"name\" : \"f2\", \"type\": \"float\" } " + "] }", "{ \"f2\": 10.4, \"f1\": 10 } " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s1\" " }, + { "{ \"type\": \"enum\", \"name\": \"e\", \"symbols\": " + "[ \"s1\", \"s2\"] }", " \"s2\" " }, { "{ \"type\": \"fixed\", \"name\": \"f\", \"size\": 10 }", "\"hello\"" }, { "{ \"type\": \"array\", \"items\": \"int\" }", "[ 10, 20, 30 ]" }, { "{ \"type\": \"map\", \"values\": \"int\" }", "{ \"k1\": 10, \"k3\": 20, \"k3\": 30 }" }, { "[ \"int\", \"long\" ]", "10" }, { "\"string\"", "\"hello\"" }, { "\"bytes\"", "\"hello\"" }, { 
"\"int\"", "10" }, { "\"long\"", "10" }, { "\"float\"", "10.0" }, { "\"double\"", "10.0" }, { "\"boolean\"", "true" }, { "\"boolean\"", "false" }, { "\"null\"", "null" }, }); - return ret; + + final JsonFactory factory = new JsonFactory(); + final ObjectMapper mapper = new ObjectMapper(factory); + + return ret.stream().map((String[] args) -> { + Schema schema = new Schema.Parser().parse(args[0]); + try { + JsonNode data = mapper.readTree(new StringReader(args[1])); + return Arguments.of(schema, data); + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + }); } private byte[] writeRecord(Schema schema, GenericData.Record record) throws Exception { diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java index 2915f96e6f5..5f52a2cf789 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflect.java @@ -536,6 +536,34 @@ public static interface P2 { void error() throws E1; } + private static class NullableDefaultTest { + @Nullable + @AvroDefault("1") + int foo; + } + + @Test + public void testAvroNullableDefault() { + check(NullableDefaultTest.class, + "{\"type\":\"record\",\"name\":\"NullableDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"null\",\"int\"],\"default\":1}]}"); + } + + private static class UnionDefaultTest { + @Union({ Integer.class, String.class }) + @AvroDefault("1") + Object foo; + } + + @Test + public void testAvroUnionDefault() { + check(UnionDefaultTest.class, + "{\"type\":\"record\",\"name\":\"UnionDefaultTest\"," + + "\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[" + + "{\"name\":\"foo\",\"type\":[\"int\",\"string\"],\"default\":1}]}"); + } + @Test void p2() throws Exception { Schema e1 = ReflectData.get().getSchema(E1.class); @@ -977,7 +1005,8 @@ public static interface C { void forwardReference() { ReflectData data = ReflectData.get(); Protocol reflected = data.getProtocol(C.class); - Protocol reparsed = Protocol.parse(reflected.toString()); + String ref = reflected.toString(); + Protocol reparsed = Protocol.parse(ref); assertEquals(reflected, reparsed); assert (reparsed.getTypes().contains(data.getSchema(A.class))); assert (reparsed.getTypes().contains(data.getSchema(B1.class))); @@ -1231,7 +1260,7 @@ private static class Z { @Test void dollarTerminatedNamespaceCompatibility() { ReflectData data = ReflectData.get(); - Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect$\",\"fields\":[]}"); assertEquals(data.getSchema(data.getClass(s)).toString(), "{\"type\":\"record\",\"name\":\"Z\",\"namespace\":\"org.apache.avro.reflect.TestReflect\",\"fields\":[]}"); @@ -1241,7 +1270,7 @@ void dollarTerminatedNamespaceCompatibility() { void dollarTerminatedNestedStaticClassNamespaceCompatibility() { ReflectData data = ReflectData.get(); // Older versions of Avro generated this namespace on nested records. 
- Schema s = new Schema.Parser().setValidate(false).parse( + Schema s = new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse( "{\"type\":\"record\",\"name\":\"AnotherSampleRecord\",\"namespace\":\"org.apache.avro.reflect.TestReflect$SampleRecord\",\"fields\":[]}"); assertThat(data.getSchema(data.getClass(s)).getFullName(), is("org.apache.avro.reflect.TestReflect.SampleRecord.AnotherSampleRecord")); diff --git a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java index f25f022aab3..485a765d7a8 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java +++ b/lang/java/avro/src/test/java/org/apache/avro/reflect/TestReflectLogicalTypes.java @@ -768,6 +768,6 @@ public boolean equals(Object obj) { return false; } RecordWithTimestamps that = (RecordWithTimestamps) obj; - return Objects.equals(that.localDateTime, that.localDateTime); + return Objects.equals(localDateTime, that.localDateTime); } } diff --git a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java index c3b330b28c1..e64b3f4c220 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java +++ b/lang/java/avro/src/test/java/org/apache/avro/specific/TestSpecificRecordWithUnion.java @@ -29,7 +29,8 @@ import org.apache.avro.io.DatumWriter; import org.apache.avro.io.BinaryEncoder; import org.apache.avro.io.Decoder; -import org.junit.Test; + +import org.junit.jupiter.api.Test; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -40,7 +41,7 @@ public class TestSpecificRecordWithUnion { @Test - public void testUnionLogicalDecimalConversion() throws IOException { + void unionLogicalDecimalConversion() throws IOException { final TestUnionRecord record = TestUnionRecord.newBuilder().setAmount(BigDecimal.ZERO).build(); final Schema schema = SchemaBuilder.unionOf().nullType().and().type(record.getSchema()).endUnion(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java index c7605770e6f..e0977ff9f96 100644 --- a/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java +++ b/lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java @@ -28,6 +28,8 @@ import java.io.ObjectOutputStream; import java.nio.charset.StandardCharsets; +import org.apache.avro.SystemLimitException; +import org.apache.avro.TestSystemLimitException; import org.junit.jupiter.api.Test; public class TestUtf8 { @@ -96,6 +98,26 @@ void hashCodeReused() { assertEquals(3198781, u.hashCode()); } + @Test + void oversizeUtf8() { + Utf8 u = new Utf8(); + u.setByteLength(1024); + assertEquals(1024, u.getByteLength()); + assertThrows(UnsupportedOperationException.class, + () -> u.setByteLength(TestSystemLimitException.MAX_ARRAY_VM_LIMIT + 1)); + + try { + System.setProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY, Long.toString(1000L)); + TestSystemLimitException.resetLimits(); + + Exception ex = assertThrows(SystemLimitException.class, () -> u.setByteLength(1024)); + assertEquals("String length 1024 exceeds maximum allowed", ex.getMessage()); + } finally { + System.clearProperty(SystemLimitException.MAX_STRING_LENGTH_PROPERTY); + TestSystemLimitException.resetLimits(); + } + } + @Test void serialization() throws IOException, 
ClassNotFoundException { try (ByteArrayOutputStream bos = new ByteArrayOutputStream(); diff --git a/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java new file mode 100644 index 00000000000..8ecda312032 --- /dev/null +++ b/lang/java/avro/src/test/java/org/apache/avro/util/WeakIdentityHashMapTest.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.avro.util; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * This test aims to stress the WeakIdentityHashMap class in a multithreaded environment. + */ +class WeakIdentityHashMapTest { + + private static final int TEST_SIZE = 4001; + + List data = new ArrayList<>(TEST_SIZE); + + final WeakIdentityHashMap map = new WeakIdentityHashMap<>(); + + List exceptions = new ArrayList<>(TEST_SIZE); + + @Test + void stressMap() { + + for (int i = 1; i <= TEST_SIZE; i++) { + data.add("Data_" + i); + } + + List threads = new ArrayList<>(80); + for (int i = 0; i <= 80; i++) { + final int seed = (i + 1) * 100; + Runnable runnable = () -> rundata(seed); + Thread t = new Thread(runnable); + threads.add(t); + } + threads.forEach(Thread::start); + threads.forEach((Thread t) -> { + try { + t.join(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + }); + Assertions.assertTrue(exceptions.isEmpty()); + } + + void rundata(int seed) { + try { + for (int i = 1; i <= TEST_SIZE; i++) { + String keyValue = data.get((i + seed) % TEST_SIZE); + map.put(keyValue, keyValue); + if (i % 200 == 0) { + sleep(); + } + String keyValueRemove = data.get(((i + seed) * 3) % TEST_SIZE); + map.remove(keyValueRemove); + } + } catch (RuntimeException ex) { + exceptions.add(ex); + } + } + + void sleep() { + try { + Thread.sleep(5); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + +} diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion new file mode 100644 index 00000000000..890ba764260 --- /dev/null +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.Conversion @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.avro.CustomTypeConverter diff --git a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory index e111a25c43f..b55c233ae46 100644 --- a/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory +++ b/lang/java/avro/src/test/resources/META-INF/services/org.apache.avro.LogicalTypes$LogicalTypeFactory @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -org.apache.avro.DummyLogicalTypeFactory +org.apache.avro.CustomTypeLogicalTypeFactory diff --git a/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc new file mode 100644 index 00000000000..6902084350f --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/ApplicationEvent.avsc @@ -0,0 +1,28 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "ApplicationEvent", + "fields": [ + { + "name": "applicationId", + "type": "string", + "doc": "Application ID" + }, + { + "name": "status", + "type": "string", + "doc": "Application Status" + }, + { + "name": "documents", + "type": ["null", { + "type": "array", + "items": "model.DocumentInfo" + }], + "doc": "", + "default": null + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc new file mode 100644 index 00000000000..95dd4243ea6 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/DocumentInfo.avsc @@ -0,0 +1,19 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "DocumentInfo", + "fields": [ + { + "name": "documentId", + "type": "string", + "doc": "Document ID" + }, + { + "name": "filePath", + "type": "string", + "doc": "Document Path" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc new file mode 100644 index 00000000000..ac6d08291d9 --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/MyResponse.avsc @@ -0,0 +1,14 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "MyResponse", + "fields": [ + { + "name": "isSuccessful", + "type": "boolean", + "doc": "Indicator for successful or unsuccessful call" + } + ] + +} diff --git a/lang/java/avro/src/test/resources/multipleFile/README.md b/lang/java/avro/src/test/resources/multipleFile/README.md new file mode 100644 index 00000000000..fe3541b660e --- /dev/null +++ b/lang/java/avro/src/test/resources/multipleFile/README.md @@ -0,0 +1,8 @@ +## test for parsing multiple files. +This folder aims to test the `public List Schema.parse(Iterable sources) throws IOException` method. + +The objective is to check that a record schema defined in one file can be used in another record schema as a field type.
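As an illustration, such a multi-file parse could look like the sketch below. The class name is a placeholder for this example, and the `List`/`Iterable` parse overload is assumed to behave as the method named above describes (accepting the schema files together so cross-file references resolve); this is not a verbatim API reference.

```java
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import org.apache.avro.Schema;

// Sketch only: parse all schema files in this folder in one call, so that a
// schema in one file can reference a record type defined in another file.
public class ParseMultipleFilesExample {
  public static void main(String[] args) throws IOException {
    List<File> sources = Arrays.asList(new File("DocumentInfo.avsc"), new File("ApplicationEvent.avsc"),
        new File("MyResponse.avsc"));
    List<Schema> schemas = new Schema.Parser().parse(sources); // the overload described above
    schemas.forEach(schema -> System.out.println(schema.getFullName()));
  }
}
```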
+Here, the ApplicationEvent.avsc file contains a field of type DocumentInfo, defined in file DocumentInfo.avsc. + +The test is written at TestSchema.testParseMultipleFile. + diff --git a/lang/java/compiler/pom.xml b/lang/java/compiler/pom.xml index 53719816387..2019318ee97 100644 --- a/lang/java/compiler/pom.xml +++ b/lang/java/compiler/pom.xml @@ -137,6 +137,7 @@ org.apache.avro.compiler.specific.SchemaTask ${project.basedir}/src/test/resources/full_record_v1.avsc ${project.basedir}/src/test/resources/full_record_v2.avsc + ${project.basedir}/src/test/resources/regression_error_field_in_record.avsc ${project.basedir}/target/generated-test-sources/javacc @@ -182,37 +183,8 @@ - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.codehaus.mojo - exec-maven-plugin - [1.0,) - - exec - - - - - - - - - - - - - ${project.groupId} @@ -241,4 +213,42 @@ + + + m2e + + m2e.version + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.codehaus.mojo + exec-maven-plugin + [1.0,) + + exec + + + + + + + + + + + + + + + diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java index 193f871178b..6a1a137898d 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/idl/SchemaResolver.java @@ -23,6 +23,7 @@ import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import org.apache.avro.Protocol; @@ -44,6 +45,8 @@ private SchemaResolver() { private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + private static final AtomicInteger COUNTER = new AtomicInteger(); + /** * Create a schema to represent an "unresolved" schema.
(used to represent a * schema where the definition is not known at the time) This concept might be @@ -53,8 +56,8 @@ private SchemaResolver() { * @return */ static Schema unresolvedSchema(final String name) { - Schema schema = Schema.createRecord(UR_SCHEMA_NAME, "unresolved schema", UR_SCHEMA_NS, false, - Collections.EMPTY_LIST); + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.EMPTY_LIST); schema.addProp(UR_SCHEMA_ATTR, name); return schema; } @@ -66,8 +69,8 @@ static Schema unresolvedSchema(final String name) { * @return */ static boolean isUnresolvedSchema(final Schema schema) { - return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null - && UR_SCHEMA_NAME.equals(schema.getName()) && UR_SCHEMA_NS.equals(schema.getNamespace())); + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); } /** diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java index b35adbd9313..ec8ff778983 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/schema/Schemas.java @@ -21,7 +21,6 @@ import java.util.Collections; import java.util.Deque; import java.util.IdentityHashMap; -import java.util.Map; import java.util.Set; import java.util.function.Supplier; import java.util.stream.Collectors; @@ -67,10 +66,7 @@ public static void copyLogicalTypes(final Schema from, final Schema to) { } public static void copyProperties(final JsonProperties from, final JsonProperties to) { - Map objectProps = from.getObjectProps(); - for (Map.Entry entry : objectProps.entrySet()) { - to.addProp(entry.getKey(), entry.getValue()); - } + from.forEachProperty(to::addProp); } public static boolean hasGeneratedJavaClass(final Schema schema) { diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index 3210bd11c58..117fd2ed6ee 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -129,6 +129,10 @@ void addLogicalTypeConversions(SpecificData specificData) { private String suffix = ".java"; private List additionalVelocityTools = Collections.emptyList(); + private String recordSpecificClass = "org.apache.avro.specific.SpecificRecordBase"; + + private String errorSpecificClass = "org.apache.avro.specific.SpecificExceptionBase"; + /* * Used in the record.vm template. 
diff --git a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java index 3210bd11c58..117fd2ed6ee 100644 --- a/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java +++ b/lang/java/compiler/src/main/java/org/apache/avro/compiler/specific/SpecificCompiler.java @@ -129,6 +129,10 @@ void addLogicalTypeConversions(SpecificData specificData) { private String suffix = ".java"; private List<Object> additionalVelocityTools = Collections.emptyList(); + private String recordSpecificClass = "org.apache.avro.specific.SpecificRecordBase"; + + private String errorSpecificClass = "org.apache.avro.specific.SpecificExceptionBase"; + /* * Used in the record.vm template. */ @@ -175,8 +179,20 @@ public SpecificCompiler(Protocol protocol) { } public SpecificCompiler(Schema schema) { + this(Collections.singleton(schema)); + } + + public SpecificCompiler(Collection<Schema> schemas) { + this(); + for (Schema schema : schemas) { + enqueue(schema); + } + this.protocol = null; + } + + public SpecificCompiler(Iterable<Schema> schemas) { this(); - enqueue(schema); + schemas.forEach(this::enqueue); this.protocol = null; } @@ -676,9 +692,7 @@ private Protocol addStringType(Protocol p) { Protocol newP = new Protocol(p.getName(), p.getDoc(), p.getNamespace()); Map<String, Schema> types = new LinkedHashMap<>(); - for (Map.Entry<String, Object> a : p.getObjectProps().entrySet()) { - newP.addProp(a.getKey(), a.getValue()); - } + p.forEachProperty(newP::addProp); // annotate types Collection<Schema> namedTypes = new LinkedHashSet<>(); @@ -956,19 +970,21 @@ public int getNonNullIndex(Schema s) { * record.vm can handle the schema being presented. */ public boolean isCustomCodable(Schema schema) { - if (schema.isError()) - return false; return isCustomCodable(schema, new HashSet<>()); } private boolean isCustomCodable(Schema schema, Set<Schema> seen) { if (!seen.add(schema)) + // Recursive call: assume custom codable until a caller on the call stack proves + // otherwise. return true; if (schema.getLogicalType() != null) return false; boolean result = true; switch (schema.getType()) { case RECORD: + if (schema.isError()) + return false; for (Schema.Field f : schema.getFields()) result &= isCustomCodable(f.schema(), seen); break; @@ -1071,7 +1087,7 @@ public static String javaEscape(String o) { * Utility for template use. Escapes comment end with HTML entities. */ public static String escapeForJavadoc(String s) { - return s.replace("*/", "*&#47;"); + return s.replace("*/", "*&#47;").replace("<", "&lt;").replace(">", "&gt;"); }
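Editor's note: why the extra replacements matter — javaType(...) can emit generic types whose angle brackets javadoc would treat as HTML. A small illustration using the same replacements as escapeForJavadoc above (the input string is an assumed example of javaType output):

    class EscapeSketch {
      public static void main(String[] args) {
        String javaType = "java.util.List<java.lang.String>"; // e.g. for an array<string> field
        // '<' and '>' become HTML entities so the generated javadoc stays well-formed.
        String safe = javaType.replace("<", "&lt;").replace(">", "&gt;");
        System.out.println(safe); // java.util.List&lt;java.lang.String&gt;
      }
    }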
/** @@ -1272,10 +1288,7 @@ private static String generateMethodName(Schema schema, Field field, String pref // Check for the special case in which the schema defines two fields whose // names are identical except for the case of the first character: - char firstChar = field.name().charAt(0); - String conflictingFieldName = (Character.isLowerCase(firstChar) ? Character.toUpperCase(firstChar) - : Character.toLowerCase(firstChar)) + (field.name().length() > 1 ? field.name().substring(1) : ""); - boolean fieldNameConflict = schema.getField(conflictingFieldName) != null; + int indexNameConflict = calcNameIndex(field.name(), schema); StringBuilder methodBuilder = new StringBuilder(prefix); String fieldName = mangle(field.name(), schema.isError() ? ERROR_RESERVED_WORDS : ACCESSOR_MUTATOR_RESERVED_WORDS, @@ -1295,16 +1308,75 @@ methodBuilder.append(postfix); // If there is a field name conflict append $0 or $1 - if (fieldNameConflict) { + if (indexNameConflict >= 0) { if (methodBuilder.charAt(methodBuilder.length() - 1) != '$') { methodBuilder.append('$'); } - methodBuilder.append(Character.isLowerCase(firstChar) ? '0' : '1'); + methodBuilder.append(indexNameConflict); } return methodBuilder.toString(); } + /** + * Calculate the name index for a getter/setter field in case of a conflict. + * For example, a schema with fields __X, _X, _x, X, x results in the indexes + * __X: 3, _X: 2, _x: 1, X: 0, x: none (-1). + * + * @param fieldName : field name. + * @param schema : schema. + * @return index for field. + */ + private static int calcNameIndex(String fieldName, Schema schema) { + // Get the name without leading underscores, and count the other fields that + // collide with it (same sub-name, or same sub-name with the first letter's case flipped). + int countSimilar = 0; + String pureFieldName = fieldName; + while (!pureFieldName.isEmpty() && pureFieldName.charAt(0) == '_') { + pureFieldName = pureFieldName.substring(1); + if (schema.getField(pureFieldName) != null) { + countSimilar++; + } + String reversed = reverseFirstLetter(pureFieldName); + if (schema.getField(reversed) != null) { + countSimilar++; + } + } + // Field names starting with an uppercase letter count one extra. + String reversed = reverseFirstLetter(fieldName); + if (!pureFieldName.isEmpty() && Character.isUpperCase(pureFieldName.charAt(0)) + && schema.getField(reversed) != null) { + countSimilar++; + } + + int ret = -1; // no similar name: no index. + if (countSimilar > 0) { + ret = countSimilar - 1; // indexes start at $0. + } + + return ret; + } + + /** + * Swap the case of the first letter (after any leading underscores): __Name <=> __name. + * + * @param name : input name. + * @return name with the case of its first letter flipped. + */ + private static String reverseFirstLetter(String name) { + StringBuilder builder = new StringBuilder(name); + int index = 0; + while (builder.length() > index && builder.charAt(index) == '_') { + index++; + } + if (builder.length() > index) { + char c = builder.charAt(index); + char inverseC = Character.isLowerCase(c) ? Character.toUpperCase(c) : Character.toLowerCase(c); + builder.setCharAt(index, inverseC); + } + return builder.toString(); + } + /** * Tests whether an unboxed Java type can be set to null */ @@ -1336,4 +1408,20 @@ public static void main(String[] args) throws Exception { public void setOutputCharacterEncoding(String outputCharacterEncoding) { this.outputCharacterEncoding = outputCharacterEncoding; } + + public String getSchemaParentClass(boolean isError) { + if (isError) { + return this.errorSpecificClass; + } else { + return this.recordSpecificClass; + } + } + + public void setRecordSpecificClass(final String recordSpecificClass) { + this.recordSpecificClass = recordSpecificClass; + } + + public void setErrorSpecificClass(final String errorSpecificClass) { + this.errorSpecificClass = errorSpecificClass; + } }
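Editor's note: the new recordSpecificClass / errorSpecificClass knobs let generated classes extend custom base classes. A hedged usage sketch — the setter names are from this diff, but the base-class name is hypothetical and must itself extend SpecificRecordBase for the generated code to compile:

    import java.io.File;
    import org.apache.avro.Schema;
    import org.apache.avro.compiler.specific.SpecificCompiler;

    class CustomBaseSketch {
      public static void main(String[] args) throws Exception {
        Schema schema = new Schema.Parser().parse(new File("regression_error_field_in_record.avsc"));
        SpecificCompiler compiler = new SpecificCompiler(schema);
        compiler.setRecordSpecificClass("com.example.MyRecordBase"); // hypothetical base class
        compiler.compileToDestination(null, new File("target/generated-sources"));
      }
    }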
diff --git a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj index 4deaa68a5a7..117764497e3 100644 --- a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj +++ b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj @@ -88,7 +88,10 @@ import org.apache.commons.text.StringEscapeUtils; * * Note: each instance is not thread-safe, but multiple separate * instances are safely independent. + * + * @deprecated Use the new org.apache.avro.idl.IdlReader from avro-idl instead. */ +@Deprecated public class Idl implements Closeable { static JsonNodeFactory FACTORY = JsonNodeFactory.instance; private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.compiler.idl.Idl.NullableType.optional"; @@ -1284,7 +1287,7 @@ Schema ImportSchema() : { { try (InputStream stream=findFile(importFile).openStream()){ Parser parser = new Schema.Parser(); - parser.addTypes(names); // inherit names + parser.addTypes(names.values()); // inherit names Schema value = parser.parse(stream); names = parser.getTypes(); // update names return value;
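Editor's note: since Idl is now deprecated in favor of the avro-idl module introduced later in this diff, a minimal sketch of the replacement API (parse(Path) and getProtocol() are declared in the new IdlReader/IdlFile below; the file name is illustrative):

    import java.nio.file.Paths;
    import org.apache.avro.Protocol;
    import org.apache.avro.idl.IdlFile;
    import org.apache.avro.idl.IdlReader;

    class IdlReaderSketch {
      public static void main(String[] args) throws Exception {
        IdlFile idlFile = new IdlReader().parse(Paths.get("union.avdl"));
        Protocol protocol = idlFile.getProtocol(); // null when the file declares no protocol
        System.out.println(protocol);
      }
    }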
diff --git a/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm b/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm index d5dd7831e1f..2e3bb075961 100755 --- a/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm +++ b/lang/java/compiler/src/main/velocity/org/apache/avro/compiler/specific/templates/java/classic/record.vm @@ -36,7 +36,7 @@ import org.apache.avro.message.SchemaStore; @$annotation #end @org.apache.avro.specific.AvroGenerated -public class ${this.mangleTypeIdentifier($schema.getName())}#if ($schema.isError()) extends org.apache.avro.specific.SpecificExceptionBase#else extends org.apache.avro.specific.SpecificRecordBase#end implements org.apache.avro.specific.SpecificRecord { +public class ${this.mangleTypeIdentifier($schema.getName())} extends ${this.getSchemaParentClass($schema.isError())} implements org.apache.avro.specific.SpecificRecord { private static final long serialVersionUID = ${this.fingerprint64($schema)}L; #set ($schemaString = $this.javaSplit($schema.toString())) @@ -228,10 +228,10 @@ public class ${this.mangleTypeIdentifier($schema.getName())}#if ($schema.isError #foreach ($field in $schema.getFields()) #if (${this.gettersReturnOptional} && (!${this.optionalGettersForNullableFieldsOnly} || ${field.schema().isNullable()})) /** - * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.javaType($field.schema())}&gt;. + * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. #if ($field.doc()) * $field.doc() #end - * @return The value wrapped in an Optional&lt;${this.javaType($field.schema())}&gt;. + * @return The value wrapped in an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. */ public Optional<${this.javaType($field.schema())}> ${this.generateGetMethod($schema, $field)}() { return Optional.<${this.javaType($field.schema())}>ofNullable(${this.mangle($field.name(), $schema.isError())}); @@ -257,10 +257,10 @@ public class ${this.mangleTypeIdentifier($schema.getName())}#if ($schema.isError #if (${this.createOptionalGetters}) /** - * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.javaType($field.schema())}&gt;. + * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. #if ($field.doc()) * $field.doc() #end - * @return The value wrapped in an Optional&lt;${this.javaType($field.schema())}&gt;. + * @return The value wrapped in an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. */ public Optional<${this.javaType($field.schema())}> ${this.generateGetOptionalMethod($schema, $field)}() { return Optional.<${this.javaType($field.schema())}>ofNullable(${this.mangle($field.name(), $schema.isError())}); @@ -413,10 +413,10 @@ public class ${this.mangleTypeIdentifier($schema.getName())}#if ($schema.isError #if (${this.createOptionalGetters}) /** - * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.javaType($field.schema())}&gt;. + * Gets the value of the '${this.mangle($field.name(), $schema.isError())}' field as an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. #if ($field.doc()) * $field.doc() #end - * @return The value wrapped in an Optional&lt;${this.javaType($field.schema())}&gt;. + * @return The value wrapped in an Optional&lt;${this.escapeForJavadoc(${this.javaType($field.schema())})}&gt;. */ public Optional<${this.javaType($field.schema())}> ${this.generateGetOptionalMethod($schema, $field)}() { return Optional.<${this.javaType($field.schema())}>ofNullable(${this.mangle($field.name(), $schema.isError())}); @@ -844,7 +844,7 @@ $I } $I long size${nv} = in.readMapStart(); $I $t m${nv} = ${var}; // Need fresh name due to limitation of macro system $I if (m${nv} == null) { -$I m${nv} = new java.util.HashMap<${kt},${vt}>((int)size${nv}); +$I m${nv} = new java.util.HashMap<${kt},${vt}>((int)(size${nv} * 4)/3 + 1); $I $var = m${nv}; $I } else m${nv}.clear(); $I for ( ; 0 < size${nv}; size${nv} = in.mapNext()) {
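Editor's note: the map pre-sizing change above accounts for HashMap's default load factor of 0.75 — a map asked to hold n entries needs roughly 4n/3 buckets to avoid rehashing while it is filled. A tiny check of the arithmetic:

    class MapSizingSketch {
      public static void main(String[] args) {
        long size = 100; // entries about to be read from the decoder
        // new HashMap<>(100) rounds up to a 128-bucket table with a resize
        // threshold of 96, so inserting 100 entries triggers a rehash.
        int oldCapacity = (int) size;
        // (100 * 4) / 3 + 1 = 134, which rounds up to 256 buckets with a
        // threshold of 192: all 100 entries fit without rehashing.
        int newCapacity = (int) (size * 4) / 3 + 1;
        System.out.println(oldCapacity + " -> " + newCapacity);
      }
    }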
diff --git a/lang/java/compiler/src/test/idl/input/bar.avpr b/lang/java/compiler/src/test/idl/input/bar.avpr index 5e9b194a060..ea8b0d4befb 100644 --- a/lang/java/compiler/src/test/idl/input/bar.avpr +++ b/lang/java/compiler/src/test/idl/input/bar.avpr @@ -1,2 +1,3 @@ {"protocol": "org.foo.Bar", - "messages": { "bar": {"request": [], "response": "null"}}} + "types": [{"name": "AorB", "type": "enum", "symbols": ["A", "B"], "default": "A"}], + "messages": { "bar": {"request": [{"name": "choice", "type": "AorB"}],"response": "null"}}} diff --git a/lang/java/compiler/src/test/idl/input/union.avdl b/lang/java/compiler/src/test/idl/input/union.avdl new file mode 100644 index 00000000000..19f37f2f748 --- /dev/null +++ b/lang/java/compiler/src/test/idl/input/union.avdl @@ -0,0 +1,16 @@ +@namespace("org.apache.avro.gen") +protocol UnionFwd { + + record TestRecord { + union {SR1, SR2} unionField; + } + + record SR1 { + string field; + } + + record SR2 { + string field; + } + +} diff --git a/lang/java/compiler/src/test/idl/output/import.avpr b/lang/java/compiler/src/test/idl/output/import.avpr index e6701ad94e1..b0093492d95 100644 --- a/lang/java/compiler/src/test/idl/output/import.avpr +++ b/lang/java/compiler/src/test/idl/output/import.avpr @@ -3,6 +3,11 @@ "namespace" : "org.foo", "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "types" : [ { + "type" : "enum", + "name" : "AorB", + "symbols" : ["A", "B"], + "default" : "A" + }, { "type" : "enum", "name" : "Position", "namespace" : "avro.examples.baseball", @@ -111,7 +116,10 @@ "response" : "null" }, "bar" : { - "request" : [ ], + "request" : [ { + "name" : "choice", + "type" : "AorB" + } ], "response" : "null" }, "bazm" : { diff --git a/lang/java/compiler/src/test/idl/output/nestedimport.avpr b/lang/java/compiler/src/test/idl/output/nestedimport.avpr index 80273627109..f1060b0d743 100644 --- a/lang/java/compiler/src/test/idl/output/nestedimport.avpr +++ b/lang/java/compiler/src/test/idl/output/nestedimport.avpr @@ -4,6 +4,12 @@ "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", "version" : "1.0.5", "types" : [ { + "type" : "enum", + "name" : "AorB", + "namespace" : "org.foo", + "symbols" : ["A", "B"], + "default" : "A" + }, { "type" : "enum", "name" : "Position", "namespace" : "avro.examples.baseball", @@ -66,7 +72,10 @@ "response" : "null" }, "bar" : { - "request" : [ ], + "request" : [ { + "name" : "choice", + "type" : "org.foo.AorB" + } ], "response" : "null" } } diff --git a/lang/java/compiler/src/test/idl/output/union.avpr b/lang/java/compiler/src/test/idl/output/union.avpr new file mode 100644 index 00000000000..61748d179e3 --- /dev/null +++ b/lang/java/compiler/src/test/idl/output/union.avpr @@ -0,0 +1,38 @@ +{ + "protocol": "UnionFwd", + "namespace": "org.apache.avro.gen", + "types": [ + { + "type": "record", + "name": "TestRecord", + "fields": [ + { + "name": "unionField", + "type": [ + { + "type": "record", + "name": "SR1", + "fields": [ + { + "name": "field", + "type": "string" + } + ] + }, + { + "type": "record", + "name": "SR2", + "fields": [ + { + "name": "field", + "type": "string" + } + ] + } + ] + } + ] + } + ], + "messages": {} +} diff --git a/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java b/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java index 05bc7b2f531..cc3fcd31240 100644 --- a/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java +++ b/lang/java/compiler/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java @@ -929,4 +929,59 @@ public LogicalType fromSchema(Schema schema) { } } + @Test + void fieldWithUnderscore_avro3826() { + String jsonSchema = "{\n" + " \"name\": \"Value\",\n" +
" \"type\": \"record\",\n" + " \"fields\": [\n" + + " { \"name\": \"__deleted\", \"type\": \"string\"\n" + " }\n" + " ]\n" + "}"; + Collection outputs = new SpecificCompiler(new Schema.Parser().parse(jsonSchema)) + .compile(); + assertEquals(1, outputs.size()); + SpecificCompiler.OutputFile outputFile = outputs.iterator().next(); + assertTrue(outputFile.contents.contains("getDeleted()")); + assertFalse(outputFile.contents.contains("$0")); + assertFalse(outputFile.contents.contains("$1")); + + String jsonSchema2 = "{\n" + " \"name\": \"Value\", \"type\": \"record\",\n" + " \"fields\": [\n" + + " { \"name\": \"__deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"_deleted\", \"type\": \"string\"}\n" + " ]\n" + "}"; + Collection outputs2 = new SpecificCompiler(new Schema.Parser().parse(jsonSchema2)) + .compile(); + assertEquals(1, outputs2.size()); + SpecificCompiler.OutputFile outputFile2 = outputs2.iterator().next(); + + assertTrue(outputFile2.contents.contains("getDeleted()")); + assertTrue(outputFile2.contents.contains("getDeleted$0()")); + assertFalse(outputFile.contents.contains("$1")); + + String jsonSchema3 = "{\n" + " \"name\": \"Value\", \"type\": \"record\",\n" + " \"fields\": [\n" + + " { \"name\": \"__deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"_deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"deleted\", \"type\": \"string\"}\n" + " ]\n" + "}"; + Collection outputs3 = new SpecificCompiler(new Schema.Parser().parse(jsonSchema3)) + .compile(); + assertEquals(1, outputs3.size()); + SpecificCompiler.OutputFile outputFile3 = outputs3.iterator().next(); + + assertTrue(outputFile3.contents.contains("getDeleted()")); + assertTrue(outputFile3.contents.contains("getDeleted$0()")); + assertTrue(outputFile3.contents.contains("getDeleted$1()")); + assertFalse(outputFile3.contents.contains("$2")); + + String jsonSchema4 = "{\n" + " \"name\": \"Value\", \"type\": \"record\",\n" + " \"fields\": [\n" + + " { \"name\": \"__deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"_deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"deleted\", \"type\": \"string\"},\n" + + " { \"name\": \"Deleted\", \"type\": \"string\"}\n" + " ]\n" + "}"; + Collection outputs4 = new SpecificCompiler(new Schema.Parser().parse(jsonSchema4)) + .compile(); + assertEquals(1, outputs4.size()); + SpecificCompiler.OutputFile outputFile4 = outputs4.iterator().next(); + + assertTrue(outputFile4.contents.contains("getDeleted()")); + assertTrue(outputFile4.contents.contains("getDeleted$0()")); + assertTrue(outputFile4.contents.contains("getDeleted$1()")); + assertTrue(outputFile4.contents.contains("getDeleted$2()")); + assertFalse(outputFile4.contents.contains("$3")); + } + } diff --git a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java index e541a244b7a..9a334a45ab0 100644 --- a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java +++ b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java @@ -20,6 +20,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; import org.apache.avro.Schema; import org.apache.avro.io.Encoder; @@ -28,6 +30,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.test.RecordWithErrorField; +import 
diff --git a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java index e541a244b7a..9a334a45ab0 100644 --- a/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java +++ b/lang/java/compiler/src/test/java/org/apache/avro/specific/TestGeneratedCode.java @@ -20,6 +20,8 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Map; import org.apache.avro.Schema; import org.apache.avro.io.Encoder; @@ -28,6 +30,8 @@ import org.apache.avro.io.DecoderFactory; import org.apache.avro.io.DatumReader; import org.apache.avro.io.DatumWriter; +import org.apache.avro.specific.test.RecordWithErrorField; +import org.apache.avro.specific.test.TestError; import org.apache.avro.util.Utf8; import org.junit.Assert; @@ -71,8 +75,9 @@ void withoutSchemaMigration() throws IOException { @Test void withSchemaMigration() throws IOException { + Map<CharSequence, CharSequence> map = new HashMap<>(); FullRecordV2 src = new FullRecordV2(true, 731, 87231, 38L, 54.2832F, "Hi there", - ByteBuffer.wrap(Utf8.getBytesFor("Hello, world!"))); + ByteBuffer.wrap(Utf8.getBytesFor("Hello, world!")), map); assertTrue(((SpecificRecordBase) src).hasCustomCoders(), "Test schema must allow for custom coders."); ByteArrayOutputStream out = new ByteArrayOutputStream(1024); @@ -89,4 +94,28 @@ void withSchemaMigration() throws IOException { FullRecordV1 expected = new FullRecordV1(true, 87231, 731L, 54.2832F, 38.0, null, "Hello, world!"); Assert.assertEquals(expected, dst); } + + @Test + public void withErrorField() throws IOException { + TestError srcError = TestError.newBuilder().setMessage$("Oops").build(); + RecordWithErrorField src = new RecordWithErrorField("Hi there", srcError); + Assert.assertFalse("Test schema with error field cannot allow for custom coders.", + ((SpecificRecordBase) src).hasCustomCoders()); + Schema schema = RecordWithErrorField.getClassSchema(); + + ByteArrayOutputStream out = new ByteArrayOutputStream(1024); + Encoder e = EncoderFactory.get().directBinaryEncoder(out, null); + DatumWriter<RecordWithErrorField> w = (DatumWriter<RecordWithErrorField>) MODEL.createDatumWriter(schema); + w.write(src, e); + e.flush(); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + Decoder d = DecoderFactory.get().directBinaryDecoder(in, null); + DatumReader<RecordWithErrorField> r = (DatumReader<RecordWithErrorField>) MODEL.createDatumReader(schema); + RecordWithErrorField dst = r.read(null, d); + + TestError expectedError = TestError.newBuilder().setMessage$("Oops").build(); + RecordWithErrorField expected = new RecordWithErrorField("Hi there", expectedError); + Assert.assertEquals(expected, dst); + } } diff --git a/lang/java/compiler/src/test/resources/full_record_v2.avsc b/lang/java/compiler/src/test/resources/full_record_v2.avsc index b80b9b4ae9d..0a033cf55be 100644 --- a/lang/java/compiler/src/test/resources/full_record_v2.avsc +++ b/lang/java/compiler/src/test/resources/full_record_v2.avsc @@ -24,6 +24,7 @@ }, { "name" : "h", "type" : "bytes" - } ] + }, + { "name" : "myMap", "type" : { "type" : "map", "values" : "string" } }] } diff --git a/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc b/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc new file mode 100644 index 00000000000..e2fdcb9ad93 --- /dev/null +++ b/lang/java/compiler/src/test/resources/regression_error_field_in_record.avsc @@ -0,0 +1,22 @@ +{ + "type" : "record", + "name" : "RecordWithErrorField", + "doc" : "With custom coders in Avro 1.9, previously successful records with error fields now fail to compile.", + "namespace" : "org.apache.avro.specific.test", + "fields" : [ { + "name" : "s", + "type" : [ "null", "string" ], + "default" : null + }, { + "name": "e", + "type": [ "null", { + "type" : "error", + "name" : "TestError", + "fields" : [ { + "name" : "message", + "type" : "string" + } ] + } ], + "default": null + } ] +} diff --git a/lang/java/grpc/pom.xml b/lang/java/grpc/pom.xml index f04dd468fcf..d895a1ba3a1 100644 --- a/lang/java/grpc/pom.xml +++ b/lang/java/grpc/pom.xml @@ -24,7 +24,7 @@ org.apache.avro avro-parent 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-grpc diff --git a/lang/java/grpc/src/test/avro/TestService.avdl b/lang/java/grpc/src/test/avro/TestService.avdl index
9a4629a8f5c..6c5f6a038b8 100644 --- a/lang/java/grpc/src/test/avro/TestService.avdl +++ b/lang/java/grpc/src/test/avro/TestService.avdl @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information diff --git a/lang/java/idl/pom.xml b/lang/java/idl/pom.xml new file mode 100644 index 00000000000..6363ec150a5 --- /dev/null +++ b/lang/java/idl/pom.xml @@ -0,0 +1,172 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.12.0-SNAPSHOT + ../pom.xml + + + avro-idl + + Apache Avro IDL + bundle + https://avro.apache.org + Compilers for Avro IDL and Avro Specific Java API + + + ${project.parent.parent.basedir} + + !org.apache.avro.idl*, + org.apache.avro*;version="${project.version}", + org.apache.commons.text*, + * + + org.apache.avro.idl*;version="${project.version}" + 4.9.3 + + + + + + src/main/resources + + + + + src/test/resources + + + src/test/idl + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + org.apache.avro.idl + + + + + + prepare-test-jar + generate-test-resources + + test-jar + + + test-resource + src/test/idl/putOnClassPath + putOnClassPath + ${project.build.testOutputDirectory} + + + + + + org.antlr + antlr4-maven-plugin + ${antlr.version} + + + antlr + + antlr4 + + + + + ${project.basedir}/../../../share/idl_grammar + ${project.basedir}/../../../share/idl_grammar/imports + true + false + + + + + + + + ${project.groupId} + avro + ${project.version} + + + org.antlr + antlr4-runtime + ${antlr.version} + + + org.apache.commons + commons-text + ${commons-text.version} + + + com.fasterxml.jackson.core + jackson-databind + + + + + + m2e + + m2e.version + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.codehaus.mojo + exec-maven-plugin + [1.0,) + + exec + + + + + + + + + + + + + + + + diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java new file mode 100644 index 00000000000..b3777c9f790 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlFile.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.Protocol; +import org.apache.avro.Schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * A parsed IdlFile. Provides access to the named schemas in the IDL file and + * the protocol containing the schemas. 
+ */ +public class IdlFile { + private final Schema mainSchema; + private final Protocol protocol; + private final String namespace; + private final Map<String, Schema> namedSchemas; + private final List<String> warnings; + + IdlFile(Protocol protocol, List<String> warnings) { + this(protocol.getNamespace(), protocol.getTypes(), null, protocol, warnings); + } + + IdlFile(String namespace, Schema mainSchema, Iterable<Schema> schemas, List<String> warnings) { + this(namespace, schemas, mainSchema, null, warnings); + } + + private IdlFile(String namespace, Iterable<Schema> schemas, Schema mainSchema, Protocol protocol, + List<String> warnings) { + this.namespace = namespace; + this.namedSchemas = new LinkedHashMap<>(); + for (Schema namedSchema : schemas) { + this.namedSchemas.put(namedSchema.getFullName(), namedSchema); + } + this.mainSchema = mainSchema; + this.protocol = protocol; + this.warnings = Collections.unmodifiableList(new ArrayList<>(warnings)); + } + + /** + * The (main) schema defined by the IDL file. + */ + public Schema getMainSchema() { + return mainSchema; + } + + /** + * The protocol defined by the IDL file. + */ + public Protocol getProtocol() { + return protocol; + } + + public List<String> getWarnings() { + return warnings; + } + + public List<String> getWarnings(String importFile) { + return warnings.stream() + .map(warning -> importFile + ' ' + Character.toLowerCase(warning.charAt(0)) + warning.substring(1)) + .collect(Collectors.toList()); + } + + /** + * The default namespace to resolve schema names against. + */ + public String getNamespace() { + return namespace; + } + + /** + * The named schemas defined by the IDL file, mapped by their full name. + */ + public Map<String, Schema> getNamedSchemas() { + return Collections.unmodifiableMap(namedSchemas); + } + + /** + * Get a named schema defined by the IDL file, by name. The name can be a simple + * name in the default namespace of the IDL file (e.g., the namespace of the + * protocol), or a full name. + * + * @param name the full name of the schema, or a simple name + * @return the schema, or {@code null} if it does not exist + */ + public Schema getNamedSchema(String name) { + Schema result = namedSchemas.get(name); + if (result != null) { + return result; + } + if (namespace != null && !name.contains(".")) { + result = namedSchemas.get(namespace + '.' + name); + } + return result; + } + + // Visible for testing + String outputString() { + if (protocol != null) { + return protocol.toString(); + } + if (mainSchema != null) { + return mainSchema.toString(); + } + if (namedSchemas.isEmpty()) { + return "[]"; + } else { + StringBuilder buffer = new StringBuilder(); + for (Schema schema : namedSchemas.values()) { + buffer.append(',').append(schema); + } + buffer.append(']').setCharAt(0, '['); + return buffer.toString(); + } + } +}
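Editor's note: a short sketch of the name resolution getNamedSchema implements above — a full name is looked up directly, a simple name is retried against the file's default namespace (the file and type names here are hypothetical):

    import java.nio.file.Paths;
    import org.apache.avro.Schema;
    import org.apache.avro.idl.IdlFile;
    import org.apache.avro.idl.IdlReader;

    class NamedSchemaSketch {
      public static void main(String[] args) throws Exception {
        IdlFile idlFile = new IdlReader().parse(Paths.get("events.avdl")); // hypothetical file
        Schema full = idlFile.getNamedSchema("org.example.Event");  // full name: direct lookup
        Schema simple = idlFile.getNamedSchema("Event");            // simple name: retried with getNamespace()
        System.out.println(full == simple);
      }
    }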
diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java new file mode 100644 index 00000000000..f2419f5f551 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IdlReader.java @@ -0,0 +1,1067 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.BooleanNode; +import com.fasterxml.jackson.databind.node.DoubleNode; +import com.fasterxml.jackson.databind.node.IntNode; +import com.fasterxml.jackson.databind.node.LongNode; +import com.fasterxml.jackson.databind.node.NullNode; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.node.TextNode; +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.CharStream; +import org.antlr.v4.runtime.CharStreams; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.antlr.v4.runtime.Token; +import org.apache.avro.JsonProperties; +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParseException; +import org.apache.avro.idl.IdlParser.ArrayTypeContext; +import org.apache.avro.idl.IdlParser.EnumDeclarationContext; +import org.apache.avro.idl.IdlParser.EnumSymbolContext; +import org.apache.avro.idl.IdlParser.FieldDeclarationContext; +import org.apache.avro.idl.IdlParser.FixedDeclarationContext; +import org.apache.avro.idl.IdlParser.FormalParameterContext; +import org.apache.avro.idl.IdlParser.FullTypeContext; +import org.apache.avro.idl.IdlParser.IdentifierContext; +import org.apache.avro.idl.IdlParser.IdlFileContext; +import org.apache.avro.idl.IdlParser.ImportStatementContext; +import org.apache.avro.idl.IdlParser.JsonArrayContext; +import org.apache.avro.idl.IdlParser.JsonLiteralContext; +import org.apache.avro.idl.IdlParser.JsonObjectContext; +import org.apache.avro.idl.IdlParser.JsonPairContext; +import org.apache.avro.idl.IdlParser.JsonValueContext; +import org.apache.avro.idl.IdlParser.MapTypeContext; +import org.apache.avro.idl.IdlParser.MessageDeclarationContext; +import org.apache.avro.idl.IdlParser.NamespaceDeclarationContext; +import org.apache.avro.idl.IdlParser.NullableTypeContext; +import org.apache.avro.idl.IdlParser.PrimitiveTypeContext; +import org.apache.avro.idl.IdlParser.ProtocolDeclarationBodyContext; +import org.apache.avro.idl.IdlParser.ProtocolDeclarationContext; +import org.apache.avro.idl.IdlParser.RecordBodyContext; +import org.apache.avro.idl.IdlParser.RecordDeclarationContext; +import org.apache.avro.idl.IdlParser.ResultTypeContext; +import org.apache.avro.idl.IdlParser.SchemaPropertyContext; +import org.apache.avro.idl.IdlParser.UnionTypeContext; +import org.apache.avro.idl.IdlParser.VariableDeclarationContext; +import org.apache.avro.util.internal.Accessor; +import org.apache.commons.text.StringEscapeUtils; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths;
+import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Consumer; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static java.util.Collections.singleton; +import static java.util.Collections.unmodifiableMap; + +public class IdlReader { + /** + * Simple error listener. Throws a runtime exception because ANTLR does not give + * easy access to the (reasonably readable) error message elsewhere. + */ + private static final BaseErrorListener SIMPLE_AVRO_ERROR_LISTENER = new BaseErrorListener() { + @Override + public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, + String msg, RecognitionException e) { + throw new SchemaParseException("line " + line + ":" + charPositionInLine + " " + msg); + } + }; + private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.idl.Idl.NullableType.optional"; + /** + * Pattern to match the common whitespace indents in a multi-line String. + * Doesn't match a single-line String, fully matches any multi-line String. + * <p> + * To use: match on a {@link String#trim() trimmed} String, and then replace all + * newlines followed by the group "indent" with a newline. + */ + private static final Pattern WS_INDENT = Pattern.compile("(?U).*\\R(?<indent>\\h*).*(?:\\R\\k<indent>.*)*"); + /** + * Pattern to match the whitespace indents plus common stars (1 or 2) in a + * multi-line String. If a String fully matches, replace all occurrences of a + * newline followed by whitespace and then the group "stars" with a newline. + * <p> + * Note: partial matches are invalid. + */ + private static final Pattern STAR_INDENT = Pattern.compile("(?U)(?<stars>\\*{1,2}).*(?:\\R\\h*\\k<stars>.*)*"); + /** + * Predicate to check for valid names. Should probably be delegated to the + * Schema class. + */ + private static final Predicate<String> VALID_NAME = Pattern.compile("[_\\p{L}][_\\p{L}\\d]*").asPredicate(); + private static final Set<String> INVALID_TYPE_NAMES = new HashSet<>(Arrays.asList("boolean", "int", "long", "float", + "double", "bytes", "string", "null", "date", "time_ms", "timestamp_ms", "localtimestamp_ms", "uuid")); + private static final String CLASSPATH_SCHEME = "classpath"; + + private final Set<URI> readLocations; + private final Map<String, Schema> names; + + public IdlReader() { + readLocations = new HashSet<>(); + names = new LinkedHashMap<>(); + } + + public Map<String, Schema> getTypes() { + return unmodifiableMap(names); + } + + private Schema namedSchemaOrUnresolved(String fullName) { + Schema schema = names.get(fullName); + if (schema == null) { + schema = SchemaResolver.unresolvedSchema(fullName); + } + return schema; + } + + private void setTypes(Map<String, Schema> types) { + names.clear(); + for (Schema schema : types.values()) { + addSchema(schema); + } + } + + public void addTypes(Map<String, Schema> types) { + for (Schema schema : types.values()) { + addSchema(schema); + } + } + + private void addSchema(Schema schema) { + String fullName = schema.getFullName(); + if (names.containsKey(fullName)) { + throw new SchemaParseException("Can't redefine: " + fullName); + } + names.put(fullName, schema); + } + + public IdlFile parse(Path location) throws IOException { + return parse(location.toUri()); + } + + IdlFile parse(URI location) throws IOException { + try (InputStream stream = location.toURL().openStream()) { + readLocations.add(location); + URI inputDir = location; + if ("jar".equals(location.getScheme())) { + String jarUriAsString = location.toString(); + String pathFromJarRoot = jarUriAsString.substring(jarUriAsString.indexOf("!/") + 2); + inputDir = URI.create(CLASSPATH_SCHEME + ":/" + pathFromJarRoot); + } + inputDir = inputDir.resolve("."); + + return parse(inputDir, CharStreams.fromStream(stream, StandardCharsets.UTF_8)); + } + } + + /** + * Parse an IDL file from a stream. This method cannot handle imports. + */ + public IdlFile parse(InputStream stream) throws IOException { + return parse(null, CharStreams.fromStream(stream, StandardCharsets.UTF_8)); + } + + private IdlFile parse(URI inputDir, CharStream charStream) { + IdlLexer lexer = new IdlLexer(charStream); + CommonTokenStream tokenStream = new CommonTokenStream(lexer); + + IdlParserListener parseListener = new IdlParserListener(inputDir, tokenStream); + + IdlParser parser = new IdlParser(tokenStream); + parser.removeErrorListeners(); + parser.addErrorListener(SIMPLE_AVRO_ERROR_LISTENER); + parser.addParseListener(parseListener); + parser.setTrace(false); + parser.setBuildParseTree(false); + + // Trigger parsing. + parser.idlFile(); + + return parseListener.getIdlFile(); + }
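Editor's note: a worked example of the indent stripping these patterns implement (see stripIndents below). For a typical doc comment, the text handed to stripIndents is the body after removing the comment delimiters and trimming; when STAR_INDENT matches, the same rewrite the method applies is:

    class StripIndentSketch {
      public static void main(String[] args) {
        String trimmed = "* Hello\n   * World"; // body of /**\n * Hello\n * World\n */ after trim()
        // The rewrite stripIndents applies when STAR_INDENT matches with stars = "*":
        String stripped = trimmed.replaceAll("(?U)(?:^|(\\R)\\h*)\\Q*\\E\\h?", "$1");
        System.out.println(stripped); // "Hello\nWorld"
      }
    }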
+ + /* Package private to facilitate testing */ + static String stripIndents(String docComment) { + Matcher starMatcher = STAR_INDENT.matcher(docComment); + if (starMatcher.matches()) { + return docComment.replaceAll("(?U)(?:^|(\\R)\\h*)\\Q" + starMatcher.group("stars") + "\\E\\h?", "$1"); + } + + Matcher whitespaceMatcher = WS_INDENT.matcher(docComment); + if (whitespaceMatcher.matches()) { + return docComment.replaceAll("(?U)(\\R)" + whitespaceMatcher.group("indent"), "$1"); + } + + return docComment; + } + + private static SchemaParseException error(String message, Token token) { + return error(message, token, null); + } + + private static SchemaParseException error(String message, Token token, Throwable cause) { + SchemaParseException exception = new SchemaParseException( + message + ", at line " + token.getLine() + ", column " + token.getCharPositionInLine()); + if (cause != null) { + exception.initCause(cause); + } + return exception; + } + + private class IdlParserListener extends IdlBaseListener { + private final URI inputDir; + private final CommonTokenStream tokenStream; + private int hiddenTokensProcessedIndex; + private final List<String> warnings; + + private IdlFile result; + private Schema mainSchema; + private Protocol protocol; + private final Deque<String> namespaces; + private final List<String> enumSymbols; + private String enumDefaultSymbol; + private Schema schema; + private String defaultVariableDocComment; + private final List<Schema.Field> fields; + private final Deque<Schema> typeStack; + private final Deque<JsonNode> jsonValues; + private final Deque<SchemaProperties> propertiesStack; + private String messageDocComment; + + public IdlParserListener(URI inputDir, CommonTokenStream tokenStream) { + this.inputDir = inputDir; + this.tokenStream = tokenStream; + hiddenTokensProcessedIndex = -1; + warnings = new ArrayList<>(); + + result = null; + mainSchema = null; + protocol = null; + namespaces = new ArrayDeque<>(); + enumSymbols = new ArrayList<>(); + enumDefaultSymbol = null; + schema = null; + defaultVariableDocComment = null; + fields = new ArrayList<>(); + typeStack = new ArrayDeque<>(); + propertiesStack = new ArrayDeque<>(); + jsonValues = new ArrayDeque<>(); + messageDocComment = null; + } + + public IdlFile getIdlFile() { + return result; + } + + private String getDocComment(ParserRuleContext ctx) { + int newHiddenTokensProcessedIndex = ctx.start.getTokenIndex(); + List<Token> docCommentTokens = tokenStream.getHiddenTokensToLeft(newHiddenTokensProcessedIndex, -1); + int searchEndIndex = newHiddenTokensProcessedIndex; + + Token docCommentToken = null; + if (docCommentTokens != null) { + // There's at least one element + docCommentToken = docCommentTokens.get(docCommentTokens.size() - 1); + searchEndIndex = docCommentToken.getTokenIndex() - 1; + } + + Set<Integer> allHiddenTokens = singleton(IdlParser.DocComment); + if (searchEndIndex >= 0) { + List<Token> hiddenTokens = tokenStream.getTokens(hiddenTokensProcessedIndex + 1, searchEndIndex, + allHiddenTokens); + if (hiddenTokens != null) { + for (Token token : hiddenTokens) { + warnings.add(String.format( + "Line %d, char %d: Ignoring out-of-place documentation comment.%n" + + "Did you mean to use a multiline comment ( /* ...
*/ ) instead?", + token.getLine(), token.getCharPositionInLine() + 1)); + } + } + } + hiddenTokensProcessedIndex = newHiddenTokensProcessedIndex; + + if (docCommentToken == null) { + return null; + } + String comment = docCommentToken.getText(); + String text = comment.substring(3, comment.length() - 2); // Strip /** & */ + return stripIndents(text.trim()); + } + + private void pushNamespace(String namespace) { + namespaces.push(namespace == null ? "" : namespace); + } + + private String currentNamespace() { + String namespace = namespaces.peek(); + return namespace == null || namespace.isEmpty() ? null : namespace; + } + + private void popNamespace() { + namespaces.pop(); + } + + @Override + public void exitIdlFile(IdlFileContext ctx) { + IdlFile unresolved; + if (protocol == null) { + unresolved = new IdlFile(currentNamespace(), mainSchema, getTypes().values(), warnings); + } else { + unresolved = new IdlFile(protocol, warnings); + } + result = SchemaResolver.resolve(unresolved, OPTIONAL_NULLABLE_TYPE_PROPERTY); + } + + @Override + public void enterProtocolDeclaration(ProtocolDeclarationContext ctx) { + propertiesStack.push(new SchemaProperties(null, true, false, false)); + } + + @Override + public void enterProtocolDeclarationBody(ProtocolDeclarationBodyContext ctx) { + ProtocolDeclarationContext protocolCtx = (ProtocolDeclarationContext) ctx.parent; + SchemaProperties properties = propertiesStack.pop(); + String protocolIdentifier = identifier(protocolCtx.name); + pushNamespace(namespace(protocolIdentifier, properties.namespace())); + + String protocolName = name(protocolIdentifier); + String docComment = getDocComment(protocolCtx); + String protocolNamespace = currentNamespace(); + protocol = properties.copyProperties(new Protocol(protocolName, docComment, protocolNamespace)); + } + + @Override + public void exitProtocolDeclaration(ProtocolDeclarationContext ctx) { + if (protocol != null) + protocol.setTypes(getTypes().values()); + if (!namespaces.isEmpty()) + popNamespace(); + } + + @Override + public void exitNamespaceDeclaration(NamespaceDeclarationContext ctx) { + pushNamespace(namespace("", identifier(ctx.namespace))); + } + + @Override + public void exitMainSchemaDeclaration(IdlParser.MainSchemaDeclarationContext ctx) { + mainSchema = typeStack.pop(); + assert typeStack.isEmpty(); + } + + @Override + public void enterSchemaProperty(SchemaPropertyContext ctx) { + assert jsonValues.isEmpty(); + } + + @Override + public void exitSchemaProperty(SchemaPropertyContext ctx) { + String name = identifier(ctx.name); + JsonNode value = jsonValues.pop(); + Token firstToken = ctx.value.start; + + propertiesStack.element().addProperty(name, value, firstToken); + super.exitSchemaProperty(ctx); + } + + @Override + public void exitImportStatement(ImportStatementContext importContext) { + String importFile = getString(importContext.location); + try { + URI importLocation = findImport(importFile); + if (!readLocations.add(importLocation)) { + // Already imported + return; + } + switch (importContext.importType.getType()) { + case IdlParser.IDL: + // Note that the parse(URI) method uses the same known schema collection + IdlFile idlFile = parse(importLocation); + if (protocol != null && idlFile.getProtocol() != null) { + protocol.getMessages().putAll(idlFile.getProtocol().getMessages()); + } + warnings.addAll(idlFile.getWarnings(importFile)); + break; + case IdlParser.Protocol: + try (InputStream stream = importLocation.toURL().openStream()) { + Protocol importProtocol = Protocol.parse(stream); + 
for (Schema s : importProtocol.getTypes()) { + addSchema(s); + } + if (protocol != null) { + protocol.getMessages().putAll(importProtocol.getMessages()); + } + } + break; + case IdlParser.Schema: + try (InputStream stream = importLocation.toURL().openStream()) { + Schema.Parser parser = new Schema.Parser(); + parser.addTypes(getTypes().values()); // inherit names + parser.parse(stream); + setTypes(parser.getTypes()); // update names + } + break; + } + } catch (IOException e) { + throw error("Error importing " + importFile + ": " + e, importContext.location, e); + } + } + + /** + * Best effort guess at the import file location. For locations inside jar + * files, this may result in non-existing URLs. + */ + private URI findImport(String importFile) throws IOException { + URI importLocation = inputDir.resolve(importFile); + String importLocationScheme = importLocation.getScheme(); + + if (CLASSPATH_SCHEME.equals(importLocationScheme)) { + String resourceName = importLocation.getSchemeSpecificPart().substring(1); + URI resourceLocation = findResource(resourceName); + if (resourceLocation != null) { + return resourceLocation; + } + } + + if ("file".equals(importLocationScheme) && Files.exists(Paths.get(importLocation))) { + return importLocation; + } + + // The importFile doesn't exist as file relative to the current file. Try to + // load it from the classpath. + URI resourceLocation = findResource(importFile); + if (resourceLocation != null) { + return resourceLocation; + } + + // Cannot find the import. + throw new FileNotFoundException(importFile); + } + + private URI findResource(String resourceName) { + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + URL resourceLocation; + if (classLoader == null) { + resourceLocation = ClassLoader.getSystemResource(resourceName); + } else { + resourceLocation = classLoader.getResource(resourceName); + } + return resourceLocation == null ? 
null : URI.create(resourceLocation.toExternalForm()); + } + + @Override + public void enterFixedDeclaration(FixedDeclarationContext ctx) { + propertiesStack.push(new SchemaProperties(currentNamespace(), true, true, false)); + } + + @Override + public void exitFixedDeclaration(FixedDeclarationContext ctx) { + SchemaProperties properties = propertiesStack.pop(); + + String doc = getDocComment(ctx); + String identifier = identifier(ctx.name); + String name = name(identifier); + String space = namespace(identifier, properties.namespace()); + int size = Integer.decode(ctx.size.getText()); + Schema schema = Schema.createFixed(name, doc, space, size); + properties.copyAliases(schema::addAlias); + properties.copyProperties(schema); + addSchema(schema); + } + + @Override + public void enterEnumDeclaration(EnumDeclarationContext ctx) { + assert enumSymbols.isEmpty(); + assert enumDefaultSymbol == null; + propertiesStack.push(new SchemaProperties(currentNamespace(), true, true, false)); + } + + @Override + public void exitEnumDeclaration(EnumDeclarationContext ctx) { + String doc = getDocComment(ctx); + SchemaProperties properties = propertiesStack.pop(); + String identifier = identifier(ctx.name); + String name = name(identifier); + String space = namespace(identifier, properties.namespace()); + + Schema schema = Schema.createEnum(name, doc, space, new ArrayList<>(enumSymbols), enumDefaultSymbol); + properties.copyAliases(schema::addAlias); + properties.copyProperties(schema); + enumSymbols.clear(); + enumDefaultSymbol = null; + + addSchema(schema); + } + + @Override + public void enterEnumSymbol(EnumSymbolContext ctx) { + propertiesStack.push(new SchemaProperties(null, false, false, false)); + } + + @Override + public void exitEnumSymbol(EnumSymbolContext ctx) { + // TODO: implement doc comment & properties for enum symbols. 
+ propertiesStack.pop(); + + enumSymbols.add(identifier(ctx.name)); + } + + @Override + public void exitEnumDefault(IdlParser.EnumDefaultContext ctx) { + enumDefaultSymbol = identifier(ctx.defaultSymbolName); + } + + @Override + public void enterRecordDeclaration(RecordDeclarationContext ctx) { + assert schema == null; + assert fields.isEmpty(); + + propertiesStack.push(new SchemaProperties(currentNamespace(), true, true, false)); + } + + @Override + public void enterRecordBody(RecordBodyContext ctx) { + assert fields.isEmpty(); + + RecordDeclarationContext recordCtx = (RecordDeclarationContext) ctx.parent; + + SchemaProperties properties = propertiesStack.pop(); + + String doc = getDocComment(recordCtx); + String identifier = identifier(recordCtx.name); + String name = name(identifier); + pushNamespace(namespace(identifier, properties.namespace())); + boolean isError = recordCtx.recordType.getType() == IdlParser.Error; + schema = Schema.createRecord(name, doc, currentNamespace(), isError); + properties.copyAliases(schema::addAlias); + properties.copyProperties(schema); + } + + @Override + public void exitRecordDeclaration(RecordDeclarationContext ctx) { + schema.setFields(fields); + fields.clear(); + addSchema(schema); + schema = null; + + popNamespace(); + } + + @Override + public void enterFieldDeclaration(FieldDeclarationContext ctx) { + assert typeStack.isEmpty(); + defaultVariableDocComment = getDocComment(ctx); + } + + @Override + public void exitFieldDeclaration(FieldDeclarationContext ctx) { + typeStack.pop(); + defaultVariableDocComment = null; + } + + @Override + public void enterVariableDeclaration(VariableDeclarationContext ctx) { + assert jsonValues.isEmpty(); + propertiesStack.push(new SchemaProperties(currentNamespace(), false, true, true)); + } + + @Override + public void exitVariableDeclaration(VariableDeclarationContext ctx) { + String doc = Optional.ofNullable(getDocComment(ctx)).orElse(defaultVariableDocComment); + String fieldName = identifier(ctx.fieldName); + + JsonNode defaultValue = jsonValues.poll(); + Schema type = typeStack.element(); + JsonNode fieldDefault = fixDefaultValue(defaultValue, type); + Schema fieldType = fixOptionalSchema(type, fieldDefault); + + SchemaProperties properties = propertiesStack.pop(); + + boolean validate = SchemaResolver.isFullyResolvedSchema(fieldType); + Schema.Field field = Accessor.createField(fieldName, fieldType, doc, fieldDefault, validate, properties.order()); + properties.copyAliases(field::addAlias); + properties.copyProperties(field); + fields.add(field); + } + + /** + * When parsing JSON, the parser generates a LongNode or IntNode based on the + * size of the number it encounters. But this may not be expected based on the + * schema. This method fixes that. 
+ * + * @param defaultValue the parsed default value + * @param fieldType the field schema + * @return the default value, now matching the schema + */ + private JsonNode fixDefaultValue(JsonNode defaultValue, Schema fieldType) { + if (!(defaultValue instanceof IntNode)) { + return defaultValue; + } + + if (fieldType.getType() == Schema.Type.UNION) { + for (Schema unionedType : fieldType.getTypes()) { + if (unionedType.getType() == Schema.Type.INT) { + break; + } else if (unionedType.getType() == Schema.Type.LONG) { + return new LongNode(defaultValue.longValue()); + } + } + return defaultValue; + } + + if (fieldType.getType() == Schema.Type.LONG) { + return new LongNode(defaultValue.longValue()); + } + + return defaultValue; + } + + /** + * For "optional schemas" (recognized by the marker property the NullableType + * production adds), ensure the null schema is in the right place. + * + * @param schema a schema + * @param defaultValue the intended default value + * @return the schema, or an optional schema with null in the right place + */ + private Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) { + Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY); + if (optionalType != null) { + // The schema is a union schema with 2 types: "null" and a non-"null" schema + Schema nullSchema = schema.getTypes().get(0); + Schema nonNullSchema = schema.getTypes().get(1); + boolean nonNullDefault = defaultValue != null && !defaultValue.isNull(); + + // Note: the resolving visitor we'll use later drops the marker property. + if (nonNullDefault) { + return Schema.createUnion(nonNullSchema, nullSchema); + } + } + return schema; + } + + @Override + public void enterMessageDeclaration(MessageDeclarationContext ctx) { + assert typeStack.isEmpty(); + assert fields.isEmpty(); + assert messageDocComment == null; + propertiesStack.push(new SchemaProperties(currentNamespace(), false, false, false)); + messageDocComment = getDocComment(ctx); + } + + @Override + public void exitMessageDeclaration(MessageDeclarationContext ctx) { + Schema resultType = typeStack.pop(); + Map<String, JsonNode> properties = propertiesStack.pop().properties; + String name = identifier(ctx.name); + + Schema request = Schema.createRecord(null, null, null, false, fields); + fields.clear(); + + Protocol.Message message; + if (ctx.oneway != null) { + if (resultType.getType() == Schema.Type.NULL) { + message = protocol.createMessage(name, messageDocComment, properties, request); + } else { + throw error("One-way message '" + name + "' must return void", ctx.returnType.start); + } + } else { + List<Schema> errorSchemas = new ArrayList<>(); + errorSchemas.add(Protocol.SYSTEM_ERROR); + for (IdentifierContext errorContext : ctx.errors) { + errorSchemas.add(namedSchemaOrUnresolved(fullName(currentNamespace(), identifier(errorContext)))); + } + message = protocol.createMessage(name, messageDocComment, properties, request, resultType, + Schema.createUnion(errorSchemas)); + } + messageDocComment = null; + protocol.getMessages().put(message.getName(), message); + } + + @Override + public void enterFormalParameter(FormalParameterContext ctx) { + assert typeStack.size() == 1; // The message return type is on the stack; nothing else.
+ defaultVariableDocComment = getDocComment(ctx); + } + + @Override + public void exitFormalParameter(FormalParameterContext ctx) { + typeStack.pop(); + defaultVariableDocComment = null; + } + + @Override + public void exitResultType(ResultTypeContext ctx) { + if (typeStack.isEmpty()) { + // if there's no type, we've parsed 'void': use the null type + typeStack.push(Schema.create(Schema.Type.NULL)); + } + } + + @Override + public void enterFullType(FullTypeContext ctx) { + propertiesStack.push(new SchemaProperties(currentNamespace(), false, false, false)); + } + + @Override + public void exitFullType(FullTypeContext ctx) { + SchemaProperties properties = propertiesStack.pop(); + + Schema type = typeStack.element(); + if (type.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY) != null) { + // Optional type: put the properties on the non-null content + properties.copyProperties(type.getTypes().get(1)); + } else { + properties.copyProperties(type); + } + } + + @Override + public void exitNullableType(NullableTypeContext ctx) { + Schema type; + if (ctx.referenceName == null) { + type = typeStack.pop(); + } else { + // propertiesStack is empty within resultType->plainType->nullableType, and + // holds our properties otherwise + if (propertiesStack.isEmpty() || propertiesStack.peek().hasProperties()) { + throw error("Type references may not be annotated", ctx.getParent().getStart()); + } + type = namedSchemaOrUnresolved(fullName(currentNamespace(), identifier(ctx.referenceName))); + } + if (ctx.optional != null) { + type = Schema.createUnion(Schema.create(Schema.Type.NULL), type); + // Add a marker property to the union (it will be removed when creating fields) + type.addProp(OPTIONAL_NULLABLE_TYPE_PROPERTY, BooleanNode.TRUE); + } + typeStack.push(type); + } + + @Override + public void exitPrimitiveType(PrimitiveTypeContext ctx) { + switch (ctx.typeName.getType()) { + case IdlParser.Boolean: + typeStack.push(Schema.create(Schema.Type.BOOLEAN)); + break; + case IdlParser.Int: + typeStack.push(Schema.create(Schema.Type.INT)); + break; + case IdlParser.Long: + typeStack.push(Schema.create(Schema.Type.LONG)); + break; + case IdlParser.Float: + typeStack.push(Schema.create(Schema.Type.FLOAT)); + break; + case IdlParser.Double: + typeStack.push(Schema.create(Schema.Type.DOUBLE)); + break; + case IdlParser.Bytes: + typeStack.push(Schema.create(Schema.Type.BYTES)); + break; + case IdlParser.String: + typeStack.push(Schema.create(Schema.Type.STRING)); + break; + case IdlParser.Null: + typeStack.push(Schema.create(Schema.Type.NULL)); + break; + case IdlParser.Date: + typeStack.push(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT))); + break; + case IdlParser.Time: + typeStack.push(LogicalTypes.timeMillis().addToSchema(Schema.create(Schema.Type.INT))); + break; + case IdlParser.Timestamp: + typeStack.push(LogicalTypes.timestampMillis().addToSchema(Schema.create(Schema.Type.LONG))); + break; + case IdlParser.LocalTimestamp: + typeStack.push(LogicalTypes.localTimestampMillis().addToSchema(Schema.create(Schema.Type.LONG))); + break; + case IdlParser.UUID: + typeStack.push(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING))); + break; + default: // Only option left: decimal + int precision = Integer.decode(ctx.precision.getText()); + int scale = ctx.scale == null ? 
0 : Integer.decode(ctx.scale.getText()); + typeStack.push(LogicalTypes.decimal(precision, scale).addToSchema(Schema.create(Schema.Type.BYTES))); + break; + } + } + + @Override + public void exitArrayType(ArrayTypeContext ctx) { + typeStack.push(Schema.createArray(typeStack.pop())); + } + + @Override + public void exitMapType(MapTypeContext ctx) { + typeStack.push(Schema.createMap(typeStack.pop())); + } + + @Override + public void enterUnionType(UnionTypeContext ctx) { + // push an empty marker union; we'll replace it with the real union upon exit + typeStack.push(Schema.createUnion()); + } + + @Override + public void exitUnionType(UnionTypeContext ctx) { + List<Schema> types = new ArrayList<>(); + Schema type; + while ((type = typeStack.pop()).getType() != Schema.Type.UNION) { + types.add(type); + } + Collections.reverse(types); // Popping the stack works in reverse order + // type is an empty marker union; ignore (drop) it + typeStack.push(Schema.createUnion(types)); + } + + @Override + public void exitJsonValue(JsonValueContext ctx) { + if (ctx.parent instanceof JsonArrayContext) { + JsonNode value = jsonValues.pop(); + assert jsonValues.peek() instanceof ArrayNode; + ((ArrayNode) jsonValues.element()).add(value); + } + } + + @Override + public void exitJsonLiteral(JsonLiteralContext ctx) { + Token literal = ctx.literal; + switch (literal.getType()) { + case IdlParser.Null: + jsonValues.push(NullNode.getInstance()); + break; + case IdlParser.BTrue: + jsonValues.push(BooleanNode.TRUE); + break; + case IdlParser.BFalse: + jsonValues.push(BooleanNode.FALSE); + break; + case IdlParser.IntegerLiteral: + String number = literal.getText().replace("_", ""); + char lastChar = number.charAt(number.length() - 1); + boolean coerceToLong = false; + if (lastChar == 'l' || lastChar == 'L') { + coerceToLong = true; + number = number.substring(0, number.length() - 1); + } + long longNumber = Long.decode(number); + int intNumber = (int) longNumber; // Narrowing cast: if too large a number, the two are different + jsonValues.push(coerceToLong || intNumber != longNumber ? new LongNode(longNumber) : new IntNode(intNumber)); + break; + case IdlParser.FloatingPointLiteral: + jsonValues.push(new DoubleNode(Double.parseDouble(literal.getText()))); + break; + default: // StringLiteral: + jsonValues.push(new TextNode(getString(literal))); + break; + } + } + + @Override + public void enterJsonArray(JsonArrayContext ctx) { + jsonValues.push(new ArrayNode(null)); + } + + @Override + public void enterJsonObject(JsonObjectContext ctx) { + jsonValues.push(new ObjectNode(null)); + } + + @Override + public void exitJsonPair(JsonPairContext ctx) { + String name = getString(ctx.name); + JsonNode value = jsonValues.pop(); + assert jsonValues.peek() instanceof ObjectNode; + ((ObjectNode) jsonValues.element()).set(name, value); + } + + private String identifier(IdentifierContext ctx) { + return ctx.word.getText().replace("`", ""); + } + + private String name(String identifier) { + int dotPos = identifier.lastIndexOf('.'); + String name = identifier.substring(dotPos + 1); + return validateName(name, true); + } + + private String namespace(String identifier, String namespace) { + int dotPos = identifier.lastIndexOf('.'); + String ns = dotPos < 0 ?
namespace : identifier.substring(0, dotPos); + if (ns == null) { + return null; + } + for (int s = 0, e = ns.indexOf('.'); e > 0; s = e + 1, e = ns.indexOf('.', s)) { + validateName(ns.substring(s, e), false); + } + return ns; + } + + private String validateName(String name, boolean isTypeName) { + if (name == null) { + throw new SchemaParseException("Null name"); + } else if (!VALID_NAME.test(name)) { + throw new SchemaParseException("Illegal name: " + name); + } + if (isTypeName && INVALID_TYPE_NAMES.contains(name)) { + throw new SchemaParseException("Illegal name: " + name); + } + return name; + } + + private String fullName(String namespace, String typeName) { + int dotPos = typeName.lastIndexOf('.'); + if (dotPos > -1) { + return typeName; + } + return namespace != null ? namespace + "." + typeName : typeName; + } + + private String getString(Token stringToken) { + String stringLiteral = stringToken.getText(); + String betweenQuotes = stringLiteral.substring(1, stringLiteral.length() - 1); + return StringEscapeUtils.unescapeJava(betweenQuotes); + } + } + + private static class SchemaProperties { + String contextNamespace; + boolean withNamespace; + String namespace; + boolean withAliases; + List<String> aliases; + boolean withOrder; + Schema.Field.Order order; + Map<String, JsonNode> properties; + + public SchemaProperties(String contextNamespace, boolean withNamespace, boolean withAliases, boolean withOrder) { + this.contextNamespace = contextNamespace; + this.withNamespace = withNamespace; + this.withAliases = withAliases; + this.aliases = Collections.emptyList(); + this.withOrder = withOrder; + this.order = Schema.Field.Order.ASCENDING; + this.properties = new LinkedHashMap<>(); + } + + public void addProperty(String name, JsonNode value, Token firstValueToken) { + if (withNamespace && "namespace".equals(name)) { + if (value.isTextual()) { + namespace = value.textValue(); + } else { + throw error("@namespace(...) must contain a String value", firstValueToken); + } + } else if (withAliases && "aliases".equals(name)) { + if (value.isArray()) { + List<String> result = new ArrayList<>(); + Iterator<JsonNode> elements = value.elements(); + elements.forEachRemaining(element -> { + if (element.isTextual()) { + result.add(element.textValue()); + } else { + throw error("@aliases(...) must contain an array of String values", firstValueToken); + } + }); + aliases = result; + } else { + throw error("@aliases(...) must contain an array of String values", firstValueToken); + } + } else if (withOrder && "order".equals(name)) { + if (value.isTextual()) { + String orderValue = value.textValue().toUpperCase(Locale.ROOT); + switch (orderValue) { + case "ASCENDING": + order = Schema.Field.Order.ASCENDING; + break; + case "DESCENDING": + order = Schema.Field.Order.DESCENDING; + break; + case "IGNORE": + order = Schema.Field.Order.IGNORE; + break; + default: + throw error("@order(...) must contain \"ASCENDING\", \"DESCENDING\" or \"IGNORE\"", firstValueToken); + } + } else { + throw error("@order(...) must contain a String value", firstValueToken); + } + } else { + properties.put(name, value); + } + } + + public String namespace() { + return namespace == null ?
contextNamespace : namespace; + } + + public Schema.Field.Order order() { + return order; + } + + public void copyAliases(Consumer<String> addAlias) { + aliases.forEach(addAlias); + } + + public <T extends JsonProperties> T copyProperties(T jsonProperties) { + properties.forEach(jsonProperties::addProp); + if (jsonProperties instanceof Schema) { + Schema schema = (Schema) jsonProperties; + LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(schema); + if (logicalType != null) { + logicalType.addToSchema(schema); + } + } + return jsonProperties; + } + + public boolean hasProperties() { + return !properties.isEmpty(); + } + } +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java b/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java new file mode 100644 index 00000000000..12fd5dbff21 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/IsResolvedSchemaVisitor.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.Schema; + +/** + * This visitor checks if the current schema is fully resolved. + */ +public final class IsResolvedSchemaVisitor implements SchemaVisitor<Boolean> { + boolean hasUnresolvedParts; + + IsResolvedSchemaVisitor() { + hasUnresolvedParts = false; + } + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(terminal); + return hasUnresolvedParts ? SchemaVisitorAction.TERMINATE : SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + hasUnresolvedParts = SchemaResolver.isUnresolvedSchema(nonTerminal); + if (hasUnresolvedParts) { + return SchemaVisitorAction.TERMINATE; + } + if (nonTerminal.getType() == Schema.Type.RECORD && !nonTerminal.hasFields()) { + // We're still initializing the type... + return SchemaVisitorAction.SKIP_SUBTREE; + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Boolean get() { + return !hasUnresolvedParts; + } +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java b/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java new file mode 100644 index 00000000000..04e41f3403a --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/ResolvingVisitor.java @@ -0,0 +1,192 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.AvroTypeException; +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Optional; +import java.util.Set; +import java.util.function.Function; + +import static org.apache.avro.Schema.Type.ARRAY; +import static org.apache.avro.Schema.Type.ENUM; +import static org.apache.avro.Schema.Type.FIXED; +import static org.apache.avro.Schema.Type.MAP; +import static org.apache.avro.Schema.Type.RECORD; +import static org.apache.avro.Schema.Type.UNION; + +/** + * This visitor creates a clone of the visited Schemata, minus the specified + * schema properties, and resolves all unresolved schemas. + */ +public final class ResolvingVisitor implements SchemaVisitor<Schema> { + private static final Set<Schema.Type> CONTAINER_SCHEMA_TYPES = EnumSet.of(RECORD, ARRAY, MAP, UNION); + private static final Set<Schema.Type> NAMED_SCHEMA_TYPES = EnumSet.of(RECORD, ENUM, FIXED); + + private final Function<String, Schema> symbolTable; + private final Set<String> schemaPropertiesToRemove; + private final IdentityHashMap<Schema, Schema> replace; + + private final Schema root; + + public ResolvingVisitor(final Schema root, final Function<String, Schema> symbolTable, + String...
schemaPropertiesToRemove) { + this(root, symbolTable, new HashSet<>(Arrays.asList(schemaPropertiesToRemove))); + } + + public ResolvingVisitor(final Schema root, final Function<String, Schema> symbolTable, + Set<String> schemaPropertiesToRemove) { + this.replace = new IdentityHashMap<>(); + this.symbolTable = symbolTable; + this.schemaPropertiesToRemove = schemaPropertiesToRemove; + + this.root = root; + } + + public ResolvingVisitor withRoot(Schema root) { + return new ResolvingVisitor(root, symbolTable, schemaPropertiesToRemove); + } + + @Override + public SchemaVisitorAction visitTerminal(final Schema terminal) { + Schema.Type type = terminal.getType(); + Schema newSchema; + if (CONTAINER_SCHEMA_TYPES.contains(type)) { + if (!replace.containsKey(terminal)) { + throw new IllegalStateException("Schema " + terminal + " must be already processed"); + } + return SchemaVisitorAction.CONTINUE; + } else if (type == ENUM) { + newSchema = Schema.createEnum(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), + terminal.getEnumSymbols(), terminal.getEnumDefault()); + } else if (type == FIXED) { + newSchema = Schema.createFixed(terminal.getName(), terminal.getDoc(), terminal.getNamespace(), + terminal.getFixedSize()); + } else { + newSchema = Schema.create(type); + } + copyProperties(terminal, newSchema); + replace.put(terminal, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + public void copyProperties(final Schema first, final Schema second) { + // Logical type + Optional.ofNullable(first.getLogicalType()).ifPresent(logicalType -> logicalType.addToSchema(second)); + + // Aliases (if applicable) + if (NAMED_SCHEMA_TYPES.contains(first.getType())) { + first.getAliases().forEach(second::addAlias); + } + + // Other properties + first.getObjectProps().forEach((name, value) -> { + if (!schemaPropertiesToRemove.contains(name)) { + second.addProp(name, value); + } + }); + } + + @Override + public SchemaVisitorAction visitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + if (type == RECORD) { + if (SchemaResolver.isUnresolvedSchema(nt)) { + // unresolved schema will get a replacement that we already encountered, + // or we will attempt to resolve. + final String unresolvedSchemaName = SchemaResolver.getUnresolvedSchemaName(nt); + Schema resSchema = symbolTable.apply(unresolvedSchemaName); + if (resSchema == null) { + throw new AvroTypeException("Unable to resolve " + unresolvedSchemaName); + } + Schema replacement = replace.computeIfAbsent(resSchema, schema -> { + Schemas.visit(schema, this); + return replace.get(schema); + }); + replace.put(nt, replacement); + } else { + // create a fieldless clone. Fields will be added in afterVisitNonTerminal. + Schema newSchema = Schema.createRecord(nt.getName(), nt.getDoc(), nt.getNamespace(), nt.isError()); + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + } + } + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(final Schema nt) { + Schema.Type type = nt.getType(); + Schema newSchema; + switch (type) { + case RECORD: + if (!SchemaResolver.isUnresolvedSchema(nt)) { + newSchema = replace.get(nt); + // Check if we've already handled the replacement schema with a + // reentrant call to visit(...) from within the visitor.
+ if (!newSchema.hasFields()) { + List<Field> fields = nt.getFields(); + List<Field> newFields = new ArrayList<>(fields.size()); + for (Schema.Field field : fields) { + newFields.add(new Field(field, replace.get(field.schema()))); + } + newSchema.setFields(newFields); + } + } + return SchemaVisitorAction.CONTINUE; + case UNION: + List<Schema> types = nt.getTypes(); + List<Schema> newTypes = new ArrayList<>(types.size()); + for (Schema sch : types) { + newTypes.add(replace.get(sch)); + } + newSchema = Schema.createUnion(newTypes); + break; + case ARRAY: + newSchema = Schema.createArray(replace.get(nt.getElementType())); + break; + case MAP: + newSchema = Schema.createMap(replace.get(nt.getValueType())); + break; + default: + throw new IllegalStateException("Illegal type " + type + ", schema " + nt); + } + copyProperties(nt, newSchema); + replace.put(nt, newSchema); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public Schema get() { + return replace.get(root); + } + + @Override + public String toString() { + return "ResolvingVisitor{symbolTable=" + symbolTable + ", schemaPropertiesToRemove=" + schemaPropertiesToRemove + + ", replace=" + replace + '}'; + } +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java new file mode 100644 index 00000000000..8c9a9c15b99 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaResolver.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.JsonProperties; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; + +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Utility class to resolve schemas that are unavailable at the point they are + * referenced in the IDL. + */ +final class SchemaResolver { + + private SchemaResolver() { + } + + private static final String UR_SCHEMA_ATTR = "org.apache.avro.idl.unresolved.name"; + + private static final String UR_SCHEMA_NAME = "UnresolvedSchema"; + + private static final String UR_SCHEMA_NS = "org.apache.avro.compiler"; + + private static final AtomicInteger COUNTER = new AtomicInteger(); + + /** + * Create a schema to represent an "unresolved" schema (used as a placeholder + * for a schema whose definition does not exist yet).
+ * + * @param name a schema name + * @return an unresolved schema for the given name + */ + static Schema unresolvedSchema(final String name) { + Schema schema = Schema.createRecord(UR_SCHEMA_NAME + '_' + COUNTER.getAndIncrement(), "unresolved schema", + UR_SCHEMA_NS, false, Collections.emptyList()); + schema.addProp(UR_SCHEMA_ATTR, name); + return schema; + } + + /** + * Is this an unresolved schema? + * + * @param schema a schema + * @return whether the schema is an unresolved schema + */ + static boolean isUnresolvedSchema(final Schema schema) { + return (schema.getType() == Schema.Type.RECORD && schema.getProp(UR_SCHEMA_ATTR) != null && schema.getName() != null + && schema.getName().startsWith(UR_SCHEMA_NAME) && UR_SCHEMA_NS.equals(schema.getNamespace())); + } + + /** + * Get the unresolved schema name. + * + * @param schema an unresolved schema + * @return the name of the unresolved schema + */ + static String getUnresolvedSchemaName(final Schema schema) { + if (!isUnresolvedSchema(schema)) { + throw new IllegalArgumentException("Not an unresolved schema: " + schema); + } + return schema.getProp(UR_SCHEMA_ATTR); + } + + /** + * Is this schema fully resolved (i.e., free of unresolved schemas)? + */ + static boolean isFullyResolvedSchema(final Schema schema) { + if (isUnresolvedSchema(schema)) { + return false; + } else { + return Schemas.visit(schema, new IsResolvedSchemaVisitor()); + } + } + + /** + * Clone all provided schemas while resolving all unresolved schemas. + * + * @param idlFile a parsed IDL file + * @return a copy of idlFile with all schemas resolved + */ + static IdlFile resolve(final IdlFile idlFile, String... schemaPropertiesToRemove) { + if (idlFile.getProtocol() != null) { + return new IdlFile(resolve(idlFile.getProtocol(), schemaPropertiesToRemove), idlFile.getWarnings()); + } + + ResolvingVisitor visitor = new ResolvingVisitor(null, idlFile::getNamedSchema, schemaPropertiesToRemove); + Function<Schema, Schema> resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); + + List<Schema> namedSchemata = idlFile.getNamedSchemas().values().stream().map(resolver).collect(Collectors.toList()); + Schema mainSchema = Optional.ofNullable(idlFile.getMainSchema()).map(resolver).orElse(null); + return new IdlFile(idlFile.getNamespace(), mainSchema, namedSchemata, idlFile.getWarnings()); + } + + /** + * Clone the provided protocol while resolving all unresolved schemas. + * + * @param protocol a parsed protocol + * @return a copy of the protocol with all schemas resolved + */ + static Protocol resolve(final Protocol protocol, String... schemaPropertiesToRemove) { + // Create an empty copy of the protocol + Protocol result = new Protocol(protocol.getName(), protocol.getDoc(), protocol.getNamespace()); + protocol.getObjectProps().forEach(((JsonProperties) result)::addProp); + + ResolvingVisitor visitor = new ResolvingVisitor(null, protocol::getType, schemaPropertiesToRemove); + Function<Schema, Schema> resolver = schema -> Schemas.visit(schema, visitor.withRoot(schema)); + + // Resolve all schemata in the protocol.
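+ // Each schema is mapped through the re-rooted ResolvingVisitor: unresolved
+ // placeholders (created by unresolvedSchema above) are swapped for the
+ // protocol's actual named types, and all other schemas are deep-copied.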
+ result.setTypes(protocol.getTypes().stream().map(resolver).collect(Collectors.toList())); + Map<String, Protocol.Message> resultMessages = result.getMessages(); + protocol.getMessages().forEach((name, oldValue) -> { + Protocol.Message newValue; + if (oldValue.isOneWay()) { + newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, + resolver.apply(oldValue.getRequest())); + } else { + Schema request = resolver.apply(oldValue.getRequest()); + Schema response = resolver.apply(oldValue.getResponse()); + Schema errors = resolver.apply(oldValue.getErrors()); + newValue = result.createMessage(oldValue.getName(), oldValue.getDoc(), oldValue, request, response, errors); + } + resultMessages.put(name, newValue); + }); + return result; + } +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java new file mode 100644 index 00000000000..0f9fcae5b68 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitor.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.Schema; + +public interface SchemaVisitor<T> { + + /** + * Invoked for schemas that do not have "child" schemas (like string, int …) or + * for a previously encountered schema with children, which will be treated as a + * terminal (to avoid circular recursion). + */ + SchemaVisitorAction visitTerminal(Schema terminal); + + /** + * Invoked for a schema with children, before its children are visited. + */ + SchemaVisitorAction visitNonTerminal(Schema nonTerminal); + + /** + * Invoked for a schema with children, after its children have been visited. + */ + SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal); + + /** + * Invoked when visiting is complete. + * + * @return a value that will be returned by the visit method. + */ + T get(); +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java new file mode 100644 index 00000000000..6aed09b3d32 --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/SchemaVisitorAction.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +public enum SchemaVisitorAction { + + /** + * Continue the visit. + */ + CONTINUE, + /** + * Terminate the visit. + */ + TERMINATE, + /** + * When returned from the visitNonTerminal method, the children of the + * non-terminal are skipped; afterVisitNonTerminal for the current schema will + * not be invoked. + */ + SKIP_SUBTREE, + /** + * Skip visiting the siblings of this schema. + */ + SKIP_SIBLINGS +} diff --git a/lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java b/lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java new file mode 100644 index 00000000000..da4b949d2bc --- /dev/null +++ b/lang/java/idl/src/main/java/org/apache/avro/idl/Schemas.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.Schema; +import org.apache.avro.Schema.Field; + +import java.util.ArrayDeque; +import java.util.Collections; +import java.util.Deque; +import java.util.IdentityHashMap; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +/** + * Avro Schema utilities for traversing schemas. + */ +public final class Schemas { + + private Schemas() { + } + + /** + * Depth-first visit. + */ + public static <T> T visit(final Schema start, final SchemaVisitor<T> visitor) { + // Set of Visited Schemas + IdentityHashMap<Schema, Schema> visited = new IdentityHashMap<>(); + // Stack that contains the Schemas to process and afterVisitNonTerminal + // functions. + // Deque<Either<Schema, Supplier<SchemaVisitorAction>>> + // Using Either<...> has a cost we want to avoid... + Deque<Object> dq = new ArrayDeque<>(); + dq.push(start); + Object current; + while ((current = dq.poll()) != null) { + if (current instanceof Supplier) { + // We are executing a non-terminal post visit.
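+ // Schemas are pushed onto the deque as-is; post-visit callbacks are pushed
+ // as Supplier<SchemaVisitorAction>, so that a non-terminal's
+ // afterVisitNonTerminal runs once all of its children have been processed.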
+ @SuppressWarnings("unchecked") + SchemaVisitorAction action = ((Supplier<SchemaVisitorAction>) current).get(); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return visitor.get(); + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action); + } + } else { + Schema schema = (Schema) current; + boolean terminate; + if (visited.containsKey(schema)) { + terminate = visitTerminal(visitor, schema, dq); + } else { + Schema.Type type = schema.getType(); + switch (type) { + case ARRAY: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getElementType())); + visited.put(schema, schema); + break; + case RECORD: + terminate = visitNonTerminal(visitor, schema, dq, () -> schema.getFields().stream().map(Field::schema) + .collect(Collectors.toCollection(ArrayDeque::new)).descendingIterator()); + visited.put(schema, schema); + break; + case UNION: + terminate = visitNonTerminal(visitor, schema, dq, schema.getTypes()); + visited.put(schema, schema); + break; + case MAP: + terminate = visitNonTerminal(visitor, schema, dq, Collections.singleton(schema.getValueType())); + visited.put(schema, schema); + break; + default: + terminate = visitTerminal(visitor, schema, dq); + break; + } + } + if (terminate) { + return visitor.get(); + } + } + } + return visitor.get(); + } + + private static boolean visitNonTerminal(final SchemaVisitor<?> visitor, final Schema schema, final Deque<Object> dq, + final Iterable<Schema> itSupp) { + SchemaVisitorAction action = visitor.visitNonTerminal(schema); + switch (action) { + case CONTINUE: + dq.push((Supplier<SchemaVisitorAction>) () -> visitor.afterVisitNonTerminal(schema)); + itSupp.forEach(dq::push); + break; + case SKIP_SUBTREE: + dq.push((Supplier<SchemaVisitorAction>) () -> visitor.afterVisitNonTerminal(schema)); + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } + + private static boolean visitTerminal(final SchemaVisitor<?> visitor, final Schema schema, final Deque<Object> dq) { + SchemaVisitorAction action = visitor.visitTerminal(schema); + switch (action) { + case CONTINUE: + break; + case SKIP_SIBLINGS: + while (dq.peek() instanceof Schema) { + dq.remove(); + } + break; + case TERMINATE: + return true; + case SKIP_SUBTREE: + default: + throw new UnsupportedOperationException("Invalid action " + action + " for " + schema); + } + return false; + } +} diff --git a/lang/java/idl/src/main/resources/META-INF/LICENSE b/lang/java/idl/src/main/resources/META-INF/LICENSE new file mode 100644 index 00000000000..62589edd12a --- /dev/null +++ b/lang/java/idl/src/main/resources/META-INF/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + https://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity.
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
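Editor's note: a self-contained sketch (not part of the patch) showing how the resolver utilities above fit together. The class name ResolutionDemo is hypothetical; it has to live in the org.apache.avro.idl package because SchemaResolver and the IsResolvedSchemaVisitor constructor are package-private:

```java
package org.apache.avro.idl;

import java.util.Collections;
import org.apache.avro.Schema;

public class ResolutionDemo {
  public static void main(String[] args) {
    // A placeholder record that carries the referenced name in a marker property:
    Schema placeholder = SchemaResolver.unresolvedSchema("org.foo.Bar");

    // A record whose single field references the not-yet-defined type:
    Schema holder = Schema.createRecord("Holder", null, "org.foo", false,
        Collections.singletonList(new Schema.Field("bar", placeholder, null)));

    // The visitor walks the schema graph and reports it as not fully resolved:
    System.out.println(Schemas.visit(holder, new IsResolvedSchemaVisitor())); // false
    System.out.println(SchemaResolver.isFullyResolvedSchema(holder)); // false
  }
}
```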
diff --git a/lang/java/idl/src/test/idl/AnnotationOnTypeReference.avdl b/lang/java/idl/src/test/idl/AnnotationOnTypeReference.avdl new file mode 100644 index 00000000000..03f6f7c6f27 --- /dev/null +++ b/lang/java/idl/src/test/idl/AnnotationOnTypeReference.avdl @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A stripped-down version of a previous `simple.avdl`, keeping the part where a type reference had an annotation (which is invalid). + */ +@namespace("org.apache.avro.test") +protocol Simple { + /** An MD5 hash. */ + fixed MD5(16); + + /** A TestRecord. */ + record TestRecord { + @foo("bar") MD5 hash = "0000000000000000"; + } +} diff --git a/lang/java/idl/src/test/idl/cycle.avdl b/lang/java/idl/src/test/idl/cycle.avdl new file mode 100644 index 00000000000..fe5bbbe4f0f --- /dev/null +++ b/lang/java/idl/src/test/idl/cycle.avdl @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("org.apache.avro.gen.test") +protocol Cycle { + + record Record1 { + string fString = ""; + Record3 rec3; + } + + record Record2 { + TestFixed fFixed; + int val; + union {null, Record1} fRec1; + } + + record Record3 { + TestEnum fEnum; + Record2 rec2; + } + + enum TestEnum { bla, blu } + + fixed TestFixed(16); + +} diff --git a/lang/java/idl/src/test/idl/extra/protocolSyntax.avdl b/lang/java/idl/src/test/idl/extra/protocolSyntax.avdl new file mode 100644 index 00000000000..8edc79876cc --- /dev/null +++ b/lang/java/idl/src/test/idl/extra/protocolSyntax.avdl @@ -0,0 +1,9 @@ +@namespace("communication") +protocol Parrot { + record Message { + string? title; + string message; + } + + Message echo(Message message); +} diff --git a/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl b/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl new file mode 100644 index 00000000000..1d88ba6e43b --- /dev/null +++ b/lang/java/idl/src/test/idl/extra/schemaSyntax.avdl @@ -0,0 +1,8 @@ +namespace communication; + +schema array<Message>; + +record Message { + string?
title; + string message; +} diff --git a/lang/java/idl/src/test/idl/input/bar.avpr b/lang/java/idl/src/test/idl/input/bar.avpr new file mode 100644 index 00000000000..5e9b194a060 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/bar.avpr @@ -0,0 +1,2 @@ +{"protocol": "org.foo.Bar", + "messages": { "bar": {"request": [], "response": "null"}}} diff --git a/lang/java/idl/src/test/idl/input/baseball.avdl b/lang/java/idl/src/test/idl/input/baseball.avdl new file mode 100644 index 00000000000..e485e89ba2a --- /dev/null +++ b/lang/java/idl/src/test/idl/input/baseball.avdl @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("avro.examples.baseball") +protocol Baseball { + import schema "position.avsc"; + import schema "player.avsc"; +} diff --git a/lang/java/idl/src/test/idl/input/baz.avsc b/lang/java/idl/src/test/idl/input/baz.avsc new file mode 100644 index 00000000000..efd68d97354 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/baz.avsc @@ -0,0 +1,3 @@ +{"type": "record", "name": "ns.other.schema.Baz", + "fields": [ {"name": "x", "type": "int"} ] +} diff --git a/lang/java/idl/src/test/idl/input/comments.avdl b/lang/java/idl/src/test/idl/input/comments.avdl new file mode 100644 index 00000000000..76be4e314bf --- /dev/null +++ b/lang/java/idl/src/test/idl/input/comments.avdl @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +@namespace("testing") +protocol Comments { + /** Documented Enum */ + enum /** Dangling Enum1 */ DocumentedEnum /** Dangling Enum2 */ { + /** Dangling Enum3 */ A, + /** Dangling Enum4 */ B, + /** Dangling Enum5 */ C + /** Dangling Enum6 */} + /** Dangling Enum7 */= + /** Dangling Enum8 */ A + /** Dangling Enum9 */; + + enum UndocumentedEnum {D,E} + + /** Documented Fixed Type */ fixed + /** Dangling Fixed1 */ DocumentedFixed + /** Dangling Fixed2 */( + /** Dangling Fixed3 */ 16 + /** Dangling Fixed4 */) + /** Dangling Fixed5 */; + + fixed UndocumentedFixed(16); + + /** Documented Error */ error + /** Dangling Error1 */ DocumentedError + /** Dangling Field1 */{ + /** Default Doc Explanation Field */string + /** Documented Reason Field */reason, explanation + /** Dangling Field2 */; + /** Dangling Error2 */} + + record UndocumentedRecord { + string description; + } + + /** Documented Method */ void + /** Dangling Param1 */ documentedMethod + /** Dangling Param2 */( + string /** Documented Parameter */ message, + /** Default Documented Parameter */ string defMsg + /** Dangling Param3 */) + /** Dangling Method1 */ throws + /** Dangling Method2 */ DocumentedError + /** Dangling Method3 */; + + void undocumentedMethod(string message); +} diff --git a/lang/java/idl/src/test/idl/input/cycle.avdl b/lang/java/idl/src/test/idl/input/cycle.avdl new file mode 100644 index 00000000000..fc6fee954f6 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/cycle.avdl @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("org.apache.avro.gen") +protocol Cycle { + + record SampleNode { + int count = 0; + array<SamplePair> subNodes; + } + + record Method { + string @testAttribute("testValue") declaringClass; + string methodName; + } + + record SamplePair { + Method method; + SampleNode node; + } + + record SelfRef { + string something; + array<SelfRef> subNodes = []; + } + +} diff --git a/lang/java/idl/src/test/idl/input/echo.avdl b/lang/java/idl/src/test/idl/input/echo.avdl new file mode 100644 index 00000000000..1673a125506 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/echo.avdl @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("org.apache.avro.echo") +protocol Echo { + record Ping { + long timestamp = -1; + string text = ""; + } + + record Pong { + long timestamp = -1; + Ping ping; + } + + Pong ping(Ping ping); +} diff --git a/lang/java/idl/src/test/idl/input/foo.avsc b/lang/java/idl/src/test/idl/input/foo.avsc new file mode 100644 index 00000000000..83267ea4760 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/foo.avsc @@ -0,0 +1,3 @@ +{"type": "record", "name": "org.foo.Foo", + "fields": [ {"name": "x", "type": "int"} ] +} diff --git a/lang/java/idl/src/test/idl/input/forward_ref.avdl b/lang/java/idl/src/test/idl/input/forward_ref.avdl new file mode 100644 index 00000000000..b75d60a4efd --- /dev/null +++ b/lang/java/idl/src/test/idl/input/forward_ref.avdl @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +@namespace("org.foo") +protocol Import { + /* Name Value record */ + record ANameValue { + /** the name */ + string name; + /** the value */ + string value; + /* is the value a json object */ + ValueType type = "PLAIN"; + } + + enum ValueType { + JSON, BASE64BIN, PLAIN + } +} diff --git a/lang/java/idl/src/test/idl/input/import.avdl b/lang/java/idl/src/test/idl/input/import.avdl new file mode 100644 index 00000000000..8cd6a163759 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/import.avdl @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("org.foo") +protocol Import { + import idl "reservedwords.avdl"; + import idl "nestedimport.avdl"; + + // Note that this import is resolved via the classpath, not a relative path.
+ import idl "OnTheClasspath.avdl"; + import protocol "OnTheClasspath.avpr"; + import schema "OnTheClasspath.avsc"; + + import schema "baz.avsc"; + import schema "foo.avsc"; + import protocol "bar.avpr"; + + record Bar { + ns.other.schema.Baz baz; + Foo foo; + } + + void bazm(ns.other.schema.Baz baz); + Bar barf(Foo foo); +} diff --git a/lang/java/idl/src/test/idl/input/interop.avdl b/lang/java/idl/src/test/idl/input/interop.avdl new file mode 100644 index 00000000000..497a7902aac --- /dev/null +++ b/lang/java/idl/src/test/idl/input/interop.avdl @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Currently genavro only does Protocols. +@namespace("org.apache.avro") +protocol InteropProtocol { + record Foo { + string label; + } + + enum Kind { A, B, C } + fixed MD5(16); + + record Node { + string label; + array<Node> children = []; + } + + record Interop { + int intField = 1; + long longField = -1; + string stringField; + boolean boolField = false; + float floatField = 0.0; + double doubleField = -1.0e12; + null nullField; + array<double> arrayField = []; + map<Foo> mapField; + union { boolean, double, array<bytes> } unionFIeld; + Kind enumField; + MD5 fixedField; + Node recordField; + } + +} diff --git a/lang/java/idl/src/test/idl/input/mr_events.avdl b/lang/java/idl/src/test/idl/input/mr_events.avdl new file mode 100644 index 00000000000..ffb90e97193 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/mr_events.avdl @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Genavro format for a particular protocol found in Hadoop MapReduce. + * Used as a test case/example to show that we can express real-world stuff more + * succinctly.
+ */
+@namespace("org.apache.hadoop.mapreduce.jobhistory")
+protocol Events {
+ record JhCounter {
+ string name;
+ string displayName;
+ long value;
+ }
+
+ record JhCounterGroup {
+ string name;
+ string displayName;
+ array<JhCounter> counts;
+ }
+
+ record JhCounters {
+ string name;
+ array<JhCounterGroup> groups;
+ }
+
+ record JobFinished {
+ string jobid;
+ timestamp_ms finishTime;
+ decimal(9,2) finishRatio;
+ int finishedMaps;
+ int finishedReduces;
+ int failedMaps;
+ int failedReduces;
+ JhCounters totalCounters;
+ JhCounters mapCounters;
+ JhCounters reduceCounters;
+ }
+
+ record JobInited {
+ string jobid;
+ timestamp_ms launchTime;
+ int totalMaps;
+ int totalReduces;
+ string jobStatus;
+ }
+
+ record JobSubmitted {
+ string jobid;
+ string jobName;
+ string userName;
+ timestamp_ms submitTime;
+ local_timestamp_ms submitTimeLocal;
+ string jobConfPath;
+ }
+
+ // ... TODO continue
+} diff --git a/lang/java/idl/src/test/idl/input/namespaces.avdl b/lang/java/idl/src/test/idl/input/namespaces.avdl new file mode 100644 index 00000000000..9eb2027ab0c --- /dev/null +++ b/lang/java/idl/src/test/idl/input/namespaces.avdl @@ -0,0 +1,42 @@ +/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+@namespace("avro.test.protocol")
+protocol TestNamespace {
+ @namespace("avro.test.fixed")
+ fixed FixedInOtherNamespace(16);
+
+ fixed FixedInThisNamespace(16);
+
+ @namespace("avro.test.record")
+ record RecordInOtherNamespace {}
+
+ @namespace("avro.test.error")
+ error ErrorInOtherNamespace {}
+
+ @namespace("avro.test.enum")
+ enum EnumInOtherNamespace { FOO }
+
+ record RefersToOthers {
+ avro.test.fixed.FixedInOtherNamespace someFixed;
+ avro.test.record.RecordInOtherNamespace someRecord;
+ avro.test.error.ErrorInOtherNamespace someError;
+ avro.test.enum.EnumInOtherNamespace someEnum;
+ FixedInThisNamespace thisFixed;
+ }
+} diff --git a/lang/java/idl/src/test/idl/input/nestedimport.avdl b/lang/java/idl/src/test/idl/input/nestedimport.avdl new file mode 100644 index 00000000000..13c6981361a --- /dev/null +++ b/lang/java/idl/src/test/idl/input/nestedimport.avdl @@ -0,0 +1,31 @@ +/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@version("1.0.5") +@namespace("org.apache.avro.ipc.specific") +protocol nestedimport { + import idl "reservedwords.avdl"; + import protocol "bar.avpr"; + import schema "position.avsc"; + import schema "player.avsc"; + + record ImportBar { + avro.examples.baseball.Player foo; + } + +} \ No newline at end of file diff --git a/lang/java/idl/src/test/idl/input/player.avsc b/lang/java/idl/src/test/idl/input/player.avsc new file mode 100644 index 00000000000..0492850ef5e --- /dev/null +++ b/lang/java/idl/src/test/idl/input/player.avsc @@ -0,0 +1,8 @@ +{"type":"record", "name":"Player", "namespace": "avro.examples.baseball", + "fields": [ + {"name": "number", "type": "int"}, + {"name": "first_name", "type": "string"}, + {"name": "last_name", "type": "string"}, + {"name": "position", "type": {"type": "array", "items": "Position"} } + ] +} diff --git a/lang/java/idl/src/test/idl/input/position.avsc b/lang/java/idl/src/test/idl/input/position.avsc new file mode 100644 index 00000000000..a47065a35f4 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/position.avsc @@ -0,0 +1,3 @@ +{"type":"enum", "name": "Position", "namespace": "avro.examples.baseball", + "symbols": ["P", "C", "B1", "B2", "B3", "SS", "LF", "CF", "RF", "DH"] +} diff --git a/lang/java/idl/src/test/idl/input/reservedwords.avdl b/lang/java/idl/src/test/idl/input/reservedwords.avdl new file mode 100644 index 00000000000..f2112aae7f2 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/reservedwords.avdl @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +protocol Foo { + void `error`(); + void `void`(); + void `idl`(); + void `import`(); + void `oneway`(); + void `null`(); + void `local_timestamp_ms`(); +} diff --git a/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl b/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl new file mode 100644 index 00000000000..1df43f7a656 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/schema_syntax_schema.avdl @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * NOTE: the correct extension for this new syntax is also '.avdl'. The test
+ * files end with '_schema.avdl' only to distinguish them from .avdl files using
+ * the protocol syntax, because the result is a schema file instead of a protocol file.
+ */
+namespace monitoring;
+schema array<StatusUpdate>;
+
+record StatusUpdate {
+ /**
+ * The moment of the status change.
+ */
+ timestamp_ms timestamp;
+ /**
+ * The process whose status changed.
+ */
+ string processName;
+ /**
+ * The new status of the process.
+ */
+ Status newStatus;
+ /**
+ * A description why this status change occurred (optional).
+ */
+ string? description;
+}
+
+import idl "status_schema.avdl";
+import schema "foo.avsc";
+import protocol "bar.avpr"; diff --git a/lang/java/idl/src/test/idl/input/simple.avdl b/lang/java/idl/src/test/idl/input/simple.avdl new file mode 100644 index 00000000000..27949547a1d --- /dev/null +++ b/lang/java/idl/src/test/idl/input/simple.avdl @@ -0,0 +1,87 @@ +/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A simple test case.
+ */
+@version("1.0.5")
+@namespace("org.apache.avro.test")
+protocol Simple {
+ /** A kind of record. */
+ @aliases(["org.foo.KindOf"])
+ enum Kind {
+ FOO,
+ BAR, // the bar enum value
+ BAZ
+ }
+
+ enum Status {
+ A,
+ B,
+ C
+ } = C; // C is the default value used when reading unknown values from another schema version (without it, reading throws an exception).
+
+ /** A TestRecord. */
+ @my-property({"key":3})
+ record TestRecord {
+ // Tests that keywords can also appear in identifiers.
+ @avro.java.`string`("String") string @order("ignore") name = "foo";
+
+ /** The kind of record. */
+ Kind @order("descending") kind;
+
+ /** The status of the record. */
+ Status status = "A";
+
+ MD5 hash = "0000000000000000";
+
+ // A traditional optional field
+ union {null, MD5} @aliases(["hash", "hsh"]) nullableHash = null;
+
+ // These two fields parse correctly, but will break (be changed to strings) when serializing the protocol/schema as JSON.
+ double value = NaN;
+ float average = -Infinity;
+ date d = 0;
+ // An optional type with a non-null default value (results in a union with null last).
+ time_ms? t = 0;
+
+ @foo.bar("bar.foo") long l = 0;
+ // Arrays (and maps) may also have properties
+ @foo.bar.bar("foo.bar2") array<string> a = [];
+ // An optional type with a null default value (results in a union with null first).
+ @foo.foo.bar(42) @foo.foo.foo("3foo") string? prop = null;
+ }
+
+ /** An MD5 hash. */
+ fixed MD5(0x10);
+
+ error TestError {
+ string message;
+ }
+
+ /** method 'hello' takes @parameter 'greeting' */
+ string hello(string greeting);
+ // The value of TestRecord also contains defaults for fields not mentioned.
+ TestRecord echo(TestRecord `record` = {"name":"bar","kind":"BAR"});
+ /** method 'add' takes @parameter 'arg1' @parameter 'arg2' */
+ @specialProp("test")
+ int add(int arg1, int arg2 = 0);
+ bytes echoBytes(bytes data);
+ void `error`() throws TestError;
+ void ping() oneway;
+} diff --git a/lang/java/idl/src/test/idl/input/status_schema.avdl b/lang/java/idl/src/test/idl/input/status_schema.avdl new file mode 100644 index 00000000000..504218a4fcb --- /dev/null +++ b/lang/java/idl/src/test/idl/input/status_schema.avdl @@ -0,0 +1,3 @@ +enum Status {
+ UNKNOWN, NEW, STARTUP, RUNNING, TERMINATING, SHUTDOWN, CRASHED
+} = UNKNOWN; diff --git a/lang/java/idl/src/test/idl/input/unicode.avdl b/lang/java/idl/src/test/idl/input/unicode.avdl new file mode 100644 index 00000000000..f16fb2f95dc --- /dev/null +++ b/lang/java/idl/src/test/idl/input/unicode.avdl @@ -0,0 +1,29 @@ +/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+* This is a test that UTF8 functions correctly.
+* このテストでは、UTF - 8で正しく機能している。
+* 这是一个测试,UTF - 8的正常运行。
+*/
+protocol Протоколы {
+ record Структура {
+ string Строковый;
+ string 文字列;
+ }
+} diff --git a/lang/java/idl/src/test/idl/input/union.avdl b/lang/java/idl/src/test/idl/input/union.avdl new file mode 100644 index 00000000000..19f37f2f748 --- /dev/null +++ b/lang/java/idl/src/test/idl/input/union.avdl @@ -0,0 +1,16 @@ +@namespace("org.apache.avro.gen")
+protocol UnionFwd {
+
+ record TestRecord {
+ union {SR1, SR2} unionField;
+ }
+
+ record SR1 {
+ string field;
+ }
+
+ record SR2 {
+ string field;
+ }
+
+} diff --git a/lang/java/idl/src/test/idl/input/uuid.avdl b/lang/java/idl/src/test/idl/input/uuid.avdl new file mode 100644 index 00000000000..49a83f45eaa --- /dev/null +++ b/lang/java/idl/src/test/idl/input/uuid.avdl @@ -0,0 +1,46 @@ +/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/** +Testing UUID fields +*/ + +@namespace("org.apache.avro") +protocol MyProtocol { + record APlaygroundEvent { + + /** + * Documentation must be provided for each attribute + */ + uuid identifier; + + /** + * A string field with a special name + */ + string `uuid`; + + + /** + * a nullable uuid field + */ + + union { null, uuid } optionalString; + } +} diff --git a/lang/java/idl/src/test/idl/logicalTypes.avdl b/lang/java/idl/src/test/idl/logicalTypes.avdl new file mode 100644 index 00000000000..9e4a284da9b --- /dev/null +++ b/lang/java/idl/src/test/idl/logicalTypes.avdl @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +@version("1.0.5") +@namespace("org.apache.avro.test") +protocol LogicalTypeTest { + record LogicalTypeFields { + date aDate; + time_ms aTime; + timestamp_ms aTimestamp; + local_timestamp_ms aLocalTimestamp; + decimal(6,2) pocketMoney; + uuid identifier; + @logicalType("timestamp-micros") long anotherTimestamp; + @logicalType("decimal") @precision(6) @scale(2) bytes allowance; + @logicalType("decimal") @precision(3000000000) @scale(0) bytes byteArray; + } +} diff --git a/lang/java/idl/src/test/idl/output/baseball.avpr b/lang/java/idl/src/test/idl/output/baseball.avpr new file mode 100644 index 00000000000..715cdde8847 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/baseball.avpr @@ -0,0 +1,31 @@ +{ + "protocol" : "Baseball", + "namespace" : "avro.examples.baseball", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "types" : [ { + "type" : "enum", + "name" : "Position", + "symbols" : [ "P", "C", "B1", "B2", "B3", "SS", "LF", "CF", "RF", "DH" ] + }, { + "type" : "record", + "name" : "Player", + "fields" : [ { + "name" : "number", + "type" : "int" + }, { + "name" : "first_name", + "type" : "string" + }, { + "name" : "last_name", + "type" : "string" + }, { + "name" : "position", + "type" : { + "type" : "array", + "items" : "Position" + } + } ] + } ], + "messages" : { + } +} diff --git a/lang/java/idl/src/test/idl/output/comments.avpr b/lang/java/idl/src/test/idl/output/comments.avpr new file mode 100644 index 00000000000..9901f8eebc8 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/comments.avpr @@ -0,0 +1,67 @@ +{ + "protocol" : "Comments", + "namespace" : "testing", + "types" : [ { + "type" : "enum", + "name" : "DocumentedEnum", + "doc" : "Documented Enum", + "symbols" : [ "A", "B", "C" ], + "default" : "A" + }, { + "type" : "enum", + "name" : "UndocumentedEnum", + "symbols" : [ "D", "E" ] + }, { + "type" : "fixed", + "name" : "DocumentedFixed", + "doc" : "Documented Fixed Type", + "size" : 16 + }, { + "type" : "fixed", + "name" : "UndocumentedFixed", + "size" : 16 + }, { + "type" : "error", + "name" : "DocumentedError", + "doc" : "Documented Error", + "fields" : [ { + "name" : "reason", + "type" : "string", + "doc" : "Documented Reason Field" + }, { + "name" : "explanation", + "type" : "string", + "doc" : "Default Doc Explanation Field" + } ] + }, { + "type" : "record", + "name" : "UndocumentedRecord", + "fields" : [ { + "name" : "description", + "type" : "string" + } ] + } ], + "messages" : { + "documentedMethod" : { + "doc" : "Documented Method", + "request" : [ { + "name" : "message", + "type" : "string", + "doc" : "Documented Parameter" + }, { + "name" : "defMsg", + "type" : "string", + "doc" : "Default Documented Parameter" + } ], + "response" : "null", + "errors" : [ "DocumentedError" ] + }, + "undocumentedMethod" : { + "request" : [ { + "name" : "message", + "type" : "string" + } ], + "response" : "null" + } + } +} diff --git a/lang/java/idl/src/test/idl/output/cycle.avpr b/lang/java/idl/src/test/idl/output/cycle.avpr new file mode 100644 index 00000000000..e8b15835c7a --- /dev/null +++ b/lang/java/idl/src/test/idl/output/cycle.avpr @@ -0,0 +1,55 @@ +{ + "protocol" : "Cycle", + "namespace" : "org.apache.avro.gen", + "types" : [ { + "type" : "record", + "name" : "SampleNode", + "fields" : [ { + "name" : "count", + "type" : "int", + "default" : 0 + }, { + "name" : "subNodes", + "type" : { + "type" : "array", + "items" : { + "type" : "record", + "name" : "SamplePair", + "fields" : [ { + "name" : "method", + "type" : { + "type" : "record", + "name" : "Method", + "fields" : [ { + "name" : "declaringClass", + "type" : "string", + "testAttribute":"testValue" + }, { + "name" : "methodName", + "type" : "string" + } ] + } + }, { + "name" : "node", + "type" : "SampleNode" + } ] + } + } + } ] + }, { + "type" : "record", + "name" : "SelfRef", + "fields" : [ { + "name" : "something", + "type" : "string" + }, { + "name" : "subNodes", + "type" : { + "type" : "array", + "items" : "SelfRef" + }, + "default" 
: [ ] + } ] + } ], + "messages" : { } +} diff --git a/lang/java/idl/src/test/idl/output/echo.avpr b/lang/java/idl/src/test/idl/output/echo.avpr new file mode 100644 index 00000000000..dbf9b99134a --- /dev/null +++ b/lang/java/idl/src/test/idl/output/echo.avpr @@ -0,0 +1,37 @@ +{ + "protocol" : "Echo", + "namespace" : "org.apache.avro.echo", + "types" : [ { + "type" : "record", + "name" : "Ping", + "fields" : [ { + "name" : "timestamp", + "type" : "long", + "default" : -1 + }, { + "name" : "text", + "type" : "string", + "default" : "" + } ] + }, { + "type" : "record", + "name" : "Pong", + "fields" : [ { + "name" : "timestamp", + "type" : "long", + "default" : -1 + }, { + "name" : "ping", + "type" : "Ping" + } ] + } ], + "messages" : { + "ping" : { + "request" : [ { + "name" : "ping", + "type" : "Ping" + } ], + "response" : "Pong" + } + } +} \ No newline at end of file diff --git a/lang/java/idl/src/test/idl/output/forward_ref.avpr b/lang/java/idl/src/test/idl/output/forward_ref.avpr new file mode 100644 index 00000000000..a349206a37f --- /dev/null +++ b/lang/java/idl/src/test/idl/output/forward_ref.avpr @@ -0,0 +1,16 @@ +{ + "protocol": "Import", + "namespace": "org.foo", + "types": [ + { + "type": "record", + "name": "ANameValue", + "fields": [ + { "name":"name", "type": "string", "doc":"the name" }, + { "name": "value", "type": "string", "doc": "the value" }, + { "name": "type", "type": { "type": "enum", "name":"ValueType", "symbols": ["JSON","BASE64BIN","PLAIN"] }, "default": "PLAIN" } + ] + } + ], + "messages": { } +} diff --git a/lang/java/idl/src/test/idl/output/import.avpr b/lang/java/idl/src/test/idl/output/import.avpr new file mode 100644 index 00000000000..e6701ad94e1 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/import.avpr @@ -0,0 +1,132 @@ +{ + "protocol" : "Import", + "namespace" : "org.foo", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "types" : [ { + "type" : "enum", + "name" : "Position", + "namespace" : "avro.examples.baseball", + "symbols" : [ "P", "C", "B1", "B2", "B3", "SS", "LF", "CF", "RF", "DH" ] + }, { + "type" : "record", + "name" : "Player", + "namespace" : "avro.examples.baseball", + "fields" : [ { + "name" : "number", + "type" : "int" + }, { + "name" : "first_name", + "type" : "string" + }, { + "name" : "last_name", + "type" : "string" + }, { + "name" : "position", + "type" : { + "type" : "array", + "items" : "Position" + } + } ] + }, { + "type" : "record", + "name" : "ImportBar", + "namespace" : "org.apache.avro.ipc.specific", + "fields" : [ { + "name" : "foo", + "type" : "avro.examples.baseball.Player" + } ] + }, { + "type" : "record", + "name" : "NestedType", + "namespace" : "org.on.the.classpath", + "fields" : [ ] + }, { + "type" : "record", + "name" : "FromAfar", + "namespace" : "org.on.the.classpath", + "fields" : [ ] + }, { + "type" : "record", + "name" : "VeryFar", + "namespace" : "org.on.the.classpath", + "fields" : [ ] + }, { + "type" : "record", + "name" : "FarAway", + "namespace" : "org.on.the.classpath", + "fields" : [ ] + }, { + "type" : "record", + "name" : "Baz", + "namespace" : "ns.other.schema", + "fields" : [ { + "name" : "x", + "type" : "int" + } ] + }, { + "type" : "record", + "name" : "Foo", + "fields" : [ { + "name" : "x", + "type" : "int" + } ] + }, { + "type" : "record", + "name" : "Bar", + "fields" : [ { + "name" : "baz", + "type" : "ns.other.schema.Baz" + }, { + "name" : "foo", + "type" : "Foo" + } ] + } ], + "messages" : { + "error" : { + "request" : [ ], + "response" : "null" + }, + "void" : { + "request" : [ ], + "response" : "null" + }, + "idl" : { + "request" : [ ], + "response" : "null" + }, + "import" : { + "request" : [ ], + "response" : "null" + }, + "oneway" : { + "request" : [ ], + "response" : "null" + }, + "null" : { + "request" : [ ], + "response" : "null" + }, + "local_timestamp_ms" : { + "request" : [ ], + "response" : "null" + }, + "bar" : { + "request" : [ ], + "response" : "null" + }, + "bazm" : { + "request" : [ { + "name" : "baz", + "type" : "ns.other.schema.Baz" + } ], + "response" : "null" + }, + "barf" : { + "request" : [ { + "name" : "foo", + "type" : "Foo" + } ], + "response" : "Bar" + } + } +} diff --git a/lang/java/idl/src/test/idl/output/interop.avpr b/lang/java/idl/src/test/idl/output/interop.avpr new file mode 100644 index 00000000000..6e56c7c9cee --- /dev/null +++ b/lang/java/idl/src/test/idl/output/interop.avpr @@ -0,0 +1,94 @@ +{ + "protocol" : "InteropProtocol", + "namespace" : "org.apache.avro", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "types" : [ { + "type" : "record", + "name" : "Foo", + "fields" : [ { + "name" : "label", + "type" : "string" + } ] + }, { + "type" : "enum", + "name" : "Kind", + "symbols" : [ "A", "B", "C" ] + }, { + "type" : "fixed", + "name" : "MD5", + "size" : 16 + }, { + "type" : "record", + "name" : "Node", + "fields" : [ { + "name" : "label", + "type" : "string" + }, { + "name" : "children", + "type" : { + "type" : "array", + "items" : "Node" + }, + "default" : [ ] + } ] + }, { + "type" : "record", + "name" : "Interop", + "fields" : [ { + "name" : "intField", + "type" : "int", + "default" : 1 + }, { + "name" : "longField", + "type" : "long", + "default" : -1 + }, { + "name" : "stringField", + "type" : "string" + }, { + "name" : "boolField", + "type" : "boolean", + "default" : false + }, { + "name" : "floatField", + "type" : "float", + "default" : 0.0 + }, { + "name" : "doubleField", + "type" : "double", + "default" : -1.0E12 + }, { + "name" : "nullField", + "type" : "null" + }, { + "name" : "arrayField", + "type" : { + "type" : "array", + "items" : "double" + }, + "default" : [ ] + }, { + "name" : "mapField", + "type" : { + "type" : "map", + "values" : "Foo" + } + }, { + "name" : "unionFIeld", + "type" : [ "boolean", "double", { + "type" : "array", + "items" : "bytes" + } ] + }, { + "name" : "enumField", + "type" : "Kind" + }, { + "name" : "fixedField", + "type" : "MD5" + }, { + "name" : "recordField", + "type" : "Node" + } ] + } ], + "messages" : { } +} diff --git a/lang/java/idl/src/test/idl/output/mr_events.avpr b/lang/java/idl/src/test/idl/output/mr_events.avpr new file mode 100644 index 00000000000..8c6343664d6 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/mr_events.avpr @@ -0,0 +1,125 @@ +{ + "protocol" : "Events", + "namespace" : "org.apache.hadoop.mapreduce.jobhistory", + "doc" : "Genavro format for a particular protocol found in Hadoop MapReduce.\nUsed as a test case/example to show that we can express real-world stuff more\nsuccinctly.", + "types" : [ { + "type" : "record", + "name" : "JhCounter", + "fields" : [ { + "name" : "name", + "type" : "string" + }, { + "name" : "displayName", + "type" : "string" + }, { + "name" : "value", + "type" : "long" + } ] + }, { + "type" : "record", + "name" : "JhCounterGroup", + "fields" : [ { + "name" : "name", + "type" : "string" + }, { + "name" : "displayName", + "type" : "string" + }, { + "name" : "counts", + "type" : { + "type" : "array", + "items" : "JhCounter" + } + } ] + }, { + "type" : "record", + "name" : "JhCounters", + "fields" : [ { + "name" : "name", + "type" : "string" + }, { + "name" : "groups", + "type" : { + "type" : "array", + "items" : "JhCounterGroup" + } + } ] + }, { + "type" : "record", + "name" : "JobFinished", + "fields" : [ { + "name" : "jobid", + "type" : "string" + }, { + "name" : "finishTime", + "type" : {"type": "long", "logicalType": "timestamp-millis"} + }, { + "name" : "finishRatio", + "type" : {"type": "bytes", "logicalType": "decimal", "precision": 9, "scale": 2} + }, { + "name" : "finishedMaps", + "type" : "int" + }, { + "name" : "finishedReduces", + "type" : "int" + }, { + "name" : "failedMaps", + "type" : "int" 
+ }, { + "name" : "failedReduces", + "type" : "int" + }, { + "name" : "totalCounters", + "type" : "JhCounters" + }, { + "name" : "mapCounters", + "type" : "JhCounters" + }, { + "name" : "reduceCounters", + "type" : "JhCounters" + } ] + }, { + "type" : "record", + "name" : "JobInited", + "fields" : [ { + "name" : "jobid", + "type" : "string" + }, { + "name" : "launchTime", + "type" : {"type": "long", "logicalType": "timestamp-millis"} + }, { + "name" : "totalMaps", + "type" : "int" + }, { + "name" : "totalReduces", + "type" : "int" + }, { + "name" : "jobStatus", + "type" : "string" + } ] + }, { + "type" : "record", + "name" : "JobSubmitted", + "fields" : [ { + "name" : "jobid", + "type" : "string" + }, { + "name" : "jobName", + "type" : "string" + }, { + "name" : "userName", + "type" : "string" + }, { + "name" : "submitTime", + "type" : {"type": "long", "logicalType": "timestamp-millis"} + }, { + "name" : "submitTimeLocal", + "type" : {"type": "long", "logicalType": "local-timestamp-millis"} + }, { + "name" : "jobConfPath", + "type" : "string" + } ] + } ], + "messages" : { + } +} diff --git a/lang/java/idl/src/test/idl/output/namespaces.avpr b/lang/java/idl/src/test/idl/output/namespaces.avpr new file mode 100644 index 00000000000..22ec760d5bf --- /dev/null +++ b/lang/java/idl/src/test/idl/output/namespaces.avpr @@ -0,0 +1,51 @@ +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "types" : [ { + "type" : "fixed", + "name" : "FixedInOtherNamespace", + "namespace" : "avro.test.fixed", + "size" : 16 + }, { + "type" : "fixed", + "name" : "FixedInThisNamespace", + "size" : 16 + }, { + "type" : "record", + "name" : "RecordInOtherNamespace", + "namespace" : "avro.test.record", + "fields" : [ ] + }, { + "type" : "error", + "name" : "ErrorInOtherNamespace", + "namespace" : "avro.test.error", + "fields" : [ ] + }, { + "type" : "enum", + "name" : "EnumInOtherNamespace", + "namespace" : "avro.test.enum", + "symbols" : [ "FOO" ] + }, { + "type" : "record", + "name" : "RefersToOthers", + "fields" : [ { + "name" : "someFixed", + "type" : "avro.test.fixed.FixedInOtherNamespace" + }, { + "name" : "someRecord", + "type" : "avro.test.record.RecordInOtherNamespace" + }, { + "name" : "someError", + "type" : "avro.test.error.ErrorInOtherNamespace" + }, { + "name" : "someEnum", + "type" : "avro.test.enum.EnumInOtherNamespace" + }, { + "name" : "thisFixed", + "type" : "FixedInThisNamespace" + } ] + } ], + "messages" : { + } +} diff --git a/lang/java/idl/src/test/idl/output/nestedimport.avpr b/lang/java/idl/src/test/idl/output/nestedimport.avpr new file mode 100644 index 00000000000..80273627109 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/nestedimport.avpr @@ -0,0 +1,73 @@ +{ + "protocol" : "nestedimport", + "namespace" : "org.apache.avro.ipc.specific", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "version" : "1.0.5", + "types" : [ { + "type" : "enum", + "name" : "Position", + "namespace" : "avro.examples.baseball", + "symbols" : [ "P", "C", "B1", "B2", "B3", "SS", "LF", "CF", "RF", "DH" ] + }, { + "type" : "record", + "name" : "Player", + "namespace" : "avro.examples.baseball", + "fields" : [ { + "name" : "number", + "type" : "int" + }, { + "name" : "first_name", + "type" : "string" + }, { + "name" : "last_name", + "type" : "string" + }, { + "name" : "position", + "type" : { + "type" : "array", + "items" : "Position" + } + } ] + }, { + "type" : "record", + "name" : "ImportBar", + "fields" : [ { + "name" : "foo", + "type" : "avro.examples.baseball.Player" + } ] + } ], + "messages" : { + "error" : { + "request" : [ ], + "response" : "null" + }, + "void" : { + "request" : [ ], + "response" : "null" + }, + "idl" : { + "request" : [ ], + "response" : "null" + }, + "import" : { + "request" : [ ], + "response" : "null" + }, + "oneway" : { + "request" : [ ], + "response" : "null" + }, + "null" : { + "request" : [ ], + "response" : "null" + }, + "local_timestamp_ms" : { + "request" : [ ], + "response" : "null" + }, + "bar" : { + "request" : [ ], + "response" : "null" + } + } +} diff --git a/lang/java/idl/src/test/idl/output/reservedwords.avpr b/lang/java/idl/src/test/idl/output/reservedwords.avpr new file mode 100644 index 00000000000..f28a90cc8b3 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/reservedwords.avpr @@ -0,0 +1,35 @@ +{ + "protocol" : "Foo", + "doc" : "Licensed to the Apache Software Foundation (ASF) under one\nor more contributor license agreements. See the NOTICE file\ndistributed with this work for additional information\nregarding copyright ownership. The ASF licenses this file\nto you under the Apache License, Version 2.0 (the\n\"License\"); you may not use this file except in compliance\nwith the License. 
You may obtain a copy of the License at\n\n https://www.apache.org/licenses/LICENSE-2.0\n\nUnless required by applicable law or agreed to in writing, software\ndistributed under the License is distributed on an \"AS IS\" BASIS,\nWITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\nSee the License for the specific language governing permissions and\nlimitations under the License.", + "types" : [ ], + "messages" : { + "error" : { + "request" : [ ], + "response" : "null" + }, + "void" : { + "request" : [ ], + "response" : "null" + }, + "idl" : { + "request" : [ ], + "response" : "null" + }, + "import" : { + "request" : [ ], + "response" : "null" + }, + "oneway" : { + "request" : [ ], + "response" : "null" + }, + "null" : { + "request" : [ ], + "response" : "null" + }, + "local_timestamp_ms" : { + "request" : [ ], + "response" : "null" + } + } +} diff --git a/lang/java/idl/src/test/idl/output/schema_syntax.avsc b/lang/java/idl/src/test/idl/output/schema_syntax.avsc new file mode 100644 index 00000000000..06042446188 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/schema_syntax.avsc @@ -0,0 +1,36 @@ +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", + "namespace": "monitoring", + "fields": [ + { + "name": "timestamp", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + }, + "doc": "The moment of the status change." + }, { + "name": "processName", + "type": "string", + "doc": "The process whose status changed." + }, { + "name": "newStatus", + "type": { + "type": "enum", + "name": "Status", + "namespace": "system", + "symbols": ["UNKNOWN", "NEW", "STARTUP", "RUNNING", "TERMINATING", "SHUTDOWN", "CRASHED"], + "default": "UNKNOWN" + }, + "doc": "The new status of the process." + }, { + "name": "description", + "type": ["null", "string"], + "doc": "A description why this status change occurred (optional)." 
+ } + ] + } +} diff --git a/lang/java/idl/src/test/idl/output/simple.avpr b/lang/java/idl/src/test/idl/output/simple.avpr new file mode 100644 index 00000000000..0ec9edb4c24 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/simple.avpr @@ -0,0 +1,144 @@ +{ + "protocol" : "Simple", + "namespace" : "org.apache.avro.test", + "doc" : "A simple test case.", + "version" : "1.0.5", + "types" : [ { + "type" : "enum", + "name" : "Kind", + "doc" : "A kind of record.", + "symbols" : [ "FOO", "BAR", "BAZ" ], + "aliases" : [ "org.foo.KindOf" ] + }, { + "type" : "enum", + "name" : "Status", + "symbols" : [ "A", "B", "C" ], + "default" : "C" + }, { + "type" : "record", + "name" : "TestRecord", + "doc" : "A TestRecord.", + "fields" : [ { + "name" : "name", + "type" : { + "type": "string", + "avro.java.string": "String" + }, + "default" : "foo", + "order" : "ignore" + }, { + "name" : "kind", + "type" : "Kind", + "doc" : "The kind of record.", + "order" : "descending" + }, { + "name" : "status", + "type" : "Status", + "doc" : "The status of the record.", + "default" : "A" + }, { + "name" : "hash", + "type" : { + "type" : "fixed", + "name" : "MD5", + "doc" : "An MD5 hash.", + "size" : 16 + }, + "default" : "0000000000000000" + }, { + "name" : "nullableHash", + "type" : [ "null", "MD5" ], + "default" : null, + "aliases" : [ "hash", "hsh" ] + }, { + "name" : "value", + "type" : "double", + "default" : "NaN" + }, { + "name" : "average", + "type" : "float", + "default" : "-Infinity" + }, { + "name": "d", + "type": {"type": "int", "logicalType": "date"}, + "default": 0 + }, { + "name": "t", + "type": [ {"type": "int", "logicalType": "time-millis"}, "null" ], + "default": 0 + } , { + "name": "l", + "type": {"type": "long", "foo.bar": "bar.foo"}, + "default": 0 + } , { + "name": "a", + "type": {"type": "array", "items": "string", "foo.bar.bar": "foo.bar2"}, + "default": [] + } , { + "name": "prop", + "type": [ "null" , {"type":"string", "foo.foo.bar": 42, "foo.foo.foo": "3foo"} ], + "default": null + }], + "my-property" : { + "key" : 3 + } + }, { + "type" : "error", + "name" : "TestError", + "fields" : [ { + "name" : "message", + "type" : "string" + } ] + } ], + "messages" : { + "hello" : { + "doc" : "method 'hello' takes @parameter 'greeting'", + "request" : [ { + "name" : "greeting", + "type" : "string" + } ], + "response" : "string" + }, + "echo" : { + "request" : [ { + "name" : "record", + "type" : "TestRecord", + "default" : { + "name" : "bar", + "kind" : "BAR" + } + } ], + "response" : "TestRecord" + }, + "add" : { + "doc" : "method 'add' takes @parameter 'arg1' @parameter 'arg2'", + "specialProp" : "test", + "request" : [ { + "name" : "arg1", + "type" : "int" + }, { + "name" : "arg2", + "type" : "int", + "default" : 0 + } ], + "response" : "int" + }, + "echoBytes" : { + "request" : [ { + "name" : "data", + "type" : "bytes" + } ], + "response" : "bytes" + }, + "error" : { + "request" : [ ], + "response" : "null", + "errors" : [ "TestError" ] + }, + "ping" : { + "request" : [ ], + "response" : "null", + "one-way" : true + } + } +} diff --git a/lang/java/idl/src/test/idl/output/status.avsc b/lang/java/idl/src/test/idl/output/status.avsc new file mode 100644 index 00000000000..82710b84137 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/status.avsc @@ -0,0 +1,9 @@ +[ + { + "type": "enum", + "name": "Status", + "namespace": "system", + "symbols": [ "UNKNOWN", "NEW", "STARTUP", "RUNNING", "TERMINATING", "SHUTDOWN", "CRASHED" ], + "default": "UNKNOWN" + } +] diff --git 
a/lang/java/idl/src/test/idl/output/unicode.avpr b/lang/java/idl/src/test/idl/output/unicode.avpr new file mode 100644 index 00000000000..c58a043003b --- /dev/null +++ b/lang/java/idl/src/test/idl/output/unicode.avpr @@ -0,0 +1,17 @@ +{ + "protocol" : "Протоколы", + "doc" : "This is a test that UTF8 functions correctly.\nこのテストでは、UTF - 8で正しく機能している。\n这是一个测试,UTF - 8的正常运行。", + "types" : [ { + "type" : "record", + "name" : "Структура", + "fields" : [ { + "name" : "Строковый", + "type" : "string" + }, { + "name" : "文字列", + "type" : "string" + } ] + } ], + "messages" : { + } +} diff --git a/lang/java/idl/src/test/idl/output/union.avpr b/lang/java/idl/src/test/idl/output/union.avpr new file mode 100644 index 00000000000..61748d179e3 --- /dev/null +++ b/lang/java/idl/src/test/idl/output/union.avpr @@ -0,0 +1,38 @@ +{ + "protocol": "UnionFwd", + "namespace": "org.apache.avro.gen", + "types": [ + { + "type": "record", + "name": "TestRecord", + "fields": [ + { + "name": "unionField", + "type": [ + { + "type": "record", + "name": "SR1", + "fields": [ + { + "name": "field", + "type": "string" + } + ] + }, + { + "type": "record", + "name": "SR2", + "fields": [ + { + "name": "field", + "type": "string" + } + ] + } + ] + } + ] + } + ], + "messages": {} +} diff --git a/lang/java/idl/src/test/idl/output/uuid.avpr b/lang/java/idl/src/test/idl/output/uuid.avpr new file mode 100644 index 00000000000..fe3d5a66c1a --- /dev/null +++ b/lang/java/idl/src/test/idl/output/uuid.avpr @@ -0,0 +1,29 @@ +{ + "protocol" : "MyProtocol", + "namespace" : "org.apache.avro", + "doc" : "Testing UUID fields", + "types" : [ { + "type" : "record", + "name" : "APlaygroundEvent", + "fields" : [ { + "name" : "identifier", + "type" : { + "type" : "string", + "logicalType" : "uuid" + }, + "doc" : "Documentation must be provided for each attribute" + }, { + "name" : "uuid", + "type" : "string", + "doc" : "A string field with a special name" + }, { + "name" : "optionalString", + "type" : [ "null", { + "type" : "string", + "logicalType" : "uuid" + } ], + "doc" : "a nullable uuid field" + } ] + } ], + "messages" : { } +} diff --git a/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avdl b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avdl new file mode 100644 index 00000000000..4ee84113246 --- /dev/null +++ b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avdl @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +@namespace("org.on.the.classpath") +protocol OnTheClasspath { + import idl "folder/relativePath.avdl"; + record FromAfar { + } +} diff --git a/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avpr b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avpr new file mode 100644 index 00000000000..46951c2d485 --- /dev/null +++ b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avpr @@ -0,0 +1,11 @@ +{ + "protocol" : "OnTheClasspath", + "namespace" : "org.on.the.classpath", + "types" : [ { + "type" : "record", + "name" : "VeryFar", + "fields" : [ ] + } ], + "messages" : { + } +} \ No newline at end of file diff --git a/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avsc b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avsc new file mode 100644 index 00000000000..40d3595e8fe --- /dev/null +++ b/lang/java/idl/src/test/idl/putOnClassPath/OnTheClasspath.avsc @@ -0,0 +1,6 @@ +{ + "type" : "record", + "name" : "FarAway", + "namespace" : "org.on.the.classpath", + "fields" : [ ] +} \ No newline at end of file diff --git a/lang/java/idl/src/test/idl/putOnClassPath/folder/relativePath.avdl b/lang/java/idl/src/test/idl/putOnClassPath/folder/relativePath.avdl new file mode 100644 index 00000000000..0343efeb90d --- /dev/null +++ b/lang/java/idl/src/test/idl/putOnClassPath/folder/relativePath.avdl @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +@namespace("org.on.the.classpath") +protocol OnTheClasspathDependency { + import idl "../nestedtypes.avdl"; +} diff --git a/lang/java/idl/src/test/idl/putOnClassPath/nestedtypes.avdl b/lang/java/idl/src/test/idl/putOnClassPath/nestedtypes.avdl new file mode 100644 index 00000000000..6ef062f3d7e --- /dev/null +++ b/lang/java/idl/src/test/idl/putOnClassPath/nestedtypes.avdl @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+@namespace("org.on.the.classpath")
+protocol OnTheClasspathTypes {
+ record NestedType {
+ }
+} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java new file mode 100644 index 00000000000..8e9f187f4ce --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/IdlReaderTest.java @@ -0,0 +1,262 @@ +/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.idl;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.avro.Protocol;
+import org.apache.avro.Schema;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+/**
+ * Simple test harness for Idl. This relies on input/ and output/ directories.
+ * Inside the input/ directory are .avdl files. Each file should have a
+ * corresponding .avpr file in output/. When the test runs, it parses and
+ * stringifies each .avdl file and compares the result to the expected output,
+ * failing if the two differ.
+ *
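+ * For example, input/simple.avdl is parsed and checked against the protocol
+ * JSON in output/simple.avpr, while the schema-syntax file
+ * input/schema_syntax_schema.avdl is checked against output/schema_syntax.avsc
+ * (see the "_schema.avdl" mapping in loadTests below).
+ *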

+ * To make it simpler to write these tests, you can run ant -Dtestcase=TestIdl
+ * -Dtest.idl.mode=write, which will *replace* all expected output.
+ */
+public class IdlReaderTest {
+ private static final File TEST_DIR = new File(System.getProperty("test.idl.dir", "src/test/idl"));
+
+ private static final File TEST_INPUT_DIR = new File(TEST_DIR, "input").getAbsoluteFile();
+
+ private static final File TEST_OUTPUT_DIR = new File(TEST_DIR, "output");
+
+ private static final String TEST_MODE = System.getProperty("test.idl.mode", "run");
+
+ private static final File EXTRA_TEST_DIR = new File(TEST_DIR, "extra");
+
+ private List<GenTest> tests;
+
+ @Before
+ public void loadTests() {
+ assertTrue(TEST_DIR.exists());
+ assertTrue(TEST_INPUT_DIR.exists());
+ assertTrue(TEST_OUTPUT_DIR.exists());
+
+ tests = new ArrayList<>();
+ for (File inF : Objects.requireNonNull(TEST_INPUT_DIR.listFiles())) {
+ if (!inF.getName().endsWith(".avdl")) {
+ continue;
+ }
+ if (inF.getName().startsWith(".")) {
+ continue;
+ }
+
+ File outF = new File(TEST_OUTPUT_DIR,
+ inF.getName().replaceFirst("_schema\\.avdl$", ".avsc").replaceFirst("\\.avdl$", ".avpr"));
+ tests.add(new GenTest(inF, outF));
+ }
+ }
+
+ @Test
+ public void validateProtocolParsingResult() throws IOException {
+ // runTests already tests the actual parsing; this tests the result object.
+ IdlFile idlFile = parseExtraIdlFile("protocolSyntax.avdl");
+
+ assertEquals(1, idlFile.getNamedSchemas().size());
+ idlFile.getNamedSchemas().keySet().forEach(System.out::println);
+ assertNotNull(idlFile.getNamedSchema("communication.Message"));
+ assertNotNull(idlFile.getNamedSchema("Message"));
+
+ assertNotNull(idlFile.getProtocol());
+ assertNull(idlFile.getMainSchema());
+ }
+
+ @Test
+ public void validateSchemaParsingResult() throws IOException {
+ // runTests already tests the actual parsing; this tests the result object.
+ IdlFile idlFile = parseExtraIdlFile("schemaSyntax.avdl");
+
+ assertEquals(1, idlFile.getNamedSchemas().size());
+ idlFile.getNamedSchemas().keySet().forEach(System.out::println);
+ assertNotNull(idlFile.getNamedSchema("communication.Message"));
+ assertNotNull(idlFile.getNamedSchema("Message"));
+
+ assertNull(idlFile.getProtocol());
+ Schema mainSchema = idlFile.getMainSchema();
+ assertEquals(Schema.Type.ARRAY, mainSchema.getType());
+ assertEquals(idlFile.getNamedSchema("Message"), mainSchema.getElementType());
+ }
+
+ @Test
+ public void testDocCommentsAndWarnings() throws Exception {
+ final IdlFile idlFile = parseExtraIdlFile("../input/comments.avdl");
+ final Protocol protocol = idlFile.getProtocol();
+ final List<String> warnings = idlFile.getWarnings();
+
+ assertEquals("Documented Enum", protocol.getType("testing.DocumentedEnum").getDoc());
+
+ assertEquals("Documented Fixed Type", protocol.getType("testing.DocumentedFixed").getDoc());
+
+ final Schema documentedError = protocol.getType("testing.DocumentedError");
+ assertEquals("Documented Error", documentedError.getDoc());
+ assertEquals("Documented Reason Field", documentedError.getField("reason").doc());
+ assertEquals("Default Doc Explanation Field", documentedError.getField("explanation").doc());
+
+ final Map<String, Protocol.Message> messages = protocol.getMessages();
+ final Protocol.Message documentedMethod = messages.get("documentedMethod");
+ assertEquals("Documented Method", documentedMethod.getDoc());
+ assertEquals("Documented Parameter", documentedMethod.getRequest().getField("message").doc());
+ assertEquals("Default Documented Parameter", documentedMethod.getRequest().getField("defMsg").doc());
+
+ assertNull(protocol.getType("testing.UndocumentedEnum").getDoc());
+ assertNull(protocol.getType("testing.UndocumentedFixed").getDoc());
+ assertNull(protocol.getType("testing.UndocumentedRecord").getDoc());
+ assertNull(messages.get("undocumentedMethod").getDoc());
+
+ final String pattern = "Line %d, char %d: Ignoring out-of-place documentation comment.%n"
+ + "Did you mean to use a multiline comment ( /* ... 
*/ ) instead?"; + assertEquals( + Arrays.asList(String.format(pattern, 21, 8), String.format(pattern, 21, 45), String.format(pattern, 22, 5), + String.format(pattern, 23, 5), String.format(pattern, 24, 5), String.format(pattern, 25, 5), + String.format(pattern, 26, 7), String.format(pattern, 27, 7), String.format(pattern, 28, 7), + String.format(pattern, 33, 7), String.format(pattern, 34, 7), String.format(pattern, 35, 5), + String.format(pattern, 36, 5), String.format(pattern, 37, 7), String.format(pattern, 42, 7), + String.format(pattern, 43, 7), String.format(pattern, 46, 9), String.format(pattern, 47, 5), + String.format(pattern, 54, 7), String.format(pattern, 55, 7), String.format(pattern, 58, 9), + String.format(pattern, 59, 7), String.format(pattern, 60, 11), String.format(pattern, 61, 11)), + warnings); + } + + @SuppressWarnings("SameParameterValue") + private IdlFile parseExtraIdlFile(String fileName) throws IOException { + return new IdlReader().parse(EXTRA_TEST_DIR.toPath().resolve(fileName)); + } + + @Test + public void runTests() { + if (!"run".equals(TEST_MODE)) { + return; + } + + int failed = 0; + + for (GenTest t : tests) { + try { + t.run(); + } catch (Exception e) { + failed++; + System.err.println("Failed: " + t.testName()); + e.printStackTrace(System.err); + } + } + + if (failed > 0) { + fail(failed + " tests failed"); + } + } + + @Test + public void writeTests() throws Exception { + if (!"write".equals(TEST_MODE)) { + return; + } + + for (GenTest t : tests) { + t.write(); + } + } + + /** + * An individual comparison test + */ + private static class GenTest { + private final File in, expectedOut; + + public GenTest(File in, File expectedOut) { + this.in = in; + this.expectedOut = expectedOut; + } + + private String generate() { + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + + URL[] newPathURL = new URL[] { cl.getResource("putOnClassPath-test-resource.jar") }; + URLClassLoader ucl = new URLClassLoader(newPathURL, cl); + Thread.currentThread().setContextClassLoader(ucl); + try { + IdlReader parser = new IdlReader(); + return parser.parse(in.toPath()).outputString(); + } catch (IOException e) { + throw new AssertionError(e.getMessage(), e); + } finally { + Thread.currentThread().setContextClassLoader(cl); + } + } + + public String testName() { + return this.in.getName(); + } + + public void run() throws Exception { + String output = generate(); + String slurped = slurp(expectedOut); + assertEquals(slurped.trim(), output.replace("\\r", "").trim()); + } + + public void write() throws Exception { + writeFile(expectedOut, generate()); + } + + private static String slurp(File f) throws IOException { + BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(f), StandardCharsets.UTF_8)); + + String line; + StringBuilder builder = new StringBuilder(); + while ((line = in.readLine()) != null) { + builder.append(line); + } + in.close(); + ObjectMapper mapper = new ObjectMapper(); + JsonNode json = mapper.readTree(builder.toString()); + return mapper.writer().writeValueAsString(json); + } + + private static void writeFile(File f, String s) throws IOException { + FileWriter w = new FileWriter(f); + w.write(s); + w.close(); + } + } +} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java new file mode 100644 index 00000000000..427de0957d6 --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/TestCycle.java @@ -0,0 +1,87 @@ +/* + * 
Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.generic.GenericData; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericDatumWriter; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.avro.io.BinaryDecoder; +import org.apache.avro.io.BinaryEncoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.io.EncoderFactory; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; + +import static java.util.Objects.requireNonNull; + +public class TestCycle { + + private static final Logger LOG = LoggerFactory.getLogger(TestCycle.class); + + @Test + public void testCycleGeneration() throws IOException, URISyntaxException { + final ClassLoader cl = Thread.currentThread().getContextClassLoader(); + IdlFile idlFile = new IdlReader().parse(requireNonNull(cl.getResource("input/cycle.avdl")).toURI()); + String json = idlFile.outputString(); + LOG.info(json); + + GenericRecordBuilder rb2 = new GenericRecordBuilder(idlFile.getNamedSchema("SampleNode")); + rb2.set("count", 10); + rb2.set("subNodes", Collections.EMPTY_LIST); + GenericData.Record node = rb2.build(); + + GenericRecordBuilder mb = new GenericRecordBuilder(idlFile.getNamedSchema("Method")); + mb.set("declaringClass", "Test"); + mb.set("methodName", "test"); + GenericData.Record method = mb.build(); + + GenericRecordBuilder spb = new GenericRecordBuilder(idlFile.getNamedSchema("SamplePair")); + spb.set("method", method); + spb.set("node", node); + GenericData.Record sp = spb.build(); + + GenericRecordBuilder rb = new GenericRecordBuilder(idlFile.getNamedSchema("SampleNode")); + rb.set("count", 10); + rb.set("subNodes", Collections.singletonList(sp)); + GenericData.Record record = rb.build(); + + serDeserRecord(record); + } + + private static void serDeserRecord(GenericData.Record data) throws IOException { + ByteArrayOutputStream bab = new ByteArrayOutputStream(); + GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(data.getSchema()); + final BinaryEncoder directBinaryEncoder = EncoderFactory.get().directBinaryEncoder(bab, null); + writer.write(data, directBinaryEncoder); + directBinaryEncoder.flush(); + ByteArrayInputStream bis = new ByteArrayInputStream(bab.toByteArray(), 0, bab.size()); + GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(data.getSchema()); + BinaryDecoder directBinaryDecoder = DecoderFactory.get().directBinaryDecoder(bis, null); + GenericData.Record read = (GenericData.Record) reader.read(null, directBinaryDecoder); + Assert.assertEquals(data.toString(), read.toString()); + } + +} diff --git
a/lang/java/idl/src/test/java/org/apache/avro/idl/TestLogicalTypes.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestLogicalTypes.java new file mode 100644 index 00000000000..05a8dcaf2db --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/TestLogicalTypes.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.LogicalType; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Schema; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.net.URISyntaxException; + +import static java.util.Objects.requireNonNull; + +public class TestLogicalTypes { + private Schema logicalTypeFields; + + @Before + public void setup() throws IOException, URISyntaxException { + final ClassLoader cl = Thread.currentThread().getContextClassLoader(); + IdlFile idlFile = new IdlReader().parse(requireNonNull(cl.getResource("logicalTypes.avdl")).toURI()); + + logicalTypeFields = idlFile.getNamedSchema("org.apache.avro.test.LogicalTypeFields"); + } + + @Test + public void testDateBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.date(), logicalTypeOfField("aDate")); + } + + @Test + public void testTimeMsBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.timeMillis(), logicalTypeOfField("aTime")); + } + + @Test + public void testTimestampMsBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.timestampMillis(), logicalTypeOfField("aTimestamp")); + } + + @Test + public void testLocalTimestampMsBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.localTimestampMillis(), logicalTypeOfField("aLocalTimestamp")); + } + + @Test + public void testDecimalBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.decimal(6, 2), logicalTypeOfField("pocketMoney")); + } + + @Test + public void testUuidBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.uuid(), logicalTypeOfField("identifier")); + } + + @Test + public void testAnnotatedLongBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.timestampMicros(), logicalTypeOfField("anotherTimestamp")); + } + + @Test + public void testAnnotatedBytesFieldBecomesLogicalType() { + Assert.assertEquals(LogicalTypes.decimal(6, 2), logicalTypeOfField("allowance")); + } + + @Test + public void testIncorrectlyAnnotatedBytesFieldHasNoLogicalType() { + Schema fieldSchema = logicalTypeFields.getField("byteArray").schema(); + + Assert.assertNull(fieldSchema.getLogicalType()); + Assert.assertEquals("decimal", fieldSchema.getObjectProp("logicalType")); + Assert.assertEquals(3000000000L, fieldSchema.getObjectProp("precision")); // Not an int, so not a valid precision + Assert.assertEquals(0, fieldSchema.getObjectProp("scale")); + } + + private 
LogicalType logicalTypeOfField(String name) { + return logicalTypeFields.getField(name).schema().getLogicalType(); + } +} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestReferenceAnnotationNotAllowed.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestReferenceAnnotationNotAllowed.java new file mode 100644 index 00000000000..51b5570f661 --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/TestReferenceAnnotationNotAllowed.java @@ -0,0 +1,42 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.idl; + +import org.apache.avro.AvroRuntimeException; +import org.junit.Test; + +import java.io.IOException; +import java.net.URISyntaxException; + +import static java.util.Objects.requireNonNull; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +public class TestReferenceAnnotationNotAllowed { + + @Test + public void testReferenceAnnotationNotAllowed() throws IOException, URISyntaxException { + + final ClassLoader cl = Thread.currentThread().getContextClassLoader(); + + try { + new IdlReader().parse(requireNonNull(cl.getResource("AnnotationOnTypeReference.avdl")).toURI()); + fail("Compilation should fail: annotations on type references are not allowed."); + } catch (AvroRuntimeException e) { + assertEquals("Type references may not be annotated, at line 29, column 16", e.getMessage()); + } + } +} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java new file mode 100644 index 00000000000..70488232581 --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemaResolver.java @@ -0,0 +1,71 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.avro.idl; + +import org.apache.avro.Protocol; +import org.apache.avro.Schema; +import org.apache.avro.SchemaBuilder; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; + +public class TestSchemaResolver { + + @Test + public void testResolving() throws IOException { + Path testIdl = Paths.get(".", "src", "test", "idl", "cycle.avdl").toAbsolutePath(); + IdlReader parser = new IdlReader(); + IdlFile idlFile = parser.parse(testIdl); + Protocol protocol = idlFile.getProtocol(); + System.out.println(protocol); + Assert.assertEquals(5, protocol.getTypes().size()); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError1() { + // No "org.apache.avro.idl.unresolved.name" property + Schema s = SchemaBuilder.record("R").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError2() { + // No "UnresolvedSchema" property + Schema s = SchemaBuilder.record("R").prop("org.apache.avro.idl.unresolved.name", "x").fields().endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testIsUnresolvedSchemaError3() { + // Namespace not "org.apache.avro.compiler". + Schema s = SchemaBuilder.record("UnresolvedSchema").prop("org.apache.avro.idl.unresolved.name", "x").fields() + .endRecord(); + SchemaResolver.getUnresolvedSchemaName(s); + } + + @Test(expected = IllegalArgumentException.class) + public void testGetUnresolvedSchemaNameError() { + Schema s = SchemaBuilder.fixed("a").size(10); + SchemaResolver.getUnresolvedSchemaName(s); + } +} diff --git a/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java b/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java new file mode 100644 index 00000000000..000ba20dbcb --- /dev/null +++ b/lang/java/idl/src/test/java/org/apache/avro/idl/TestSchemas.java @@ -0,0 +1,195 @@ +/* + * Copyright 2017 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.avro.idl; + +import org.apache.avro.Schema; +import org.junit.Assert; +import org.junit.Test; + +public class TestSchemas { + + private static class TestVisitor implements SchemaVisitor<String> { + StringBuilder sb = new StringBuilder(); + + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal); + return SchemaVisitorAction.CONTINUE; + } + + @Override + public SchemaVisitorAction visitNonTerminal(Schema nonTerminal) { + String n = nonTerminal.getName(); + sb.append(n).append('.'); + if (n.startsWith("t")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("ss")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("st")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public SchemaVisitorAction afterVisitNonTerminal(Schema nonTerminal) { + sb.append("!"); + String n = nonTerminal.getName(); + if (n.startsWith("ct")) { + return SchemaVisitorAction.TERMINATE; + } else if (n.startsWith("css")) { + return SchemaVisitorAction.SKIP_SIBLINGS; + } else if (n.startsWith("cst")) { + return SchemaVisitorAction.SKIP_SUBTREE; + } else { + return SchemaVisitorAction.CONTINUE; + } + } + + @Override + public String get() { + return sb.toString(); + } + } + + @Test + public void testVisit1() { + String s1 = "{\"type\": \"record\", \"name\": \"t1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("t1.", Schemas.visit(new Schema.Parser().parse(s1), new TestVisitor())); + } + + @Test + public void testVisit2() { + String s2 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("c1.\"int\"!", Schemas.visit(new Schema.Parser().parse(s2), new TestVisitor())); + + } + + @Test + public void testVisit3() { + String s3 = "{\"type\": \"record\", \"name\": \"ss1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("ss1.", Schemas.visit(new Schema.Parser().parse(s3), new TestVisitor())); + + } + + @Test + public void testVisit4() { + String s4 = "{\"type\": \"record\", \"name\": \"st1\", \"fields\": [" + "{\"name\": \"f1\", \"type\": \"int\"}" + + "]}"; + Assert.assertEquals("st1.!", Schemas.visit(new Schema.Parser().parse(s4), new TestVisitor())); + + } + + @Test + public void testVisit5() { + String s5 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\"!\"long\"!", Schemas.visit(new Schema.Parser().parse(s5), new TestVisitor())); + + } + + @Test + public void testVisit6() { + String s6 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ss2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ss2.!", Schemas.visit(new Schema.Parser().parse(s6), new TestVisitor())); + + } + + @Test + public void testVisit7() { + String s7 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"css2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + 
Assert.assertEquals("c1.css2.\"int\"!!", Schemas.visit(new Schema.Parser().parse(s7), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit8() { + String s8 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"cst2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(new Schema.Parser().parse(s8), new TestVisitor()); + } + + @Test + public void testVisit9() { + String s9 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\"!", Schemas.visit(new Schema.Parser().parse(s9), new TestVisitor())); + } + + @Test(expected = UnsupportedOperationException.class) + public void testVisit10() { + String s10 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"int\"}" + "]}"; + Schemas.visit(new Schema.Parser().parse(s10), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + return SchemaVisitorAction.SKIP_SUBTREE; + } + }); + } + + @Test + public void testVisit11() { + String s11 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"c2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"},{\"name\": \"f12\", \"type\": \"double\"}" + "]}}," + + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.c2.\"int\".!\"long\".!", Schemas.visit(new Schema.Parser().parse(s11), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } + + @Test + public void testVisit12() { + String s12 = "{\"type\": \"record\", \"name\": \"c1\", \"fields\": [" + + "{\"name\": \"f1\", \"type\": {\"type\": \"record\", \"name\": \"ct2\", \"fields\": " + + "[{\"name\": \"f11\", \"type\": \"int\"}]}}," + "{\"name\": \"f2\", \"type\": \"long\"}" + "]}"; + Assert.assertEquals("c1.ct2.\"int\".", Schemas.visit(new Schema.Parser().parse(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.TERMINATE; + } + })); + } + + @Test + public void testVisit13() { + String s12 = "{\"type\": \"int\"}"; + Assert.assertEquals("\"int\".", Schemas.visit(new Schema.Parser().parse(s12), new TestVisitor() { + @Override + public SchemaVisitorAction visitTerminal(Schema terminal) { + sb.append(terminal).append('.'); + return SchemaVisitorAction.SKIP_SIBLINGS; + } + })); + } +} diff --git a/lang/java/integration-test/codegen-test/pom.xml b/lang/java/integration-test/codegen-test/pom.xml index e5a7ae1d962..760bea934aa 100644 --- a/lang/java/integration-test/codegen-test/pom.xml +++ b/lang/java/integration-test/codegen-test/pom.xml @@ -24,7 +24,7 @@ avro-integration-test org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-codegen-test diff --git a/lang/java/integration-test/pom.xml b/lang/java/integration-test/pom.xml index cc5db4c9194..bc27a788170 100644 --- 
a/lang/java/integration-test/pom.xml +++ b/lang/java/integration-test/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-integration-test diff --git a/lang/java/integration-test/test-custom-conversions/pom.xml b/lang/java/integration-test/test-custom-conversions/pom.xml index 94e76b94162..9a5dc836982 100644 --- a/lang/java/integration-test/test-custom-conversions/pom.xml +++ b/lang/java/integration-test/test-custom-conversions/pom.xml @@ -24,7 +24,7 @@ avro-integration-test org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-test-custom-conversions diff --git a/lang/java/ipc-jetty/pom.xml b/lang/java/ipc-jetty/pom.xml index 098370404a1..2ee6248d34f 100644 --- a/lang/java/ipc-jetty/pom.xml +++ b/lang/java/ipc-jetty/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-ipc-jetty diff --git a/lang/java/ipc-netty/pom.xml b/lang/java/ipc-netty/pom.xml index 96dd0bfaaa8..6b316c4a9b3 100644 --- a/lang/java/ipc-netty/pom.xml +++ b/lang/java/ipc-netty/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-ipc-netty diff --git a/lang/java/ipc/pom.xml b/lang/java/ipc/pom.xml index 5878980f5bd..5a86e1393f7 100644 --- a/lang/java/ipc/pom.xml +++ b/lang/java/ipc/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-ipc diff --git a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java index 38a81326ae2..d85b28effa3 100644 --- a/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/TestSchema.java @@ -205,9 +205,9 @@ void record(TestInfo testInfo) throws Exception { @Test void invalidNameTolerance() { - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); - new Schema.Parser().setValidate(false).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"1X\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X-\",\"fields\":[]}"); + new Schema.Parser(Schema.NameValidator.NO_VALIDATION).parse("{\"type\":\"record\",\"name\":\"X$\",\"fields\":[]}"); } @Test @@ -302,26 +302,20 @@ void lisp(TestInfo testInfo) throws Exception { void union(TestInfo testInfo) throws Exception { check(new File(DIR, testInfo.getTestMethod().get().getName()), "[\"string\", \"long\"]", false); checkDefault("[\"double\", \"long\"]", "1.1", 1.1); + checkDefault("[\"double\", \"string\"]", "\"TheString\"", new Utf8("TheString")); // test that erroneous default values cause errors for (String type : new String[] { "int", "long", "float", "double", "string", "bytes", "boolean" }) { - checkValidateDefaults("[\"" + type + "\", \"null\"]", "null"); // schema parse time - boolean error = false; - try { - checkDefault("[\"" + type + "\", \"null\"]", "null", 0); // read time - } catch (AvroTypeException e) { - error = true; - } - assertTrue(error); - checkValidateDefaults("[\"null\", \"" + type + "\"]", "0"); // schema parse time - error = false; - try { - checkDefault("[\"null\", \"" + type + "\"]", "0", null); // read time - } catch (AvroTypeException e) { - error = true; - } - assertTrue(error); +// checkValidateDefaults("[\"" + type + "\", 
\"null\"]", "null"); // schema parse time + checkDefault("[\"" + type + "\", \"null\"]", "null", null); // read time } + checkDefault("[\"null\", \"int\"]", "0", 0); + checkDefault("[\"null\", \"long\"]", "0", 0l); + checkDefault("[\"null\", \"float\"]", "0.0", 0.0f); + checkDefault("[\"null\", \"double\"]", "0.0", 0.0d); + checkDefault("[\"null\", \"string\"]", "\"Hi\"", new Utf8("Hi")); + checkDefault("[\"null\", \"bytes\"]", "\"01\"", ByteBuffer.wrap("01".getBytes(StandardCharsets.UTF_8))); + checkDefault("[\"null\", \"boolean\"]", "true", true); // check union json String record = "{\"type\":\"record\",\"name\":\"Foo\",\"fields\":[]}"; @@ -513,7 +507,7 @@ void nullPointer() throws Exception { private static void checkParseError(String json) { try { new Schema.Parser().parse(json); - } catch (SchemaParseException e) { + } catch (AvroRuntimeException e) { return; } fail("Should not have parsed: " + json); diff --git a/lang/java/ipc/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java b/lang/java/ipc/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java index bb249dd8e1c..0af06b9a2b1 100644 --- a/lang/java/ipc/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java +++ b/lang/java/ipc/src/test/java/org/apache/avro/compiler/specific/TestSpecificCompiler.java @@ -300,12 +300,11 @@ void generateGetMethod() { height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("getHeight$0", - SpecificCompiler.generateGetMethod(createRecord("test", false, height, Height), height)); + assertEquals("getHeight", SpecificCompiler.generateGetMethod(createRecord("test", false, height, Height), height)); height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("getHeight$1", + assertEquals("getHeight$0", SpecificCompiler.generateGetMethod(createRecord("test", false, height, Height), Height)); message = new Field("message", Schema.create(Type.STRING), null, null); @@ -314,12 +313,12 @@ void generateGetMethod() { message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("getMessage$0", + assertEquals("getMessage$", SpecificCompiler.generateGetMethod(createRecord("test", true, message, Message), message)); message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("getMessage$1", + assertEquals("getMessage$0", SpecificCompiler.generateGetMethod(createRecord("test", true, message, Message), Message)); schema = new Field("schema", Schema.create(Type.STRING), null, null); @@ -328,12 +327,12 @@ void generateGetMethod() { schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("getSchema$0", + assertEquals("getSchema$", SpecificCompiler.generateGetMethod(createRecord("test", false, schema, Schema$), schema)); schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("getSchema$1", + assertEquals("getSchema$0", SpecificCompiler.generateGetMethod(createRecord("test", false, schema, Schema$), Schema$)); } @@ -376,12 +375,11 @@ void generateSetMethod() { height = new Field("height", 
Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("setHeight$0", - SpecificCompiler.generateSetMethod(createRecord("test", false, height, Height), height)); + assertEquals("setHeight", SpecificCompiler.generateSetMethod(createRecord("test", false, height, Height), height)); height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("setHeight$1", + assertEquals("setHeight$0", SpecificCompiler.generateSetMethod(createRecord("test", false, height, Height), Height)); message = new Field("message", Schema.create(Type.STRING), null, null); @@ -390,12 +388,12 @@ void generateSetMethod() { message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("setMessage$0", + assertEquals("setMessage$", SpecificCompiler.generateSetMethod(createRecord("test", true, message, Message), message)); message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("setMessage$1", + assertEquals("setMessage$0", SpecificCompiler.generateSetMethod(createRecord("test", true, message, Message), Message)); schema = new Field("schema", Schema.create(Type.STRING), null, null); @@ -404,12 +402,12 @@ void generateSetMethod() { schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("setSchema$0", + assertEquals("setSchema$", SpecificCompiler.generateSetMethod(createRecord("test", false, schema, Schema$), schema)); schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("setSchema$1", + assertEquals("setSchema$0", SpecificCompiler.generateSetMethod(createRecord("test", false, schema, Schema$), Schema$)); } @@ -452,12 +450,11 @@ void generateHasMethod() { height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("hasHeight$0", - SpecificCompiler.generateHasMethod(createRecord("test", false, height, Height), height)); + assertEquals("hasHeight", SpecificCompiler.generateHasMethod(createRecord("test", false, height, Height), height)); height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("hasHeight$1", + assertEquals("hasHeight$0", SpecificCompiler.generateHasMethod(createRecord("test", false, height, Height), Height)); message = new Field("message", Schema.create(Type.STRING), null, null); @@ -466,12 +463,12 @@ void generateHasMethod() { message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("hasMessage$0", + assertEquals("hasMessage$", SpecificCompiler.generateHasMethod(createRecord("test", true, message, Message), message)); message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("hasMessage$1", + assertEquals("hasMessage$0", SpecificCompiler.generateHasMethod(createRecord("test", true, message, Message), Message)); schema = new Field("schema", Schema.create(Type.STRING), null, null); @@ -480,12 
+477,12 @@ void generateHasMethod() { schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("hasSchema$0", + assertEquals("hasSchema$", SpecificCompiler.generateHasMethod(createRecord("test", false, schema, Schema$), schema)); schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("hasSchema$1", + assertEquals("hasSchema$0", SpecificCompiler.generateHasMethod(createRecord("test", false, schema, Schema$), Schema$)); } @@ -528,12 +525,12 @@ void generateClearMethod() { height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("clearHeight$0", + assertEquals("clearHeight", SpecificCompiler.generateClearMethod(createRecord("test", false, height, Height), height)); height = new Field("height", Schema.create(Type.INT), null, null); Height = new Field("Height", Schema.create(Type.INT), null, null); - assertEquals("clearHeight$1", + assertEquals("clearHeight$0", SpecificCompiler.generateClearMethod(createRecord("test", false, height, Height), Height)); message = new Field("message", Schema.create(Type.STRING), null, null); @@ -542,12 +539,12 @@ void generateClearMethod() { message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("clearMessage$0", + assertEquals("clearMessage$", SpecificCompiler.generateClearMethod(createRecord("test", true, message, Message), message)); message = new Field("message", Schema.create(Type.STRING), null, null); Message = new Field("Message", Schema.create(Type.STRING), null, null); - assertEquals("clearMessage$1", + assertEquals("clearMessage$0", SpecificCompiler.generateClearMethod(createRecord("test", true, message, Message), Message)); schema = new Field("schema", Schema.create(Type.STRING), null, null); @@ -556,12 +553,12 @@ void generateClearMethod() { schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("clearSchema$0", + assertEquals("clearSchema$", SpecificCompiler.generateClearMethod(createRecord("test", false, schema, Schema$), schema)); schema = new Field("schema", Schema.create(Type.STRING), null, null); Schema$ = new Field("Schema", Schema.create(Type.STRING), null, null); - assertEquals("clearSchema$1", + assertEquals("clearSchema$0", SpecificCompiler.generateClearMethod(createRecord("test", false, schema, Schema$), Schema$)); } diff --git a/lang/java/mapred/pom.xml b/lang/java/mapred/pom.xml index 7a4b8a649be..8069e7df3e9 100644 --- a/lang/java/mapred/pom.xml +++ b/lang/java/mapred/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-mapred diff --git a/lang/java/maven-plugin/pom.xml b/lang/java/maven-plugin/pom.xml index 15590348dd8..a4e9e7b6fdb 100644 --- a/lang/java/maven-plugin/pom.xml +++ b/lang/java/maven-plugin/pom.xml @@ -91,6 +91,11 @@ + + ${project.groupId} + avro-idl + ${project.version} + ${project.groupId} avro-compiler diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java index ce15f10e947..7dc203b0357 100644 --- 
a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/AbstractAvroMojo.java @@ -18,6 +18,18 @@ package org.apache.avro.mojo; +import org.apache.avro.LogicalTypes; +import org.apache.avro.Protocol; +import org.apache.avro.Schema; +import org.apache.avro.compiler.specific.SpecificCompiler; +import org.apache.avro.generic.GenericData; +import org.apache.maven.artifact.DependencyResolutionRequiredException; +import org.apache.maven.plugin.AbstractMojo; +import org.apache.maven.plugin.MojoExecutionException; +import org.apache.maven.project.MavenProject; +import org.apache.maven.shared.model.fileset.FileSet; +import org.apache.maven.shared.model.fileset.util.FileSetManager; + import java.io.File; import java.io.IOException; import java.lang.reflect.InvocationTargetException; @@ -26,17 +38,9 @@ import java.net.URLClassLoader; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; -import org.apache.avro.LogicalTypes; -import org.apache.avro.compiler.specific.SpecificCompiler; -import org.apache.maven.artifact.DependencyResolutionRequiredException; -import org.apache.maven.plugin.AbstractMojo; -import org.apache.maven.plugin.MojoExecutionException; -import org.apache.maven.project.MavenProject; -import org.apache.maven.shared.model.fileset.FileSet; -import org.apache.maven.shared.model.fileset.util.FileSetManager; - /** * Base for Avro Compiler Mojos. */ @@ -129,6 +133,20 @@ public abstract class AbstractAvroMojo extends AbstractMojo { */ protected String[] velocityToolsClassesNames = new String[0]; + /** + * Generated record schema classes will extend this class. + * + * @parameter property="recordSpecificClass" + */ + private String recordSpecificClass = "org.apache.avro.specific.SpecificRecordBase"; + + /** + * Generated error schema classes will extend this class. + * + * @parameter property="errorSpecificClass" + */ + private String errorSpecificClass = "org.apache.avro.specific.SpecificExceptionBase"; + /** * The createOptionalGetters parameter enables generating the getOptional... * methods that return an Optional of the requested type. This works ONLY on @@ -224,6 +242,7 @@ public void execute() throws MojoExecutionException { } if (hasImports) { + checkImportPaths(); for (String importedFile : imports) { File file = new File(importedFile); if (file.isDirectory()) { @@ -254,6 +273,15 @@ public void execute() throws MojoExecutionException { } } + private void checkImportPaths() throws MojoExecutionException { + for (String importedFile : imports) { + File file = new File(importedFile); + if (!file.exists()) { + throw new MojoExecutionException("Path " + file.getAbsolutePath() + " does not exist"); + } + } + } + private String[] getIncludedFiles(String absPath, String[] excludes, String[] includes) { final FileSetManager fileSetManager = new FileSetManager(); final FileSet fs = new FileSet(); @@ -286,14 +314,21 @@ private String[] getIncludedFiles(String absPath, String[] excludes, String[] in } private void compileFiles(String[] files, File sourceDir, File outDir) throws MojoExecutionException { - for (String filename : files) { + final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + try { + Thread.currentThread().setContextClassLoader(createClassLoader()); + + // Need to register custom logical type factories before schema compilation. 
try { - // Need to register custom logical type factories before schema compilation. loadLogicalTypesFactories(); - doCompile(filename, sourceDir, outDir); } catch (IOException e) { - throw new MojoExecutionException("Error compiling protocol file " + filename + " to " + outDir, e); + throw new MojoExecutionException("Error while loading logical types factories ", e); } + this.doCompile(files, sourceDir, outDir); + } catch (MalformedURLException | DependencyResolutionRequiredException e) { + throw new MojoExecutionException("Cannot locate classpath entries", e); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); } } @@ -326,7 +361,7 @@ protected List<Object> instantiateAdditionalVelocityTools() { final List<Object> velocityTools = new ArrayList<>(velocityToolsClassesNames.length); for (String velocityToolClassName : velocityToolsClassesNames) { try { - Class klass = Class.forName(velocityToolClassName); + Class<?> klass = Class.forName(velocityToolClassName); velocityTools.add(klass.getDeclaredConstructor().newInstance()); } catch (Exception e) { throw new RuntimeException(e); @@ -335,22 +370,74 @@ protected List<Object> instantiateAdditionalVelocityTools() { return velocityTools; } - protected abstract void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException; + protected void doCompile(String[] files, File sourceDirectory, File outputDirectory) throws MojoExecutionException { + for (String filename : files) { + try { + doCompile(filename, sourceDirectory, outputDirectory); + } catch (IOException e) { + throw new MojoExecutionException("Error compiling file " + filename + " to " + outputDirectory, e); + } + } + } + + protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { + throw new UnsupportedOperationException( + "Programmer error: AbstractAvroMojo.doCompile(String, java.io.File, java.io.File) called directly"); + }; - protected URLClassLoader createClassLoader() throws DependencyResolutionRequiredException, MalformedURLException { + protected void doCompile(File sourceFileForModificationDetection, Collection<Schema> schemas, File outputDirectory) + throws IOException { + doCompile(sourceFileForModificationDetection, new SpecificCompiler(schemas), outputDirectory); + } + + protected void doCompile(File sourceFileForModificationDetection, Protocol protocol, File outputDirectory) + throws IOException { + doCompile(sourceFileForModificationDetection, new SpecificCompiler(protocol), outputDirectory); + } + + private void doCompile(File sourceFileForModificationDetection, SpecificCompiler compiler, File outputDirectory) + throws IOException { + compiler.setTemplateDir(templateDirectory); + compiler.setStringType(GenericData.StringType.valueOf(stringType)); + compiler.setFieldVisibility(getFieldVisibility()); + compiler.setCreateOptionalGetters(createOptionalGetters); + compiler.setGettersReturnOptional(gettersReturnOptional); + compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); + compiler.setCreateSetters(createSetters); + compiler.setCreateNullSafeAnnotations(createNullSafeAnnotations); + compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); + try { + for (String customConversion : customConversions) { + compiler.addCustomConversion(Thread.currentThread().getContextClassLoader().loadClass(customConversion)); + } + } catch (ClassNotFoundException e) { + throw new IOException(e); + } + 
compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); + compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); + compiler.setRecordSpecificClass(this.recordSpecificClass); + compiler.setErrorSpecificClass(this.errorSpecificClass); + compiler.compileToDestination(sourceFileForModificationDetection, outputDirectory); + } + + protected List<URL> findClasspath() throws DependencyResolutionRequiredException, MalformedURLException { final List<URL> urls = appendElements(project.getRuntimeClasspathElements()); urls.addAll(appendElements(project.getTestClasspathElements())); + return urls; + } + + protected URLClassLoader createClassLoader() throws DependencyResolutionRequiredException, MalformedURLException { + final List<URL> urls = findClasspath(); return new URLClassLoader(urls.toArray(new URL[0]), Thread.currentThread().getContextClassLoader()); } - private List<URL> appendElements(List runtimeClasspathElements) throws MalformedURLException { + private List<URL> appendElements(List<String> runtimeClasspathElements) throws MalformedURLException { if (runtimeClasspathElements == null) { return new ArrayList<>(); } List<URL> runtimeUrls = new ArrayList<>(runtimeClasspathElements.size()); - for (Object runtimeClasspathElement : runtimeClasspathElements) { - String element = (String) runtimeClasspathElement; - runtimeUrls.add(new File(element).toURI().toURL()); + for (String runtimeClasspathElement : runtimeClasspathElements) { + runtimeUrls.add(new File(runtimeClasspathElement).toURI().toURL()); } return runtimeUrls; } } diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java new file mode 100644 index 00000000000..15f6a6c0c4e --- /dev/null +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLMojo.java @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.avro.mojo; + +import java.io.File; +import java.io.IOException; +import java.net.URL; +import java.net.URLClassLoader; +import java.util.ArrayList; +import java.util.List; + +import org.apache.avro.Protocol; +import org.apache.avro.compiler.specific.SpecificCompiler; +import org.apache.avro.generic.GenericData; + +import org.apache.avro.idl.IdlFile; +import org.apache.avro.idl.IdlReader; +import org.apache.maven.artifact.DependencyResolutionRequiredException; + +/** + * Generate Java classes and interfaces from AvroIDL files (.avdl) + * + * @goal idl + * @requiresDependencyResolution runtime + * @phase generate-sources + * @threadSafe + */ +public class IDLMojo extends AbstractAvroMojo { + /** + * A set of Ant-like inclusion patterns used to select files from the source + * directory for processing.
By default, the pattern **/*.avdl + * is used to select IDL files. + * + * @parameter + */ + private String[] includes = new String[] { "**/*.avdl" }; + + /** + * A set of Ant-like inclusion patterns used to select files from the source + * directory for processing. By default, the pattern **/*.avdl + * is used to select IDL files. + * + * @parameter + */ + private String[] testIncludes = new String[] { "**/*.avdl" }; + + @Override + protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { + try { + @SuppressWarnings("rawtypes") + List runtimeClasspathElements = project.getRuntimeClasspathElements(); + + List<URL> runtimeUrls = new ArrayList<>(); + + // Add the source directory of avro files to the classpath so that + // imports can refer to other idl files as classpath resources + runtimeUrls.add(sourceDirectory.toURI().toURL()); + + // If runtimeClasspathElements is not empty, add its values to the IDL path. + if (runtimeClasspathElements != null && !runtimeClasspathElements.isEmpty()) { + for (Object runtimeClasspathElement : runtimeClasspathElements) { + String element = (String) runtimeClasspathElement; + runtimeUrls.add(new File(element).toURI().toURL()); + } + } + + final ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader(); + URLClassLoader projPathLoader = new URLClassLoader(runtimeUrls.toArray(new URL[0]), contextClassLoader); + Thread.currentThread().setContextClassLoader(projPathLoader); + try { + IdlReader parser = new IdlReader(); + IdlFile idlFile = parser.parse(sourceDirectory.toPath().resolve(filename)); + for (String warning : idlFile.getWarnings()) { + getLog().warn(warning); + } + final SpecificCompiler compiler; + final Protocol protocol = idlFile.getProtocol(); + if (protocol != null) { + compiler = new SpecificCompiler(protocol); + } else { + compiler = new SpecificCompiler(idlFile.getNamedSchemas().values()); + } + compiler.setStringType(GenericData.StringType.valueOf(stringType)); + compiler.setTemplateDir(templateDirectory); + compiler.setFieldVisibility(getFieldVisibility()); + compiler.setCreateOptionalGetters(createOptionalGetters); + compiler.setGettersReturnOptional(gettersReturnOptional); + compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); + compiler.setCreateSetters(createSetters); + compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); + compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); + for (String customConversion : customConversions) { + compiler.addCustomConversion(projPathLoader.loadClass(customConversion)); + } + compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); + compiler.compileToDestination(null, outputDirectory); + } finally { + Thread.currentThread().setContextClassLoader(contextClassLoader); + } + } catch (ClassNotFoundException | DependencyResolutionRequiredException e) { + throw new IOException(e); + } + } + + @Override + protected String[] getIncludes() { + return includes; + } + + @Override + protected String[] getTestIncludes() { + return testIncludes; + } +} diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java index e616a249d58..a6dd9cf24ee 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/IDLProtocolMojo.java @@ 
-18,21 +18,6 @@ package org.apache.avro.mojo; -import java.io.File; -import java.io.IOException; -import java.net.URL; -import java.net.URLClassLoader; -import java.util.ArrayList; -import java.util.List; - -import org.apache.avro.Protocol; -import org.apache.avro.compiler.idl.Idl; -import org.apache.avro.compiler.idl.ParseException; -import org.apache.avro.compiler.specific.SpecificCompiler; -import org.apache.avro.generic.GenericData; - -import org.apache.maven.artifact.DependencyResolutionRequiredException; - /** * Generate Java classes and interfaces from AvroIDL files (.avdl) * @@ -41,84 +26,6 @@ * @phase generate-sources * @threadSafe */ -public class IDLProtocolMojo extends AbstractAvroMojo { - /** - * A set of Ant-like inclusion patterns used to select files from the source - * directory for processing. By default, the pattern **/*.avdl - * is used to select IDL files. - * - * @parameter - */ - private String[] includes = new String[] { "**/*.avdl" }; - - /** - * A set of Ant-like inclusion patterns used to select files from the source - * directory for processing. By default, the pattern **/*.avdl - * is used to select IDL files. - * - * @parameter - */ - private String[] testIncludes = new String[] { "**/*.avdl" }; - - @Override - protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { - try { - @SuppressWarnings("rawtypes") - List runtimeClasspathElements = project.getRuntimeClasspathElements(); - - List<URL> runtimeUrls = new ArrayList<>(); - - // Add the source directory of avro files to the classpath so that - // imports can refer to other idl files as classpath resources - runtimeUrls.add(sourceDirectory.toURI().toURL()); - - // If runtimeClasspathElements is not empty values add its values to Idl path.
- if (runtimeClasspathElements != null && !runtimeClasspathElements.isEmpty()) { - for (Object runtimeClasspathElement : runtimeClasspathElements) { - String element = (String) runtimeClasspathElement; - runtimeUrls.add(new File(element).toURI().toURL()); - } - } - - URLClassLoader projPathLoader = new URLClassLoader(runtimeUrls.toArray(new URL[0]), - Thread.currentThread().getContextClassLoader()); - try (Idl parser = new Idl(new File(sourceDirectory, filename), projPathLoader)) { - - Protocol p = parser.CompilationUnit(); - for (String warning : parser.getWarningsAfterParsing()) { - getLog().warn(warning); - } - String json = p.toString(true); - Protocol protocol = Protocol.parse(json); - final SpecificCompiler compiler = new SpecificCompiler(protocol); - compiler.setStringType(GenericData.StringType.valueOf(stringType)); - compiler.setTemplateDir(templateDirectory); - compiler.setFieldVisibility(getFieldVisibility()); - compiler.setCreateOptionalGetters(createOptionalGetters); - compiler.setGettersReturnOptional(gettersReturnOptional); - compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); - compiler.setCreateSetters(createSetters); - compiler.setCreateNullSafeAnnotations(createNullSafeAnnotations); - compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); - compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); - for (String customConversion : customConversions) { - compiler.addCustomConversion(projPathLoader.loadClass(customConversion)); - } - compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); - compiler.compileToDestination(null, outputDirectory); - } - } catch (ParseException | ClassNotFoundException | DependencyResolutionRequiredException e) { - throw new IOException(e); - } - } - - @Override - protected String[] getIncludes() { - return includes; - } - - @Override - protected String[] getTestIncludes() { - return testIncludes; - } +public class IDLProtocolMojo extends IDLMojo { + // Empty; kept for backwards compatibility. 
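+ //
+ // The old "idl-protocol" goal therefore keeps resolving to this mojo, so
+ // existing builds need no changes. An illustrative pom.xml snippet (goal
+ // name assumed from this class's pre-rename javadoc) that still works:
+ //
+ //   <plugin>
+ //     <groupId>org.apache.avro</groupId>
+ //     <artifactId>avro-maven-plugin</artifactId>
+ //     <executions>
+ //       <execution>
+ //         <goals>
+ //           <goal>idl-protocol</goal>
+ //         </goals>
+ //       </execution>
+ //     </executions>
+ //   </plugin>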
} diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/ProtocolMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/ProtocolMojo.java index 6b7f45d9e95..ee7e4101c5d 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/ProtocolMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/ProtocolMojo.java @@ -18,15 +18,10 @@ package org.apache.avro.mojo; -import org.apache.avro.generic.GenericData.StringType; +import org.apache.avro.Protocol; import java.io.File; import java.io.IOException; -import java.net.URLClassLoader; - -import org.apache.avro.Protocol; -import org.apache.avro.compiler.specific.SpecificCompiler; -import org.apache.maven.artifact.DependencyResolutionRequiredException; /** * Generate Java classes and interfaces from Avro protocol files (.avpr) @@ -59,28 +54,7 @@ public class ProtocolMojo extends AbstractAvroMojo { protected void doCompile(String filename, File sourceDirectory, File outputDirectory) throws IOException { final File src = new File(sourceDirectory, filename); final Protocol protocol = Protocol.parse(src); - final SpecificCompiler compiler = new SpecificCompiler(protocol); - compiler.setTemplateDir(templateDirectory); - compiler.setStringType(StringType.valueOf(stringType)); - compiler.setFieldVisibility(getFieldVisibility()); - compiler.setCreateOptionalGetters(createOptionalGetters); - compiler.setGettersReturnOptional(gettersReturnOptional); - compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); - compiler.setCreateSetters(createSetters); - compiler.setCreateNullSafeAnnotations(createNullSafeAnnotations); - compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); - compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); - final URLClassLoader classLoader; - try { - classLoader = createClassLoader(); - for (String customConversion : customConversions) { - compiler.addCustomConversion(classLoader.loadClass(customConversion)); - } - } catch (DependencyResolutionRequiredException | ClassNotFoundException e) { - throw new IOException(e); - } - compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); - compiler.compileToDestination(src, outputDirectory); + doCompile(src, protocol, outputDirectory); } @Override diff --git a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/SchemaMojo.java b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/SchemaMojo.java index 4d1b15870ed..36a4fc4a53c 100644 --- a/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/SchemaMojo.java +++ b/lang/java/maven-plugin/src/main/java/org/apache/avro/mojo/SchemaMojo.java @@ -18,15 +18,16 @@ package org.apache.avro.mojo; -import org.apache.avro.generic.GenericData.StringType; +import org.apache.avro.Schema; +import org.apache.avro.SchemaParseException; +import org.apache.maven.plugin.MojoExecutionException; import java.io.File; import java.io.IOException; -import java.net.URLClassLoader; - -import org.apache.avro.Schema; -import org.apache.avro.compiler.specific.SpecificCompiler; -import org.apache.maven.artifact.DependencyResolutionRequiredException; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; /** * Generate Java classes from Avro schema files (.avsc) @@ -62,41 +63,29 @@ public class SchemaMojo extends AbstractAvroMojo { private String[] testIncludes = new String[] { "**/*.avsc" }; @Override - protected void doCompile(String 
filename, File sourceDirectory, File outputDirectory) throws IOException { - File src = new File(sourceDirectory, filename); - final Schema schema; + protected void doCompile(String[] fileNames, File sourceDirectory, File outputDirectory) + throws MojoExecutionException { + final List<File> sourceFiles = Arrays.stream(fileNames) + .map((String filename) -> new File(sourceDirectory, filename)).collect(Collectors.toList()); + final File sourceFileForModificationDetection = sourceFiles.stream().filter(file -> file.lastModified() > 0) + .max(Comparator.comparing(File::lastModified)).orElse(null); + final List<Schema> schemas; - // This is necessary to maintain backward-compatibility. If there are - // no imported files then isolate the schemas from each other, otherwise - // allow them to share a single schema so reuse and sharing of schema - // is possible. - if (imports == null) { - schema = new Schema.Parser().parse(src); - } else { - schema = schemaParser.parse(src); - } - - final SpecificCompiler compiler = new SpecificCompiler(schema); - compiler.setTemplateDir(templateDirectory); - compiler.setStringType(StringType.valueOf(stringType)); - compiler.setFieldVisibility(getFieldVisibility()); - compiler.setCreateOptionalGetters(createOptionalGetters); - compiler.setGettersReturnOptional(gettersReturnOptional); - compiler.setOptionalGettersForNullableFieldsOnly(optionalGettersForNullableFieldsOnly); - compiler.setCreateSetters(createSetters); - compiler.setCreateNullSafeAnnotations(createNullSafeAnnotations); - compiler.setEnableDecimalLogicalType(enableDecimalLogicalType); try { - final URLClassLoader classLoader = createClassLoader(); - for (String customConversion : customConversions) { - compiler.addCustomConversion(classLoader.loadClass(customConversion)); + // This is necessary to maintain backward-compatibility. If there are + // no imported files then isolate the schemas from each other, otherwise + // allow them to share a single schema so reuse and sharing of schema + // is possible. + if (imports == null) { + schemas = new Schema.Parser().parse(sourceFiles); + } else { + schemas = schemaParser.parse(sourceFiles); } - } catch (ClassNotFoundException | DependencyResolutionRequiredException e) { - throw new IOException(e); + + doCompile(sourceFileForModificationDetection, schemas, outputDirectory); + } catch (IOException | SchemaParseException ex) { + throw new MojoExecutionException("Error compiling a file in " + sourceDirectory + " to " + outputDirectory, ex); } - compiler.setOutputCharacterEncoding(project.getProperties().getProperty("project.build.sourceEncoding")); - compiler.setAdditionalVelocityTools(instantiateAdditionalVelocityTools()); - compiler.compileToDestination(src, outputDirectory); } @Override diff --git a/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl b/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl index fd799d3dda5..81bdb609445 100644 --- a/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl +++ b/lang/java/maven-plugin/src/test/avro/AvdlClasspathImport.avdl @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -15,14 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -@namespace("test") -protocol IdlClasspathImportTest { - import idl "avro/User.avdl"; +namespace test; - /** Ignored Doc Comment */ - /** IDL User */ - record IdlUserWrapper { - union { null, test.IdlUser } wrapped; - } +import idl "avro/User.avdl"; +/** Ignored Doc Comment */ +/** IDL User */ +record IdlUserWrapper { + union { null, test.IdlUser } wrapped; } diff --git a/lang/java/maven-plugin/src/test/avro/extends/Custom.avsc b/lang/java/maven-plugin/src/test/avro/extends/Custom.avsc new file mode 100644 index 00000000000..63056e5d17f --- /dev/null +++ b/lang/java/maven-plugin/src/test/avro/extends/Custom.avsc @@ -0,0 +1,18 @@ +{ + "type": "record", + "namespace": "test", + "name": "SchemaCustom", + "doc": "Custom Test Bean", + "fields": [ + { + "name": "id", + "type": ["null", "string"], + "default": null + }, + { + "name": "createdOn", + "type": ["null", "long"], + "default": null + } + ] +} diff --git a/lang/java/maven-plugin/src/test/avro/multipleSchemas/ApplicationEvent.avsc b/lang/java/maven-plugin/src/test/avro/multipleSchemas/ApplicationEvent.avsc new file mode 100644 index 00000000000..6902084350f --- /dev/null +++ b/lang/java/maven-plugin/src/test/avro/multipleSchemas/ApplicationEvent.avsc @@ -0,0 +1,28 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "ApplicationEvent", + "fields": [ + { + "name": "applicationId", + "type": "string", + "doc": "Application ID" + }, + { + "name": "status", + "type": "string", + "doc": "Application Status" + }, + { + "name": "documents", + "type": ["null", { + "type": "array", + "items": "model.DocumentInfo" + }], + "doc": "", + "default": null + } + ] + +} diff --git a/lang/java/maven-plugin/src/test/avro/multipleSchemas/DocumentInfo.avsc b/lang/java/maven-plugin/src/test/avro/multipleSchemas/DocumentInfo.avsc new file mode 100644 index 00000000000..95dd4243ea6 --- /dev/null +++ b/lang/java/maven-plugin/src/test/avro/multipleSchemas/DocumentInfo.avsc @@ -0,0 +1,19 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "DocumentInfo", + "fields": [ + { + "name": "documentId", + "type": "string", + "doc": "Document ID" + }, + { + "name": "filePath", + "type": "string", + "doc": "Document Path" + } + ] + +} diff --git a/lang/java/maven-plugin/src/test/avro/multipleSchemas/MyResponse.avsc b/lang/java/maven-plugin/src/test/avro/multipleSchemas/MyResponse.avsc new file mode 100644 index 00000000000..ac6d08291d9 --- /dev/null +++ b/lang/java/maven-plugin/src/test/avro/multipleSchemas/MyResponse.avsc @@ -0,0 +1,14 @@ +{ + "namespace": "model", + "type": "record", + "doc": "", + "name": "MyResponse", + "fields": [ + { + "name": "isSuccessful", + "type": "boolean", + "doc": "Indicator for successful or unsuccessful call" + } + ] + +} diff --git a/lang/java/maven-plugin/src/test/avro/multipleSchemas/README.md b/lang/java/maven-plugin/src/test/avro/multipleSchemas/README.md new file mode 100644 index 00000000000..fe3541b660e --- /dev/null +++ b/lang/java/maven-plugin/src/test/avro/multipleSchemas/README.md @@ -0,0 +1,8 @@ +## test for parsing multiple files. +This folder aims to test `public List Schema.parse(Iterable sources) throws IOException` method. + +The objective is to check that a record schema define in a file can be use in another record schema as a field type. +Here, ApplicationEvent.avsc file contains a field of type DocumentInfo, defined in file DocumentInfo.avsc. + +The is written at TestSchema.testParseMultipleFile. 
+ diff --git a/lang/java/maven-plugin/src/test/java/org/apache/avro/custom/CustomRecordBase.java b/lang/java/maven-plugin/src/test/java/org/apache/avro/custom/CustomRecordBase.java new file mode 100644 index 00000000000..acc38381294 --- /dev/null +++ b/lang/java/maven-plugin/src/test/java/org/apache/avro/custom/CustomRecordBase.java @@ -0,0 +1,23 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.avro.custom; + +import org.apache.avro.specific.SpecificRecordBase; + +public abstract class CustomRecordBase extends SpecificRecordBase { +} diff --git a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java similarity index 74% rename from lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java rename to lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java index 226ca6de09d..94cc5b29e52 100644 --- a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLProtocolMojo.java +++ b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestIDLMojo.java @@ -17,19 +17,21 @@ */ package org.apache.avro.mojo; -import org.codehaus.plexus.util.FileUtils; -import org.junit.Test; - import java.io.File; -import java.util.Arrays; +import java.util.Collections; import java.util.Collections; import java.util.HashSet; import java.util.Set; +import org.codehaus.plexus.util.FileUtils; +import org.junit.Test; + +import static java.util.Arrays.asList; + /** * Test the IDL Protocol Mojo. 
*/ -public class TestIDLProtocolMojo extends AbstractAvroMojoTest { +public class TestIDLMojo extends AbstractAvroMojoTest { private File testPom = new File(getBasedir(), "src/test/resources/unit/idl/pom.xml"); private File injectingVelocityToolsTestPom = new File(getBasedir(), @@ -37,7 +39,7 @@ public class TestIDLProtocolMojo extends AbstractAvroMojoTest { @Test public void testIdlProtocolMojo() throws Exception { - final IDLProtocolMojo mojo = (IDLProtocolMojo) lookupMojo("idl-protocol", testPom); + final IDLMojo mojo = (IDLMojo) lookupMojo("idl", testPom); final TestLog log = new TestLog(); mojo.setLog(log); @@ -45,17 +47,15 @@ public void testIdlProtocolMojo() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/idl/test/"); - final Set generatedFiles = new HashSet<>(Arrays.asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", - "IdlUserWrapper.java", "IdlClasspathImportTest.java")); + final Set generatedFiles = new HashSet<>( + asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", "IdlUserWrapper.java")); assertFilesExist(outputDir, generatedFiles); final String idlUserContent = FileUtils.fileRead(new File(outputDir, "IdlUser.java")); assertTrue(idlUserContent.contains("java.time.Instant")); - assertEquals(Collections.singletonList( - "[WARN] Found documentation comment at line 23, column 5. Ignoring previous one at line 22, column 5: \"Ignored Doc Comment\"" - + "\nDid you mean to use a multiline comment ( /* ... */ ) instead?"), - log.getLogEntries()); + assertEquals(Collections.singletonList("[WARN] Line 22, char 1: Ignoring out-of-place documentation comment.\n" + + "Did you mean to use a multiline comment ( /* ... */ ) instead?"), log.getLogEntries()); } @Test @@ -68,8 +68,8 @@ public void testSetCompilerVelocityAdditionalTools() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/idl-inject/test"); - final Set generatedFiles = new HashSet<>(Arrays.asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", - "IdlUserWrapper.java", "IdlClasspathImportTest.java")); + final Set generatedFiles = new HashSet<>( + asList("IdlPrivacy.java", "IdlTest.java", "IdlUser.java", "IdlUserWrapper.java")); assertFilesExist(outputDir, generatedFiles); diff --git a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java index f6bdc7fd06f..f54b9a40403 100644 --- a/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java +++ b/lang/java/maven-plugin/src/test/java/org/apache/avro/mojo/TestSchemaMojo.java @@ -17,13 +17,18 @@ */ package org.apache.avro.mojo; +import org.apache.maven.plugin.MojoExecutionException; import org.codehaus.plexus.util.FileUtils; import org.junit.Test; +import org.junit.jupiter.api.Assertions; import java.io.File; +import java.nio.file.Files; import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Set; +import java.util.stream.Collectors; /** * Test the Schema Mojo. 
@@ -33,6 +38,12 @@ public class TestSchemaMojo extends AbstractAvroMojoTest { private File testPom = new File(getBasedir(), "src/test/resources/unit/schema/pom.xml"); private File injectingVelocityToolsTestPom = new File(getBasedir(), "src/test/resources/unit/schema/pom-injecting-velocity-tools.xml"); + private File testNonexistentFilePom = new File(getBasedir(), + "src/test/resources/unit/schema/pom-nonexistent-file.xml"); + private File testNonexistentSecondFilePom = new File(getBasedir(), + "src/test/resources/unit/schema/pom-nonexistent-second-file.xml"); + + private File testExtendsFilePom = new File(getBasedir(), "src/test/resources/unit/schema/pom-customExtends.xml"); @Test public void testSchemaMojo() throws Exception { @@ -42,8 +53,8 @@ public void testSchemaMojo() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/schema/test"); - final Set<String> generatedFiles = new HashSet<>( - Arrays.asList("PrivacyDirectImport.java", "PrivacyImport.java", "SchemaPrivacy.java", "SchemaUser.java")); + final Set<String> generatedFiles = new HashSet<>(Arrays.asList("PrivacyDirectImport.java", "PrivacyImport.java", + "SchemaPrivacy.java", "SchemaUser.java", "SchemaCustom.java")); assertFilesExist(outputDir, generatedFiles); @@ -59,12 +70,49 @@ public void testSetCompilerVelocityAdditionalTools() throws Exception { mojo.execute(); final File outputDir = new File(getBasedir(), "target/test-harness/schema-inject/test"); - final Set<String> generatedFiles = new HashSet<>( - Arrays.asList("PrivacyDirectImport.java", "PrivacyImport.java", "SchemaPrivacy.java", "SchemaUser.java")); + final Set<String> generatedFiles = new HashSet<>(Arrays.asList("PrivacyDirectImport.java", "PrivacyImport.java", + "SchemaPrivacy.java", "SchemaUser.java", "SchemaCustom.java")); assertFilesExist(outputDir, generatedFiles); final String schemaUserContent = FileUtils.fileRead(new File(outputDir, "SchemaUser.java")); assertTrue("Got " + schemaUserContent + " instead", schemaUserContent.contains("It works!")); } + + @Test + public void testThrowsErrorForNonexistentFile() throws Exception { + try { + final SchemaMojo mojo = (SchemaMojo) lookupMojo("schema", testNonexistentFilePom); + mojo.execute(); + fail("MojoExecutionException not thrown!"); + } catch (MojoExecutionException ignored) { + } + } + + @Test + public void testThrowsErrorForNonexistentSecondFile() throws Exception { + try { + final SchemaMojo mojo = (SchemaMojo) lookupMojo("schema", testNonexistentSecondFilePom); + mojo.execute(); + fail("MojoExecutionException not thrown!"); + } catch (MojoExecutionException ignored) { + } + } + + @Test + public void testExtends() throws Exception { + final SchemaMojo mojo = (SchemaMojo) lookupMojo("schema", testExtendsFilePom); + assertNotNull(mojo); + + mojo.execute(); + final File outputDir = new File(getBasedir(), "target/extends/schema/test"); + File outputFile = new File(outputDir, "SchemaCustom.java"); + assertTrue(outputFile.exists()); + List<String> extendsLines = Files.readAllLines(outputFile.toPath()).stream() + .filter((String line) -> line.contains("class SchemaCustom extends ")).collect(Collectors.toList()); + assertEquals(1, extendsLines.size()); + String extendLine = extendsLines.get(0); + assertTrue(extendLine.contains(" org.apache.avro.custom.CustomRecordBase ")); + assertFalse(extendLine.contains("org.apache.avro.specific.SpecificRecordBase")); + } } diff --git a/lang/java/maven-plugin/src/test/resources/unit/idl/pom-javacc.xml
b/lang/java/maven-plugin/src/test/resources/unit/idl/pom-javacc.xml new file mode 100644 index 00000000000..4abd67f7bca --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/idl/pom-javacc.xml @@ -0,0 +1,68 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.11.0-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + idl + + idl-protocol + + + + + true + ${basedir}/src/test + ${basedir}/target/test-harness/idl + String + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-customExtends.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-customExtends.xml new file mode 100644 index 00000000000..9eea1911be5 --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-customExtends.xml @@ -0,0 +1,60 @@ + + + + 4.0.0 + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro/extends + ${basedir}/target/extends/schema + org.apache.avro.custom.CustomRecordBase + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-multiple-schema.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-multiple-schema.xml new file mode 100644 index 00000000000..10b0b3fae80 --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-multiple-schema.xml @@ -0,0 +1,66 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.12.0-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro/multipleSchemas + ${basedir}/target/test-harness/schema + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml new file mode 100644 index 00000000000..49965752d0d --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-file.xml @@ -0,0 +1,69 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.12.0-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro + ${basedir}/target/test-harness/schema + + ${basedir}/src/test/avro/nonexistent-dir + + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml new file mode 100644 index 00000000000..f5b7134cd55 --- /dev/null +++ b/lang/java/maven-plugin/src/test/resources/unit/schema/pom-nonexistent-second-file.xml @@ -0,0 +1,70 @@ + + + + 4.0.0 + + + avro-parent + org.apache.avro + 1.12.0-SNAPSHOT + ../../../../../../../../../pom.xml + + + avro-maven-plugin-test + jar + + testproject + + + + + avro-maven-plugin + + + schema + + schema + + + + + ${basedir}/src/test/avro + 
${basedir}/target/test-harness/schema + + ${basedir}/src/test/avro/imports + ${basedir}/src/test/avro/nonexistent-dir + + + + + + + + + org.apache.avro + avro + ${parent.version} + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + diff --git a/lang/java/perf/pom.xml b/lang/java/perf/pom.xml index 1aff69ddb61..479f9bd760c 100644 --- a/lang/java/perf/pom.xml +++ b/lang/java/perf/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-perf @@ -34,7 +34,7 @@ jar - 1.36 + 1.37 ${project.parent.parent.basedir} diff --git a/lang/java/pom.xml b/lang/java/pom.xml index 32faca4e7ef..9f7f241a946 100644 --- a/lang/java/pom.xml +++ b/lang/java/pom.xml @@ -37,42 +37,43 @@ ${project.parent.basedir} - 3.3.4 - 2.14.2 - 4.0.1 - 9.4.51.v20230217 - 5.0.4 - 5.9.2 - 4.1.89.Final - 3.22.0 - 0.16.0 - 1.7.36 - 1.2.24 - 1.1.9.1 - 2.3 - 3.3.9 - 1.10.13 + 1.10.14 1.5.0 1.22 1.10.0 - 1.9 - 4.11.0 + 1.58.0 + 3.3.5 2.2 - 1.53.0 - 1.5.4-2 + 2.15.2 + 9.4.52.v20230823 + 5.0.4 + 5.10.0 + 3.3.9 + 4.11.0 + 4.1.97.Final + 3.24.3 + 1.2.25 + 4.0.1 + 1.7.36 + 1.1.10.3 + 0.16.0 + 1.9 + 2.3 + 1.5.5-5 3.2.1 5.1.8 + 2.7.9 3.1.0 3.1.0 3.0.3 7.0.12 - 2.7.5 android avro + idl compiler maven-plugin ipc @@ -98,7 +99,7 @@ org.codehaus.mojo build-helper-maven-plugin - 3.3.0 + 3.4.0 org.apache.maven.plugins diff --git a/lang/java/protobuf/pom.xml b/lang/java/protobuf/pom.xml index d509955f81a..0a04f8f96c6 100644 --- a/lang/java/protobuf/pom.xml +++ b/lang/java/protobuf/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-protobuf diff --git a/lang/java/thrift/pom.xml b/lang/java/thrift/pom.xml index 64a9dfa0668..591dd255472 100644 --- a/lang/java/thrift/pom.xml +++ b/lang/java/thrift/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-thrift diff --git a/lang/java/tools/pom.xml b/lang/java/tools/pom.xml index c64412b9591..e841c8d84fd 100644 --- a/lang/java/tools/pom.xml +++ b/lang/java/tools/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml avro-tools @@ -198,6 +198,11 @@ tests test + + ${project.groupId} + avro-idl + ${project.version} + ${project.groupId} avro-compiler diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java index fb5ef7227f4..87ff94aa592 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileReadTool.java @@ -18,6 +18,7 @@ package org.apache.avro.tool; import java.io.BufferedInputStream; +import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.util.ArrayList; @@ -35,9 +36,12 @@ import org.apache.avro.generic.GenericDatumWriter; import org.apache.avro.io.EncoderFactory; import org.apache.avro.io.JsonEncoder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** Reads a data file and dumps to JSON */ public class DataFileReadTool implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(DataFileReadTool.class); private static final long DEFAULT_HEAD_COUNT = 10; @Override @@ -62,18 +66,13 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, List .ofType(String.class); OptionSet optionSet = optionParser.parse(args.toArray(new String[0])); - Boolean pretty = optionSet.has(prettyOption); + boolean pretty = optionSet.has(prettyOption); List nargs = new 
ArrayList<>((List) optionSet.nonOptionArguments()); String readerSchemaStr = readerSchemaOption.value(optionSet); String readerSchemaFile = readerSchemaFileOption.value(optionSet); - Schema readerSchema = null; - if (readerSchemaFile != null) { - readerSchema = Util.parseSchemaFromFS(readerSchemaFile); - } else if (readerSchemaStr != null) { - readerSchema = new Schema.Parser().parse(readerSchemaStr); - } + Schema readerSchema = getSchema(readerSchemaStr, readerSchemaFile); long headCount = getHeadCount(optionSet, headOption, nargs); @@ -92,7 +91,7 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, List } try (DataFileStream streamReader = new DataFileStream<>(inStream, reader)) { Schema schema = readerSchema != null ? readerSchema : streamReader.getSchema(); - DatumWriter writer = new GenericDatumWriter<>(schema); + DatumWriter writer = new GenericDatumWriter<>(schema); JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, out, pretty); for (long recordCount = 0; streamReader.hasNext() && recordCount < headCount; recordCount++) { Object datum = streamReader.next(); @@ -105,6 +104,18 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, List return 0; } + static Schema getSchema(String schemaStr, String schemaFile) throws IOException { + Schema readerSchema = null; + if (schemaFile != null) { + LOG.info("Reading schema from file '{}'", schemaFile); + readerSchema = Util.parseSchemaFromFS(schemaFile); + } else if (schemaStr != null) { + LOG.info("Reading schema from string '{}'", schemaStr); + readerSchema = new Schema.Parser().parse(schemaStr); + } + return readerSchema; + } + private static long getHeadCount(OptionSet optionSet, OptionSpec headOption, List nargs) { long headCount = Long.MAX_VALUE; if (optionSet.has(headOption)) { diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileWriteTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileWriteTool.java index b763a00bee4..4bf2f79a0b2 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileWriteTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/DataFileWriteTool.java @@ -72,7 +72,7 @@ public int run(InputStream stdin, PrintStream out, PrintStream err, List p.printHelpOn(err); return 1; } - Schema schema = (schemafile != null) ? 
Util.parseSchemaFromFS(schemafile) : new Schema.Parser().parse(schemastr); + Schema schema = DataFileReadTool.getSchema(schemastr, schemafile); DatumReader reader = new GenericDatumReader<>(schema); diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlToSchemataTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlToSchemataTool.java index df43d6d2ac7..6e0901efed1 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlToSchemataTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlToSchemataTool.java @@ -21,6 +21,8 @@ import org.apache.avro.Protocol; import org.apache.avro.Schema; import org.apache.avro.compiler.idl.Idl; +import org.apache.avro.idl.IdlFile; +import org.apache.avro.idl.IdlReader; import java.io.File; import java.io.FileNotFoundException; @@ -36,26 +38,40 @@ public class IdlToSchemataTool implements Tool { @Override public int run(InputStream in, PrintStream out, PrintStream err, List args) throws Exception { - if (args.isEmpty() || args.size() > 2 || isRequestingHelp(args)) { - err.println("Usage: idl2schemata [idl] [outdir]"); - err.println(""); + boolean useJavaCC = "--useJavaCC".equals(getArg(args, 0, null)); + + if (args.isEmpty() || args.size() > (useJavaCC ? 3 : 2) || isRequestingHelp(args)) { + err.println("Usage: idl2schemata [--useJavaCC] [idl [outdir]]"); + err.println(); err.println("If an output directory is not specified, " + "outputs to current directory."); return -1; } - boolean pretty = true; - Idl parser = new Idl(new File(args.get(0))); - File outputDirectory = getOutputDirectory(args); + String inputName = getArg(args, useJavaCC ? 1 : 0, "-"); + File inputFile = "-".equals(inputName) ? null : new File(inputName); + File outputDirectory = getOutputDirectory(getArg(args, useJavaCC ? 2 : 1, "")); - final Protocol protocol = parser.CompilationUnit(); - final List warnings = parser.getWarningsAfterParsing(); - for (String warning : warnings) { - err.println("Warning: " + warning); - } - for (Schema schema : protocol.getTypes()) { - print(schema, outputDirectory, pretty); + if (useJavaCC) { + try (Idl parser = new Idl(inputFile)) { + final Protocol protocol = parser.CompilationUnit(); + final List warnings = parser.getWarningsAfterParsing(); + for (String warning : warnings) { + err.println("Warning: " + warning); + } + for (Schema schema : protocol.getTypes()) { + print(schema, outputDirectory); + } + } + } else { + IdlReader parser = new IdlReader(); + IdlFile idlFile = inputFile == null ? parser.parse(in) : parser.parse(inputFile.toPath()); + for (String warning : idlFile.getWarnings()) { + err.println("Warning: " + warning); + } + for (Schema schema : idlFile.getNamedSchemas().values()) { + print(schema, outputDirectory); + } } - parser.close(); return 0; } @@ -64,19 +80,26 @@ private boolean isRequestingHelp(List args) { return args.size() == 1 && (args.get(0).equals("--help") || args.get(0).equals("-help")); } - private File getOutputDirectory(List args) { - String dirname = (args.size() == 2) ? 
args.get(1) : ""; + private String getArg(List args, int index, String defaultValue) { + if (index < args.size()) { + return args.get(index); + } else { + return defaultValue; + } + } + + private File getOutputDirectory(String dirname) { File outputDirectory = new File(dirname); outputDirectory.mkdirs(); return outputDirectory; } - private void print(Schema schema, File outputDirectory, boolean pretty) throws FileNotFoundException { + private void print(Schema schema, File outputDirectory) throws FileNotFoundException { String dirpath = outputDirectory.getAbsolutePath(); String filename = dirpath + "/" + schema.getName() + ".avsc"; FileOutputStream fileOutputStream = new FileOutputStream(filename); PrintStream printStream = new PrintStream(fileOutputStream); - printStream.println(schema.toString(pretty)); + printStream.println(schema.toString(true)); printStream.close(); } diff --git a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java index b32b0db76a8..d20226b8e77 100644 --- a/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java +++ b/lang/java/tools/src/main/java/org/apache/avro/tool/IdlTool.java @@ -19,7 +19,10 @@ package org.apache.avro.tool; import org.apache.avro.Protocol; +import org.apache.avro.Schema; import org.apache.avro.compiler.idl.Idl; +import org.apache.avro.idl.IdlFile; +import org.apache.avro.idl.IdlReader; import java.io.File; import java.io.FileOutputStream; @@ -34,37 +37,54 @@ public class IdlTool implements Tool { @Override public int run(InputStream in, PrintStream out, PrintStream err, List args) throws Exception { - PrintStream parseOut = out; - - if (args.size() > 2 || (args.size() == 1 && (args.get(0).equals("--help") || args.get(0).equals("-help")))) { - err.println("Usage: idl [in] [out]"); + boolean useJavaCC = "--useJavaCC".equals(getArg(args, 0, null)); + if (args.size() > (useJavaCC ? 3 : 2) + || (args.size() == 1 && (args.get(0).equals("--help") || args.get(0).equals("-help")))) { + err.println("Usage: idl [--useJavaCC] [in [out]]"); err.println(); err.println("If an output path is not specified, outputs to stdout."); err.println("If no input or output is specified, takes input from"); - err.println("stdin and outputs to stdin."); + err.println("stdin and outputs to stdout."); err.println("The special path \"-\" may also be specified to refer to"); err.println("stdin and stdout."); return -1; } - Idl parser; - if (args.size() >= 1 && !"-".equals(args.get(0))) { - parser = new Idl(new File(args.get(0))); + String inputName = getArg(args, useJavaCC ? 1 : 0, "-"); + File inputFile = "-".equals(inputName) ? null : new File(inputName); + String outputName = getArg(args, useJavaCC ? 2 : 1, "-"); + File outputFile = "-".equals(outputName) ? null : new File(outputName); + + Schema m = null; + Protocol p = null; + if (useJavaCC) { + try (Idl parser = new Idl(inputFile)) { + p = parser.CompilationUnit(); + for (String warning : parser.getWarningsAfterParsing()) { + err.println("Warning: " + warning); + } + } } else { - parser = new Idl(in); + IdlReader parser = new IdlReader(); + IdlFile idlFile = inputFile == null ? 
parser.parse(in) : parser.parse(inputFile.toPath()); + for (String warning : idlFile.getWarnings()) { + err.println("Warning: " + warning); + } + p = idlFile.getProtocol(); + m = idlFile.getMainSchema(); } - if (args.size() == 2 && !"-".equals(args.get(1))) { - parseOut = new PrintStream(new FileOutputStream(args.get(1))); + PrintStream parseOut = out; + if (outputFile != null) { + parseOut = new PrintStream(new FileOutputStream(outputFile)); } - Protocol p = parser.CompilationUnit(); - final List warnings = parser.getWarningsAfterParsing(); - for (String warning : warnings) { - err.println("Warning: " + warning); + if (m == null && p == null) { + err.println("Error: the IDL file does not contain a schema nor a protocol."); + return 1; } try { - parseOut.print(p.toString(true)); + parseOut.print(m == null ? p.toString(true) : m.toString(true)); } finally { if (parseOut != out) // Close only the newly created FileOutputStream parseOut.close(); @@ -72,6 +92,14 @@ public int run(InputStream in, PrintStream out, PrintStream err, List ar return 0; } + private String getArg(List args, int index, String defaultValue) { + if (index < args.size()) { + return args.get(index); + } else { + return defaultValue; + } + } + @Override public String getName() { return "idl"; @@ -79,6 +107,6 @@ public String getName() { @Override public String getShortDescription() { - return "Generates a JSON schema from an Avro IDL file"; + return "Generates a JSON schema or protocol from an Avro IDL file"; } } diff --git a/lang/java/tools/src/test/compiler/input/optionalgettersnullablefieldstest.avsc b/lang/java/tools/src/test/compiler/input/optionalgettersnullablefieldstest.avsc index 80ab1c67f98..65e21fe805e 100644 --- a/lang/java/tools/src/test/compiler/input/optionalgettersnullablefieldstest.avsc +++ b/lang/java/tools/src/test/compiler/input/optionalgettersnullablefieldstest.avsc @@ -3,6 +3,7 @@ {"name": "name", "type": "string"}, {"name": "nullable_name", "type": ["string", "null"]}, {"name": "favorite_number", "type": ["int"]}, - {"name": "nullable_favorite_number", "type": ["int", "null"]} + {"name": "nullable_favorite_number", "type": ["int", "null"]}, + {"name": "nullable_array", "type": [{ "type": "array", "items": "string" }, "null"]} ] } diff --git a/lang/java/tools/src/test/compiler/output/OptionalGettersNullableFieldsTest.java b/lang/java/tools/src/test/compiler/output/OptionalGettersNullableFieldsTest.java index a09d1080c9a..9bf446abc1e 100644 --- a/lang/java/tools/src/test/compiler/output/OptionalGettersNullableFieldsTest.java +++ b/lang/java/tools/src/test/compiler/output/OptionalGettersNullableFieldsTest.java @@ -15,8 +15,10 @@ /** Test that optional getters are created only for nullable fields */ @org.apache.avro.specific.AvroGenerated public class OptionalGettersNullableFieldsTest extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord { - private static final long serialVersionUID = 7830366875847294825L; - public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OptionalGettersNullableFieldsTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that optional getters are created only for nullable fields\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"nullable_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"favorite_number\",\"type\":[\"int\"]},{\"name\":\"nullable_favorite_number\",\"type\":[\"int\",\"null\"]}]}"); + private static final long 
serialVersionUID = -6919829133416680993L; + + + public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OptionalGettersNullableFieldsTest\",\"namespace\":\"avro.examples.baseball\",\"doc\":\"Test that optional getters are created only for nullable fields\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"nullable_name\",\"type\":[\"string\",\"null\"]},{\"name\":\"favorite_number\",\"type\":[\"int\"]},{\"name\":\"nullable_favorite_number\",\"type\":[\"int\",\"null\"]},{\"name\":\"nullable_array\",\"type\":[{\"type\":\"array\",\"items\":\"string\"},\"null\"]}]}"); public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; } private static final SpecificData MODEL$ = new SpecificData(); @@ -76,6 +78,7 @@ public static OptionalGettersNullableFieldsTest fromByteBuffer( private java.lang.CharSequence nullable_name; private java.lang.Object favorite_number; private java.lang.Integer nullable_favorite_number; + private java.util.List<java.lang.CharSequence> nullable_array; /** * Default constructor. Note that this does not initialize fields @@ -90,12 +93,14 @@ public OptionalGettersNullableFieldsTest() {} * @param nullable_name The new value for nullable_name * @param favorite_number The new value for favorite_number * @param nullable_favorite_number The new value for nullable_favorite_number + * @param nullable_array The new value for nullable_array */ - public OptionalGettersNullableFieldsTest(java.lang.CharSequence name, java.lang.CharSequence nullable_name, java.lang.Object favorite_number, java.lang.Integer nullable_favorite_number) { + public OptionalGettersNullableFieldsTest(java.lang.CharSequence name, java.lang.CharSequence nullable_name, java.lang.Object favorite_number, java.lang.Integer nullable_favorite_number, java.util.List<java.lang.CharSequence> nullable_array) { this.name = name; this.nullable_name = nullable_name; this.favorite_number = favorite_number; this.nullable_favorite_number = nullable_favorite_number; + this.nullable_array = nullable_array; } @Override @@ -112,6 +117,7 @@ public java.lang.Object get(int field$) { case 1: return nullable_name; case 2: return favorite_number; case 3: return nullable_favorite_number; + case 4: return nullable_array; default: throw new IndexOutOfBoundsException("Invalid index: " + field$); } } @@ -125,6 +131,7 @@ public void put(int field$, java.lang.Object value$) { case 1: nullable_name = (java.lang.CharSequence)value$; break; case 2: favorite_number = value$; break; case 3: nullable_favorite_number = (java.lang.Integer)value$; break; + case 4: nullable_array = (java.util.List<java.lang.CharSequence>)value$; break; default: throw new IndexOutOfBoundsException("Invalid index: " + field$); } } @@ -197,6 +204,23 @@ public void setNullableFavoriteNumber(java.lang.Integer value) { this.nullable_favorite_number = value; } + /** + * Gets the value of the 'nullable_array' field as an Optional<java.util.List<java.lang.CharSequence>>. + * @return The value wrapped in an Optional<java.util.List<java.lang.CharSequence>>. + */ + public Optional<java.util.List<java.lang.CharSequence>> getNullableArray() { + return Optional.<java.util.List<java.lang.CharSequence>>ofNullable(nullable_array); + } + + + /** + * Sets the value of the 'nullable_array' field. + * @param value the value to set. + */ + public void setNullableArray(java.util.List<java.lang.CharSequence> value) { + this.nullable_array = value; + } + /** * Creates a new OptionalGettersNullableFieldsTest RecordBuilder. * @return A new OptionalGettersNullableFieldsTest RecordBuilder @@ -242,6 +266,7 @@ public static class Builder extends org.apache.avro.specific.SpecificRecordBuild private java.lang.CharSequence nullable_name; private java.lang.Object favorite_number; private java.lang.Integer nullable_favorite_number; + private java.util.List<java.lang.CharSequence> nullable_array; /** Creates a new Builder */ private Builder() { @@ -270,6 +295,10 @@ private Builder(avro.examples.baseball.OptionalGettersNullableFieldsTest.Builder this.nullable_favorite_number = data().deepCopy(fields()[3].schema(), other.nullable_favorite_number); fieldSetFlags()[3] = other.fieldSetFlags()[3]; } + if (isValidValue(fields()[4], other.nullable_array)) { + this.nullable_array = data().deepCopy(fields()[4].schema(), other.nullable_array); + fieldSetFlags()[4] = other.fieldSetFlags()[4]; + } } /** @@ -294,6 +323,10 @@ private Builder(avro.examples.baseball.OptionalGettersNullableFieldsTest other) this.nullable_favorite_number = data().deepCopy(fields()[3].schema(), other.nullable_favorite_number); fieldSetFlags()[3] = true; } + if (isValidValue(fields()[4], other.nullable_array)) { + this.nullable_array = data().deepCopy(fields()[4].schema(), other.nullable_array); + fieldSetFlags()[4] = true; + } } /** @@ -456,6 +489,46 @@ public avro.examples.baseball.OptionalGettersNullableFieldsTest.Builder clearNul return this; } + /** + * Gets the value of the 'nullable_array' field. + * @return The value. + */ + public java.util.List<java.lang.CharSequence> getNullableArray() { + return nullable_array; + } + + + /** + * Sets the value of the 'nullable_array' field. + * @param value The value of 'nullable_array'. + * @return This builder. + */ + public avro.examples.baseball.OptionalGettersNullableFieldsTest.Builder setNullableArray(java.util.List<java.lang.CharSequence> value) { + validate(fields()[4], value); + this.nullable_array = value; + fieldSetFlags()[4] = true; + return this; + } + + /** + * Checks whether the 'nullable_array' field has been set. + * @return True if the 'nullable_array' field has been set, false otherwise. + */ + public boolean hasNullableArray() { + return fieldSetFlags()[4]; + } + + + /** + * Clears the value of the 'nullable_array' field. + * @return This builder. + */ + public avro.examples.baseball.OptionalGettersNullableFieldsTest.Builder clearNullableArray() { + nullable_array = null; + fieldSetFlags()[4] = false; + return this; + } + @Override @SuppressWarnings("unchecked") public OptionalGettersNullableFieldsTest build() { @@ -465,6 +538,7 @@ public OptionalGettersNullableFieldsTest build() { record.nullable_name = fieldSetFlags()[1] ? this.nullable_name : (java.lang.CharSequence) defaultValue(fields()[1]); record.favorite_number = fieldSetFlags()[2] ? this.favorite_number : defaultValue(fields()[2]); record.nullable_favorite_number = fieldSetFlags()[3] ? this.nullable_favorite_number : (java.lang.Integer) defaultValue(fields()[3]); + record.nullable_array = fieldSetFlags()[4] ? this.nullable_array : (java.util.List<java.lang.CharSequence>) defaultValue(fields()[4]); return record; } catch (org.apache.avro.AvroMissingFieldException e) { throw e; diff --git a/lang/java/tools/src/test/idl/schema.avdl b/lang/java/tools/src/test/idl/schema.avdl new file mode 100644 index 00000000000..312bd5d9ac5 --- /dev/null +++ b/lang/java/tools/src/test/idl/schema.avdl @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +namespace org.apache.avro.test; +schema TestRecord; + +record TestRecord { + string @order("ignore") name; + Kind @order("descending") kind; + MD5 hash; + MD5? @aliases(["hash"]) nullableHash; + array<long> arrayOfLongs; +} + +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} + +fixed MD5(16); diff --git a/lang/java/tools/src/test/idl/schema.avsc b/lang/java/tools/src/test/idl/schema.avsc new file mode 100644 index 00000000000..aa34f5b694b --- /dev/null +++ b/lang/java/tools/src/test/idl/schema.avsc @@ -0,0 +1,36 @@ +{ + "type" : "record", + "name" : "TestRecord", + "namespace" : "org.apache.avro.test", + "fields" : [ { + "name" : "name", + "type" : "string", + "order" : "ignore" + }, { + "name" : "kind", + "type" : { + "type" : "enum", + "name" : "Kind", + "symbols" : [ "FOO", "BAR", "BAZ" ], + "aliases" : [ "org.foo.KindOf" ] + }, + "order" : "descending" + }, { + "name" : "hash", + "type" : { + "type" : "fixed", + "name" : "MD5", + "size" : 16 + } + }, { + "name" : "nullableHash", + "type" : [ "null", "MD5" ], + "aliases" : [ "hash" ] + }, { + "name" : "arrayOfLongs", + "type" : { + "type" : "array", + "items" : "long" + } + } ] +} diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileRepairTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileRepairTool.java index b73fc964b2f..0e24b8915f0 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileRepairTool.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestDataFileRepairTool.java @@ -180,7 +180,7 @@ void repairAfterCorruptRecord() throws Exception { } private void checkFileContains(File repairedFile, String... lines) throws IOException { - DataFileReader r = new DataFileReader<>(repairedFile, new GenericDatumReader<>(SCHEMA)); + DataFileReader r = new DataFileReader<>(repairedFile, new GenericDatumReader<>(SCHEMA)); for (String line : lines) { assertEquals(line, r.next().toString()); } diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlToSchemataTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlToSchemataTool.java index feb5e931a9b..cc74ba1684a 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlToSchemataTool.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlToSchemataTool.java @@ -46,22 +46,39 @@ void splitIdlIntoSchemata() throws Exception { String[] files = new File(outdir).list(); assertEquals(4, files.length); + String warnings = readPrintStreamBuffer(buffer); + assertEquals("Warning: Line 1, char 1: Ignoring out-of-place documentation comment." + + "\nDid you mean to use a multiline comment ( /* ...
*/ ) instead?", warnings); + } + + @Test + public void testSplitIdlIntoSchemataUsingJavaCC() throws Exception { + String idl = "src/test/idl/protocol.avdl"; + String outdir = "target/test-split"; + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + List arglist = Arrays.asList("--useJavaCC", idl, outdir); + new IdlToSchemataTool().run(null, null, new PrintStream(buffer), arglist); + + String[] files = new File(outdir).list(); + assertEquals(4, files.length); + String warnings = readPrintStreamBuffer(buffer); assertEquals( - "Warning: Found documentation comment at line 19, column 1. Ignoring previous one at line 1, column 1: \"" - + "Licensed to the Apache Software Foundation (ASF) under one\n" + "Warning: Found documentation comment at line 19, column 1. Ignoring previous one at line 1, column 1: " + + "\"Licensed to the Apache Software Foundation (ASF) under one\n" + "or more contributor license agreements. See the NOTICE file\n" + "distributed with this work for additional information\n" + "regarding copyright ownership. The ASF licenses this file\n" + "to you under the Apache License, Version 2.0 (the\n" + "\"License\"); you may not use this file except in compliance\n" - + "with the License. You may obtain a copy of the License at\n\n" - + " https://www.apache.org/licenses/LICENSE-2.0\n\n" + + "with the License. You may obtain a copy of the License at\n" + + "\n https://www.apache.org/licenses/LICENSE-2.0\n\n" + "Unless required by applicable law or agreed to in writing, software\n" + "distributed under the License is distributed on an \"AS IS\" BASIS,\n" + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" - + "See the License for the specific language governing permissions and\n" + "limitations under the License." - + "\"\nDid you mean to use a multiline comment ( /* ... */ ) instead?", + + "See the License for the specific language governing permissions and\n" + + "limitations under the License.\"\nDid you mean to use a multiline comment ( /* ... */ ) instead?", warnings); } diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java index 374a3d60562..136344bc15a 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestIdlTool.java @@ -34,6 +34,22 @@ import java.util.stream.Collectors; public class TestIdlTool { + @Test + public void testWriteIdlAsSchema() throws Exception { + String idl = "src/test/idl/schema.avdl"; + String protocol = "src/test/idl/schema.avsc"; + String outfile = "target/test-schema.avsc"; + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + List arglist = Arrays.asList(idl, outfile); + new IdlTool().run(null, null, new PrintStream(buffer), arglist); + + assertEquals(readFileAsString(protocol), readFileAsString(outfile)); + + String warnings = readPrintStreamBuffer(buffer); + assertEquals("Warning: Line 1, char 1: Ignoring out-of-place documentation comment." + + "\nDid you mean to use a multiline comment ( /* ... */ ) instead?", warnings); + } @Test void writeIdlAsProtocol() throws Exception { @@ -47,22 +63,39 @@ void writeIdlAsProtocol() throws Exception { assertEquals(readFileAsString(protocol), readFileAsString(outfile)); + String warnings = readPrintStreamBuffer(buffer); + assertEquals("Warning: Line 1, char 1: Ignoring out-of-place documentation comment." + + "\nDid you mean to use a multiline comment ( /* ... 
*/ ) instead?", warnings); + } + + @Test + public void testWriteIdlAsProtocolUsingJavaCC() throws Exception { + String idl = "src/test/idl/protocol.avdl"; + String protocol = "src/test/idl/protocol.avpr"; + String outfile = "target/test-protocol.avpr"; + + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + List arglist = Arrays.asList("--useJavaCC", idl, outfile); + new IdlTool().run(null, null, new PrintStream(buffer), arglist); + + assertEquals(readFileAsString(protocol), readFileAsString(outfile)); + String warnings = readPrintStreamBuffer(buffer); assertEquals( - "Warning: Found documentation comment at line 19, column 1. Ignoring previous one at line 1, column 1: \"" - + "Licensed to the Apache Software Foundation (ASF) under one\n" + "Warning: Found documentation comment at line 19, column 1. Ignoring previous one at line 1, column 1: " + + "\"Licensed to the Apache Software Foundation (ASF) under one\n" + "or more contributor license agreements. See the NOTICE file\n" + "distributed with this work for additional information\n" + "regarding copyright ownership. The ASF licenses this file\n" + "to you under the Apache License, Version 2.0 (the\n" + "\"License\"); you may not use this file except in compliance\n" - + "with the License. You may obtain a copy of the License at\n\n" - + " https://www.apache.org/licenses/LICENSE-2.0\n\n" + + "with the License. You may obtain a copy of the License at\n" + + "\n https://www.apache.org/licenses/LICENSE-2.0\n\n" + "Unless required by applicable law or agreed to in writing, software\n" + "distributed under the License is distributed on an \"AS IS\" BASIS,\n" + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n" - + "See the License for the specific language governing permissions and\n" + "limitations under the License." - + "\"\nDid you mean to use a multiline comment ( /* ... */ ) instead?", + + "See the License for the specific language governing permissions and\n" + + "limitations under the License.\"\nDid you mean to use a multiline comment ( /* ... 
*/ ) instead?", warnings); } diff --git a/lang/java/tools/src/test/java/org/apache/avro/tool/TestRpcProtocolTool.java b/lang/java/tools/src/test/java/org/apache/avro/tool/TestRpcProtocolTool.java index fddd850df3d..fcdd2d0596f 100644 --- a/lang/java/tools/src/test/java/org/apache/avro/tool/TestRpcProtocolTool.java +++ b/lang/java/tools/src/test/java/org/apache/avro/tool/TestRpcProtocolTool.java @@ -18,63 +18,37 @@ package org.apache.avro.tool; import org.apache.avro.Protocol; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.PrintStream; import java.util.Arrays; import java.util.Collections; -import java.util.List; - -import static org.junit.Assert.assertEquals; /** * */ -@RunWith(Parameterized.class) public class TestRpcProtocolTool { - @Parameterized.Parameters(/* name = "{0}" */) - public static List data() { - return Arrays.asList(new Object[] { "http" }, new Object[] { "avro" }); - } - - private RpcReceiveTool receive; - private Protocol simpleProtocol; - - private String uriScheme; - - public TestRpcProtocolTool(String uriScheme) { - this.uriScheme = uriScheme; - } + @ParameterizedTest + @ValueSource(strings = { "http", "avro" }) + void rpcProtocol(String uriScheme) throws Exception { - @Before - public void setUp() throws Exception { String protocolFile = System.getProperty("share.dir", "../../../share") + "/test/schemas/simple.avpr"; - simpleProtocol = Protocol.parse(new File(protocolFile)); + Protocol simpleProtocol = Protocol.parse(new File(protocolFile)); // start a simple server ByteArrayOutputStream baos1 = new ByteArrayOutputStream(); PrintStream p1 = new PrintStream(baos1); - receive = new RpcReceiveTool(); + RpcReceiveTool receive = new RpcReceiveTool(); + receive.run1(null, p1, System.err, Arrays.asList(uriScheme + "://0.0.0.0:0/", protocolFile, "hello", "-data", "\"Hello!\"")); - } - - @After - public void tearDown() throws Exception { - if (receive != null) - receive.server.close(); // force the server to finish - } - - @Test - public void testRpcProtocol() throws Exception { // run the actual test ByteArrayOutputStream baos2 = new ByteArrayOutputStream(); @@ -86,8 +60,9 @@ public void testRpcProtocol() throws Exception { p2.flush(); - assertEquals("Expected the simple.avpr protocol to be echoed to standout", simpleProtocol, - Protocol.parse(baos2.toString("UTF-8"))); + Assertions.assertEquals(simpleProtocol, Protocol.parse(baos2.toString("UTF-8")), + "Expected the simple.avpr protocol to be echoed to standout"); + receive.server.close(); // force the server to finish } } diff --git a/lang/java/trevni/avro/pom.xml b/lang/java/trevni/avro/pom.xml index 5cc5f5049cb..a50cc028e5e 100644 --- a/lang/java/trevni/avro/pom.xml +++ b/lang/java/trevni/avro/pom.xml @@ -23,7 +23,7 @@ trevni-java org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml trevni-avro diff --git a/lang/java/trevni/core/pom.xml b/lang/java/trevni/core/pom.xml index 99a12172673..365cf6f50f7 100644 --- a/lang/java/trevni/core/pom.xml +++ b/lang/java/trevni/core/pom.xml @@ -23,7 +23,7 @@ trevni-java org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml trevni-core diff --git a/lang/java/trevni/core/src/test/java/org/apache/trevni/TestAllCodecs.java 
b/lang/java/trevni/core/src/test/java/org/apache/trevni/TestAllCodecs.java index c588046428e..e5bda582fc4 100644 --- a/lang/java/trevni/core/src/test/java/org/apache/trevni/TestAllCodecs.java +++ b/lang/java/trevni/core/src/test/java/org/apache/trevni/TestAllCodecs.java @@ -18,28 +18,17 @@ package org.apache.trevni; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; import java.io.IOException; import java.nio.Buffer; import java.nio.ByteBuffer; -import java.util.Arrays; -import java.util.Collection; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; -@RunWith(Parameterized.class) public class TestAllCodecs { - @Parameterized.Parameters(name = "{index}: codec={0}") - public static Collection data() { - return Arrays.asList(new Object[][] { { "bzip2" }, { "null" }, { "snappy" }, { "deflate" }, }); - } - - @Parameterized.Parameter(0) - public String codec; public static Codec getCodec(String name) { MetaData m = new MetaData(); @@ -47,8 +36,9 @@ public static Codec getCodec(String name) { return Codec.get(m); } - @Test - public void testCodec() throws IOException { + @ParameterizedTest + @ValueSource(strings = { "bzip2", "null", "snappy", "deflate" }) + public void testCodec(String codec) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); @@ -76,8 +66,9 @@ public void testCodec() throws IOException { assertEquals(decompressedBuffer, inputByteBuffer); } - @Test - public void testCodecSlice() throws IOException { + @ParameterizedTest + @ValueSource(strings = { "bzip2", "null", "snappy", "deflate" }) + public void testCodecSlice(String codec) throws IOException { int inputSize = 500_000; byte[] input = generateTestData(inputSize); diff --git a/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java b/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java index 6fc36bf6763..781476abfc5 100644 --- a/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java +++ b/lang/java/trevni/core/src/test/java/org/apache/trevni/TestColumnFile.java @@ -19,72 +19,66 @@ import java.io.File; import java.util.Random; -import java.util.Collection; import java.util.Arrays; import java.util.Iterator; import java.util.Map; import java.util.HashMap; +import java.util.stream.Stream; -import org.junit.Assert; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import org.junit.runners.Parameterized.Parameters; -@RunWith(value = Parameterized.class) public class TestColumnFile { private static final File FILE = new File("target", "test.trv"); private static final int COUNT = 1024 * 64; - private String codec; - private String checksum; - - public TestColumnFile(String codec, String checksum) { - this.codec = codec; - this.checksum = checksum; - } - @Parameters - public static Collection codecs() { - Object[][] data = new Object[][] { { "null", "null" }, { "snappy", "crc32" }, { "deflate", "crc32" } }; - return Arrays.asList(data); + public static Stream codecs() { + return 
Stream.of(Arguments.of(createFileMeta("null", "null")), Arguments.of(createFileMeta("snappy", "crc32")), + Arguments.of(createFileMeta("deflate", "crc32"))); } - private ColumnFileMetaData createFileMeta() { + private static ColumnFileMetaData createFileMeta(String codec, String checksum) { return new ColumnFileMetaData().setCodec(codec).setChecksum(checksum); } - @Test - public void testEmptyFile() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void emptyFile(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta()); + ColumnFileWriter out = new ColumnFileWriter(fileMeta); out.writeTo(FILE); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(0, in.getRowCount()); - Assert.assertEquals(0, in.getColumnCount()); + Assertions.assertEquals(0, in.getRowCount()); + Assertions.assertEquals(0, in.getColumnCount()); in.close(); } - @Test - public void testEmptyColumn() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void emptyColumn(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT)); + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.INT)); out.writeTo(FILE); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(0, in.getRowCount()); - Assert.assertEquals(1, in.getColumnCount()); + Assertions.assertEquals(0, in.getRowCount()); + Assertions.assertEquals(1, in.getColumnCount()); ColumnValues values = in.getValues("test"); for (int i : values) throw new Exception("no value should be found"); in.close(); } - @Test - public void testInts() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void ints(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.INT)); + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.INT)); Random random = TestUtil.createRandom(); for (int i = 0; i < COUNT; i++) out.writeRow(TestUtil.randomLength(random)); @@ -92,22 +86,23 @@ public void testInts() throws Exception { random = TestUtil.createRandom(); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(COUNT, in.getRowCount()); - Assert.assertEquals(1, in.getColumnCount()); + Assertions.assertEquals(COUNT, in.getRowCount()); + Assertions.assertEquals(1, in.getColumnCount()); Iterator i = in.getValues("test"); int count = 0; while (i.hasNext()) { - Assert.assertEquals(TestUtil.randomLength(random), (int) i.next()); + Assertions.assertEquals(TestUtil.randomLength(random), (int) i.next()); count++; } - Assert.assertEquals(COUNT, count); + Assertions.assertEquals(COUNT, count); } - @Test - public void testLongs() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void longs(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.LONG)); + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.LONG)); Random random = TestUtil.createRandom(); for (int i = 0; i < COUNT; i++) out.writeRow(random.nextLong()); @@ -115,22 +110,23 @@ public void testLongs() throws Exception { random = TestUtil.createRandom(); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(COUNT, 
in.getRowCount()); - Assert.assertEquals(1, in.getColumnCount()); + Assertions.assertEquals(COUNT, in.getRowCount()); + Assertions.assertEquals(1, in.getColumnCount()); Iterator i = in.getValues("test"); int count = 0; while (i.hasNext()) { - Assert.assertEquals(random.nextLong(), (long) i.next()); + Assertions.assertEquals(random.nextLong(), (long) i.next()); count++; } - Assert.assertEquals(COUNT, count); + Assertions.assertEquals(COUNT, count); } - @Test - public void testStrings() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void strings(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.STRING)); + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.STRING)); Random random = TestUtil.createRandom(); for (int i = 0; i < COUNT; i++) out.writeRow(TestUtil.randomString(random)); @@ -138,21 +134,22 @@ public void testStrings() throws Exception { random = TestUtil.createRandom(); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(COUNT, in.getRowCount()); - Assert.assertEquals(1, in.getColumnCount()); + Assertions.assertEquals(COUNT, in.getRowCount()); + Assertions.assertEquals(1, in.getColumnCount()); Iterator i = in.getValues("test"); int count = 0; while (i.hasNext()) { - Assert.assertEquals(TestUtil.randomString(random), i.next()); + Assertions.assertEquals(TestUtil.randomString(random), i.next()); count++; } - Assert.assertEquals(COUNT, count); + Assertions.assertEquals(COUNT, count); } - @Test - public void testTwoColumn() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void twoColumn(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("a", ValueType.FIXED32), + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("a", ValueType.FIXED32), new ColumnMetaData("b", ValueType.STRING)); Random random = TestUtil.createRandom(); for (int i = 0; i < COUNT; i++) @@ -161,24 +158,25 @@ public void testTwoColumn() throws Exception { random = TestUtil.createRandom(); ColumnFileReader in = new ColumnFileReader(FILE); - Assert.assertEquals(COUNT, in.getRowCount()); - Assert.assertEquals(2, in.getColumnCount()); + Assertions.assertEquals(COUNT, in.getRowCount()); + Assertions.assertEquals(2, in.getColumnCount()); Iterator i = in.getValues("a"); Iterator j = in.getValues("b"); int count = 0; while (i.hasNext() && j.hasNext()) { - Assert.assertEquals(random.nextInt(), i.next()); - Assert.assertEquals(TestUtil.randomString(random), j.next()); + Assertions.assertEquals(random.nextInt(), i.next()); + Assertions.assertEquals(TestUtil.randomString(random), j.next()); count++; } - Assert.assertEquals(COUNT, count); + Assertions.assertEquals(COUNT, count); } - @Test - public void testSeekLongs() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void seekLongs(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), new ColumnMetaData("test", ValueType.LONG)); + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.LONG)); Random random = TestUtil.createRandom(); int seekCount = COUNT / 1024; @@ -206,16 +204,17 @@ public void testSeekLongs() throws Exception { for (int i = 0; i < seekCount; i++) { v.seek(seekRows[i]); - Assert.assertEquals(seekValues[i], 
v.next()); + Assertions.assertEquals(seekValues[i], v.next()); } } - @Test - public void testSeekStrings() throws Exception { + @ParameterizedTest + @MethodSource("codecs") + void seekStrings(ColumnFileMetaData fileMeta) throws Exception { FILE.delete(); - ColumnFileWriter out = new ColumnFileWriter(createFileMeta(), + ColumnFileWriter out = new ColumnFileWriter(fileMeta, new ColumnMetaData("test", ValueType.STRING).hasIndexValues(true)); Random random = TestUtil.createRandom(); @@ -246,7 +245,7 @@ public void testSeekStrings() throws Exception { for (int i = 0; i < seekCount; i++) { v.seek(seekValues[i]); - Assert.assertEquals(seekValues[i], v.next()); + Assertions.assertEquals(seekValues[i], v.next()); } } diff --git a/lang/java/trevni/pom.xml b/lang/java/trevni/pom.xml index 9d676bf09e8..464eee7b167 100644 --- a/lang/java/trevni/pom.xml +++ b/lang/java/trevni/pom.xml @@ -24,7 +24,7 @@ avro-parent org.apache.avro 1.12.0-SNAPSHOT - ../ + ../pom.xml trevni-java diff --git a/lang/js/.gitignore b/lang/js/.gitignore index b509c88b364..78ed32c7512 100644 --- a/lang/js/.gitignore +++ b/lang/js/.gitignore @@ -1,2 +1,3 @@ coverage/ node_modules/ +.nyc_output \ No newline at end of file diff --git a/lang/perl/MANIFEST b/lang/perl/MANIFEST index bc8f9341af0..d308e85ea01 100644 --- a/lang/perl/MANIFEST +++ b/lang/perl/MANIFEST @@ -41,7 +41,6 @@ META.yml LICENSE NOTICE README -VERSION.txt t/00_compile.t t/01_names.t t/01_schema.t diff --git a/lang/perl/Makefile.PL b/lang/perl/Makefile.PL index 69447e5651f..ffac5e9d11b 100644 --- a/lang/perl/Makefile.PL +++ b/lang/perl/Makefile.PL @@ -19,12 +19,7 @@ use Config; use inc::Module::Install; my $version; -for ('VERSION.txt', '../../share/VERSION.txt') { - if (-f) { - $version = `cat $_`; - last; - } -} +$version = `cat ../../share/VERSION.txt`; chomp $version; license 'apache'; diff --git a/lang/perl/build.sh b/lang/perl/build.sh index 2b689276b82..e7634dbabab 100755 --- a/lang/perl/build.sh +++ b/lang/perl/build.sh @@ -58,7 +58,6 @@ case "$target" in ;; dist) - cp ../../share/VERSION.txt . 
perl ./Makefile.PL && make dist ;; diff --git a/lang/perl/xt/interop.t b/lang/perl/xt/interop.t index 8bc2a84861f..81cd48666e9 100644 --- a/lang/perl/xt/interop.t +++ b/lang/perl/xt/interop.t @@ -21,8 +21,11 @@ use warnings; use Test::More; use File::Basename qw(basename); use IO::File; -use_ok 'Avro::DataFile'; -use_ok 'Avro::DataFileReader'; + +BEGIN { + use_ok 'Avro::DataFile'; + use_ok 'Avro::DataFileReader'; +} for my $path (glob '../../build/interop/data/*.avro') { my $fn = basename($path); diff --git a/lang/py/avro/datafile.py b/lang/py/avro/datafile.py index d39a91131e3..0f002cee1b5 100644 --- a/lang/py/avro/datafile.py +++ b/lang/py/avro/datafile.py @@ -26,7 +26,7 @@ import json import warnings from types import TracebackType -from typing import IO, AnyStr, BinaryIO, MutableMapping, Optional, Type, cast +from typing import IO, AnyStr, MutableMapping, Optional, Type, cast import avro.codecs import avro.errors diff --git a/lang/py/avro/errors.py b/lang/py/avro/errors.py index 2c7675131ef..b961a04ae43 100644 --- a/lang/py/avro/errors.py +++ b/lang/py/avro/errors.py @@ -120,3 +120,7 @@ class UsageError(RuntimeError, AvroException): class AvroRuntimeException(RuntimeError, AvroException): """Raised when compatibility parsing encounters an unknown type""" + + +class UnknownFingerprintAlgorithmException(AvroException): + """Raised when attempting to generate a fingerprint with an unknown algorithm""" diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py index 7b5576697eb..5e3ffa6c537 100644 --- a/lang/py/avro/io.py +++ b/lang/py/avro/io.py @@ -89,17 +89,7 @@ import decimal import struct import warnings -from typing import ( - IO, - Deque, - Generator, - Iterable, - List, - Mapping, - Optional, - Sequence, - Union, -) +from typing import IO, Generator, Iterable, List, Mapping, Optional, Sequence, Union import avro.constants import avro.errors @@ -435,7 +425,6 @@ def write_null(self, datum: None) -> None: """ null is written as zero bytes """ - pass def write_boolean(self, datum: bool) -> None: """ @@ -810,7 +799,7 @@ def read_array(self, writers_schema: avro.schema.ArraySchema, readers_schema: av while block_count != 0: if block_count < 0: block_count = -block_count - block_size = decoder.read_long() + decoder.skip_long() for i in range(block_count): read_items.append(self.read_data(writers_schema.items, readers_schema.items, decoder)) block_count = decoder.read_long() @@ -847,7 +836,7 @@ def read_map(self, writers_schema: avro.schema.MapSchema, readers_schema: avro.s while block_count != 0: if block_count < 0: block_count = -block_count - block_size = decoder.read_long() + decoder.skip_long() for i in range(block_count): key = decoder.read_utf8() read_items[key] = self.read_data(writers_schema.values, readers_schema.values, decoder) diff --git a/lang/py/avro/ipc.py b/lang/py/avro/ipc.py index 2c3ff668581..7a5a5831f08 100644 --- a/lang/py/avro/ipc.py +++ b/lang/py/avro/ipc.py @@ -77,27 +77,41 @@ def __init__(self, local_protocol, transceiver): self._send_protocol = None # read-only properties - local_protocol = property(lambda self: self._local_protocol) - transceiver = property(lambda self: self._transceiver) + @property + def local_protocol(self): + return self._local_protocol + + @property + def transceiver(self): + return self._transceiver # read/write properties - def set_remote_protocol(self, new_remote_protocol): + @property + def remote_protocol(self): + return self._remote_protocol + + @remote_protocol.setter + def remote_protocol(self, new_remote_protocol): self._remote_protocol 
= new_remote_protocol REMOTE_PROTOCOLS[self.transceiver.remote_name] = self.remote_protocol - remote_protocol = property(lambda self: self._remote_protocol, set_remote_protocol) + @property + def remote_hash(self): + return self._remote_hash - def set_remote_hash(self, new_remote_hash): + @remote_hash.setter + def remote_hash(self, new_remote_hash): self._remote_hash = new_remote_hash REMOTE_HASHES[self.transceiver.remote_name] = self.remote_hash - remote_hash = property(lambda self: self._remote_hash, set_remote_hash) + @property + def send_protocol(self): + return self._send_protocol - def set_send_protocol(self, new_send_protocol): + @send_protocol.setter + def send_protocol(self, new_send_protocol): self._send_protocol = new_send_protocol - send_protocol = property(lambda self: self._send_protocol, set_send_protocol) - def request(self, message_name, request_datum): """ Writes a request message and reads a response or error message. @@ -185,7 +199,7 @@ def read_call_response(self, message_name, decoder): the error, serialized per the message's error union schema. """ # response metadata - response_metadata = META_READER.read(decoder) + META_READER.read(decoder) # remote response schema remote_message_schema = self.remote_protocol.messages.get(message_name) @@ -236,9 +250,17 @@ def __init__(self, local_protocol): self.set_protocol_cache(self.local_hash, self.local_protocol) # read-only properties - local_protocol = property(lambda self: self._local_protocol) - local_hash = property(lambda self: self._local_hash) - protocol_cache = property(lambda self: self._protocol_cache) + @property + def local_protocol(self): + return self._local_protocol + + @property + def local_hash(self): + return self._local_hash + + @property + def protocol_cache(self): + return self._protocol_cache # utility functions to manipulate protocol cache def get_protocol_cache(self, hash): @@ -266,7 +288,7 @@ def respond(self, call_request): return buffer_writer.getvalue() # read request using remote protocol - request_metadata = META_READER.read(buffer_decoder) + META_READER.read(buffer_decoder) remote_message_name = buffer_decoder.read_utf8() # get remote and local request schemas so we can do @@ -342,9 +364,8 @@ def process_handshake(self, decoder, encoder): def invoke(self, local_message, request): """ - Aactual work done by server: cf. handler in thrift. + Actual work done by server: cf. handler in thrift. 
""" - pass def read_request(self, writers_schema, readers_schema, decoder): datum_reader = avro.io.DatumReader(writers_schema, readers_schema) @@ -371,7 +392,9 @@ def __init__(self, reader): self._reader = reader # read-only properties - reader = property(lambda self: self._reader) + @property + def reader(self): + return self._reader def read_framed_message(self): message = [] @@ -401,7 +424,9 @@ def __init__(self, writer): self._writer = writer # read-only properties - writer = property(lambda self: self._writer) + @property + def writer(self): + return self._writer def write_framed_message(self, message): message_length = len(message) diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py index 3bc0c27af5a..018f74debe6 100644 --- a/lang/py/avro/schema.py +++ b/lang/py/avro/schema.py @@ -42,16 +42,27 @@ import collections import datetime import decimal +import hashlib import json import math import uuid import warnings +from functools import reduce from pathlib import Path -from typing import List, Mapping, MutableMapping, Optional, Sequence, Union, cast +from typing import ( + Callable, + FrozenSet, + List, + Mapping, + MutableMapping, + Optional, + Sequence, + Union, + cast, +) import avro.constants import avro.errors -from avro.constants import NAMED_TYPES, PRIMITIVE_TYPES, VALID_TYPES from avro.name import Name, Names, validate_basename # @@ -104,6 +115,50 @@ def _is_timezone_aware_datetime(dt: datetime.datetime) -> bool: return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None +# Fingerprint Constants +_EMPTY64_FINGERPRINT: int = 0xC15D213AA4D7A795 +_FINGERPRINT_TABLE: tuple = tuple(reduce(lambda fp, _: (fp >> 1) ^ (_EMPTY64_FINGERPRINT & -(fp & 1)), range(8), i) for i in range(256)) + + +# All algorithms guaranteed by hashlib are supported: +# - 'blake2b', +# - 'blake2s', +# - 'md5', +# - 'sha1', +# - 'sha224', +# - 'sha256', +# - 'sha384', +# - 'sha3_224', +# - 'sha3_256', +# - 'sha3_384', +# - 'sha3_512', +# - 'sha512', +# - 'shake_128', +# - 'shake_256' +SUPPORTED_ALGORITHMS: FrozenSet[str] = frozenset({"CRC-64-AVRO"} | hashlib.algorithms_guaranteed) + + +def _crc_64_fingerprint(data: bytes) -> bytes: + """The 64-bit Rabin Fingerprint. + + As described in the Avro specification. + + Args: + data: A bytes object containing the UTF-8 encoded parsing canonical + form of an Avro schema. + Returns: + A bytes object with a length of eight in little-endian format. + """ + result = _EMPTY64_FINGERPRINT + + for b in data: + result = (result >> 8) ^ _FINGERPRINT_TABLE[(result ^ b) & 0xFF] + + # Although not mentioned in the Avro specification, the Java + # implementation gives fingerprint bytes in little-endian order + return result.to_bytes(length=8, byteorder="little", signed=False) + + # # Base Classes # @@ -142,9 +197,11 @@ def other_props(self) -> Mapping[str, object]: return get_other_props(self.props, self._reserved_properties) -class EqualByJsonMixin: +class EqualByJsonMixin(collections.abc.Hashable): """A mixin that defines equality as equal if the json deserializations are equal.""" + fingerprint: Callable[..., bytes] + def __eq__(self, that: object) -> bool: try: that_obj = json.loads(str(that)) @@ -152,13 +209,29 @@ def __eq__(self, that: object) -> bool: return False return cast(bool, json.loads(str(self)) == that_obj) + def __hash__(self) -> int: + """Make it so a schema can be in a set or a key in a dictionary. -class EqualByPropsMixin(PropertiesMixin): + NB: Python has special rules for this method being defined in the same class as __eq__. 
+ """ + return hash(self.fingerprint()) + + +class EqualByPropsMixin(collections.abc.Hashable, PropertiesMixin): """A mixin that defines equality as equal if the props are equal.""" + fingerprint: Callable[..., bytes] + def __eq__(self, that: object) -> bool: return hasattr(that, "props") and self.props == getattr(that, "props") + def __hash__(self) -> int: + """Make it so a schema can be in a set or a key in a dictionary. + + NB: Python has special rules for this method being defined in the same class as __eq__. + """ + return hash(self.fingerprint()) + class CanonicalPropertiesMixin(PropertiesMixin): """A Mixin that provides canonical properties to Schema and Field types.""" @@ -177,7 +250,7 @@ class Schema(abc.ABC, CanonicalPropertiesMixin): def __init__(self, type_: str, other_props: Optional[Mapping[str, object]] = None, validate_names: bool = True) -> None: if not isinstance(type_, str): raise avro.errors.SchemaParseException("Schema type must be a string.") - if type_ not in VALID_TYPES: + if type_ not in avro.constants.VALID_TYPES: raise avro.errors.SchemaParseException(f"{type_} is not a valid type.") self.set_prop("type", type_) self.type = type_ @@ -240,6 +313,30 @@ def __eq__(self, that: object) -> bool: Consider the mixins EqualByPropsMixin and EqualByJsonMixin """ + def fingerprint(self, algorithm="CRC-64-AVRO") -> bytes: + """ + Generate fingerprint for supplied algorithm. + + 'CRC-64-AVRO' will be used as the algorithm by default, but any + algorithm supported by hashlib (as can be referenced with + `hashlib.algorithms_guaranteed`) can be specified. + + `algorithm` param is used as an algorithm name, and NoSuchAlgorithmException + will be thrown if the algorithm is not among supported. + """ + schema = self.canonical_form.encode("utf-8") + + if algorithm == "CRC-64-AVRO": + return _crc_64_fingerprint(schema) + + if algorithm not in SUPPORTED_ALGORITHMS: + raise avro.errors.UnknownFingerprintAlgorithmException(f"Unknown Fingerprint Algorithm: {algorithm}") + + # Generate digests with hashlib for all other algorithms + # Lowercase algorithm to support algorithm strings sent by other languages like Java + h = hashlib.new(algorithm.lower(), schema) + return h.digest() + class NamedSchema(Schema): """Named Schemas specified in NAMED_TYPES.""" @@ -276,9 +373,17 @@ def name_ref(self, names): return self.name if self.namespace == names.default_namespace else self.fullname # read-only properties - name = property(lambda self: self.get_prop("name")) - namespace = property(lambda self: self.get_prop("namespace")) - fullname = property(lambda self: self._fullname) + @property + def name(self): + return self.get_prop("name") + + @property + def namespace(self): + return self.get_prop("namespace") + + @property + def fullname(self): + return self._fullname # @@ -347,10 +452,21 @@ def __init__(self, type_, name, has_default, default=None, order=None, names=Non self.set_prop("doc", doc) # read-only properties - default = property(lambda self: self.get_prop("default")) - has_default = property(lambda self: self._has_default) - order = property(lambda self: self.get_prop("order")) - doc = property(lambda self: self.get_prop("doc")) + @property + def default(self): + return self.get_prop("default") + + @property + def has_default(self): + return self._has_default + + @property + def order(self): + return self.get_prop("order") + + @property + def doc(self): + return self.get_prop("doc") def __str__(self): return json.dumps(self.to_json()) @@ -393,7 +509,7 @@ class 
PrimitiveSchema(EqualByPropsMixin, Schema): def __init__(self, type, other_props=None): # Ensure valid ctor args - if type not in PRIMITIVE_TYPES: + if type not in avro.constants.PRIMITIVE_TYPES: raise avro.errors.AvroException(f"{type} is not a valid primitive type.") # Call parent ctor @@ -449,8 +565,13 @@ def __init__(self, precision, scale=0, other_props=None): self.set_prop("scale", scale) # read-only properties - precision = property(lambda self: self.get_prop("precision")) - scale = property(lambda self: self.get_prop("scale")) + @property + def precision(self): + return self.get_prop("precision") + + @property + def scale(self): + return self.get_prop("scale") def to_json(self, names=None): return self.props @@ -477,7 +598,9 @@ def __init__(self, name, namespace, size, names=None, other_props=None, validate self.set_prop("size", size) # read-only properties - size = property(lambda self: self.get_prop("size")) + @property + def size(self): + return self.get_prop("size") def match(self, writer): """Return True if the current schema (as reader) matches the writer schema. @@ -531,8 +654,13 @@ def __init__( self.set_prop("scale", scale) # read-only properties - precision = property(lambda self: self.get_prop("precision")) - scale = property(lambda self: self.get_prop("scale")) + @property + def precision(self): + return self.get_prop("precision") + + @property + def scale(self): + return self.get_prop("scale") def to_json(self, names=None): return self.props @@ -587,7 +715,9 @@ def symbols(self) -> Sequence[str]: return symbols raise Exception - doc = property(lambda self: self.get_prop("doc")) + @property + def doc(self): + return self.get_prop("doc") def match(self, writer): """Return True if the current schema (as reader) matches the writer schema. @@ -645,7 +775,9 @@ def __init__(self, items, names=None, other_props=None, validate_names: bool = T self.set_prop("items", items_schema) # read-only properties - items = property(lambda self: self.get_prop("items")) + @property + def items(self): + return self.get_prop("items") def match(self, writer): """Return True if the current schema (as reader) matches the writer schema. @@ -697,7 +829,9 @@ def __init__(self, values, names=None, other_props=None, validate_names: bool = self.set_prop("values", values_schema) # read-only properties - values = property(lambda self: self.get_prop("values")) + @property + def values(self): + return self.get_prop("values") def match(self, writer): """Return True if the current schema (as reader) matches the writer schema. @@ -754,8 +888,8 @@ def __init__(self, schemas, names=None, validate_names: bool = True): raise avro.errors.SchemaParseException(f"Union item must be a valid Avro schema: {e}") # check the new schema if ( - new_schema.type in VALID_TYPES - and new_schema.type not in NAMED_TYPES + new_schema.type in avro.constants.VALID_TYPES + and new_schema.type not in avro.constants.NAMED_TYPES and new_schema.type in [schema.type for schema in schema_objects] ): raise avro.errors.SchemaParseException(f"{new_schema.type} type already in Union") @@ -766,7 +900,9 @@ def __init__(self, schemas, names=None, validate_names: bool = True): self._schemas = schema_objects # read-only properties - schemas = property(lambda self: self._schemas) + @property + def schemas(self): + return self._schemas def match(self, writer): """Return True if the current schema (as reader) matches the writer schema. 
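A minimal usage sketch of the fingerprint and hashability support added to avro/schema.py above (illustrative only, not part of the patch; the expected CRC-64-AVRO hex value comes from the FINGERPRINT_EXAMPLES table added to test_schema.py later in this diff):

    import avro.schema

    s = avro.schema.parse('"int"')
    # The default algorithm is the 64-bit Rabin fingerprint of the parsing
    # canonical form, returned as eight little-endian bytes.
    assert s.fingerprint().hex() == "8f5c393f1ad57572"
    # Any hashlib algorithm is also accepted; an unknown name raises
    # avro.errors.UnknownFingerprintAlgorithmException.
    md5_fp = s.fingerprint("md5")
    # Because __hash__ is now defined via the fingerprint, schemas can be
    # used as dict keys or set members.
    cache = {s: "parsed int schema"}
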
@@ -792,9 +928,7 @@ def to_canonical_json(self, names=None): def validate(self, datum): """Return the first branch schema of which datum is a valid example, else None.""" - for branch in self.schemas: - if branch.validate(datum) is not None: - return branch + return next((branch for branch in self.schemas if branch.validate(datum) is not None), None) class ErrorUnionSchema(UnionSchema): @@ -890,8 +1024,13 @@ def __init__( names.default_namespace = old_default # read-only properties - fields = property(lambda self: self.get_prop("fields")) - doc = property(lambda self: self.get_prop("doc")) + @property + def fields(self): + return self.get_prop("fields") + + @property + def doc(self): + return self.get_prop("doc") @property def fields_dict(self): @@ -1042,10 +1181,8 @@ def to_json(self, names=None): def validate(self, datum): try: - val = uuid.UUID(datum) - except ValueError: - # If it's a value error, then the string - # is not a valid hex code for a UUID. + uuid.UUID(datum) + except (ValueError, TypeError): return None return self @@ -1122,7 +1259,7 @@ def make_avsc_object( if logical_schema is not None: return cast(Schema, logical_schema) - if type_ in NAMED_TYPES: + if type_ in avro.constants.NAMED_TYPES: name = json_data.get("name") if not isinstance(name, str): raise avro.errors.SchemaParseException(f"Name {name} must be a string, but it is {type(name)}.") @@ -1152,10 +1289,10 @@ def make_avsc_object( return RecordSchema(name, namespace, fields, names, type_, doc, other_props, validate_names) raise avro.errors.SchemaParseException(f"Unknown Named Type: {type_}") - if type_ in PRIMITIVE_TYPES: + if type_ in avro.constants.PRIMITIVE_TYPES: return PrimitiveSchema(type_, other_props) - if type_ in VALID_TYPES: + if type_ in avro.constants.VALID_TYPES: if type_ == "array": items = json_data.get("items") return ArraySchema(items, names, other_props, validate_names) @@ -1175,7 +1312,7 @@ def make_avsc_object( elif isinstance(json_data, list): return UnionSchema(json_data, names, validate_names=validate_names) # JSON string (primitive) - elif json_data in PRIMITIVE_TYPES: + elif json_data in avro.constants.PRIMITIVE_TYPES: return PrimitiveSchema(json_data) # not for us! fail_msg = f"Could not make an Avro Schema object from {json_data}" diff --git a/lang/py/avro/test/gen_interop_data.py b/lang/py/avro/test/gen_interop_data.py index 1993b012826..1bde7ff8764 100644 --- a/lang/py/avro/test/gen_interop_data.py +++ b/lang/py/avro/test/gen_interop_data.py @@ -23,6 +23,7 @@ import io import json import os +from contextlib import closing from pathlib import Path from typing import IO, TextIO @@ -93,7 +94,8 @@ def _parse_args() -> argparse.Namespace: def main() -> int: args = _parse_args() - generate(args.schema_path, args.output_path) + with closing(args.output_path) as op: + generate(args.schema_path, op) return 0 diff --git a/lang/py/avro/test/test_io.py b/lang/py/avro/test/test_io.py index b77c17fb98b..36a85b3e5f9 100644 --- a/lang/py/avro/test/test_io.py +++ b/lang/py/avro/test/test_io.py @@ -24,6 +24,7 @@ import itertools import json import unittest +import uuid import warnings from typing import BinaryIO, Collection, Dict, List, Optional, Tuple, Union, cast @@ -185,6 +186,23 @@ class DefaultValueTestCaseType(TypedDict): {"type": "record", "name": "ns.long", "fields": [{"name": "value", "type": "int"}, {"name": "next", "type": ["null", "ns.long"]}]}, {"value": 0, "next": {"value": 1, "next": None}}, ), + # Optional logical types. 
+ ( + [{"logicalType": "uuid", "type": "string"}, "null"], + None, + ), + ( + [{"logicalType": "uuid", "type": "string"}, "null"], + uuid.uuid4().hex, + ), + ( + [{"type": "long", "logicalType": "timestamp-millis"}, "null"], + datetime.datetime(1000, 1, 1, 0, 0, 0, 0, tzinfo=avro.timezones.utc), + ), + ( + [{"type": "long", "logicalType": "timestamp-millis"}, "null"], + None, + ), ) ) diff --git a/lang/py/avro/test/test_protocol.py b/lang/py/avro/test/test_protocol.py index b3cd7bd692a..82fec0dd3f6 100644 --- a/lang/py/avro/test/test_protocol.py +++ b/lang/py/avro/test/test_protocol.py @@ -392,7 +392,7 @@ def test_inner_namespace_not_rendered(self): self.assertEqual("com.acme.Greeting", proto.types[0].fullname) self.assertEqual("Greeting", proto.types[0].name) # but there shouldn't be 'namespace' rendered to json on the inner type - self.assertFalse("namespace" in proto.to_json()["types"][0]) + self.assertNotIn("namespace", proto.to_json()["types"][0]) class ProtocolParseTestCase(unittest.TestCase): diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py index c59ded8a73e..85d9f8db839 100644 --- a/lang/py/avro/test/test_schema.py +++ b/lang/py/avro/test/test_schema.py @@ -58,12 +58,12 @@ class InvalidTestSchema(TestSchema): valid = False -PRIMITIVE_EXAMPLES = [InvalidTestSchema('"True"')] # type: List[TestSchema] +PRIMITIVE_EXAMPLES: List[TestSchema] = [InvalidTestSchema('"True"')] PRIMITIVE_EXAMPLES.append(InvalidTestSchema("True")) PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"no_type": "test"}')) PRIMITIVE_EXAMPLES.append(InvalidTestSchema('{"type": "panther"}')) -PRIMITIVE_EXAMPLES.extend([ValidTestSchema(f'"{t}"') for t in avro.schema.PRIMITIVE_TYPES]) -PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in avro.schema.PRIMITIVE_TYPES]) +PRIMITIVE_EXAMPLES.extend([ValidTestSchema(f'"{t}"') for t in avro.constants.PRIMITIVE_TYPES]) +PRIMITIVE_EXAMPLES.extend([ValidTestSchema({"type": t}) for t in avro.constants.PRIMITIVE_TYPES]) FIXED_EXAMPLES = [ ValidTestSchema({"type": "fixed", "name": "Test", "size": 1}), @@ -519,6 +519,204 @@ class InvalidTestSchema(TestSchema): ), ] + +# Fingerprint examples are in the form of tuples: +# - Value in Position 0 is schema +# - Value in Position 1 is an array of fingerprints: +# - Position 0 is CRC-64-AVRO fingerprint +# - Position 0 is MD5 fingerprint +# - Position 0 is SHA256 fingerprint +FINGERPRINT_EXAMPLES = [ + ('"int"', ["8f5c393f1ad57572", "ef524ea1b91e73173d938ade36c1db32", "3f2b87a9fe7cc9b13835598c3981cd45e3e355309e5090aa0933d7becb6fba45"]), + ('{"type": "int"}', ["8f5c393f1ad57572", "ef524ea1b91e73173d938ade36c1db32", "3f2b87a9fe7cc9b13835598c3981cd45e3e355309e5090aa0933d7becb6fba45"]), + ('"float"', ["90d7a83ecb027c4d", "50a6b9db85da367a6d2df400a41758a6", "1e71f9ec051d663f56b0d8e1fc84d71aa56ccfe9fa93aa20d10547a7abeb5cc0"]), + ( + '{"type": "float"}', + ["90d7a83ecb027c4d", "50a6b9db85da367a6d2df400a41758a6", "1e71f9ec051d663f56b0d8e1fc84d71aa56ccfe9fa93aa20d10547a7abeb5cc0"], + ), + ('"long"', ["b71df49344e154d0", "e1dd9a1ef98b451b53690370b393966b", "c32c497df6730c97fa07362aa5023f37d49a027ec452360778114cf427965add"]), + ( + '{"type": "long"}', + ["b71df49344e154d0", "e1dd9a1ef98b451b53690370b393966b", "c32c497df6730c97fa07362aa5023f37d49a027ec452360778114cf427965add"], + ), + ('"double"', ["7e95ab32c035758e", "bfc71a62f38b99d6a93690deeb4b3af6", "730a9a8c611681d7eef442e03c16c70d13bca3eb8b977bb403eaff52176af254"]), + ( + '{"type": "double"}', + ["7e95ab32c035758e", "bfc71a62f38b99d6a93690deeb4b3af6", 
"730a9a8c611681d7eef442e03c16c70d13bca3eb8b977bb403eaff52176af254"], + ), + ('"bytes"', ["651920c3da16c04f", "b462f06cb909be57c85008867784cde6", "9ae507a9dd39ee5b7c7e285da2c0846521c8ae8d80feeae5504e0c981d53f5fa"]), + ( + '{"type": "bytes"}', + ["651920c3da16c04f", "b462f06cb909be57c85008867784cde6", "9ae507a9dd39ee5b7c7e285da2c0846521c8ae8d80feeae5504e0c981d53f5fa"], + ), + ('"string"', ["c70345637248018f", "095d71cf12556b9d5e330ad575b3df5d", "e9e5c1c9e4f6277339d1bcde0733a59bd42f8731f449da6dc13010a916930d48"]), + ( + '{"type": "string"}', + ["c70345637248018f", "095d71cf12556b9d5e330ad575b3df5d", "e9e5c1c9e4f6277339d1bcde0733a59bd42f8731f449da6dc13010a916930d48"], + ), + ('"boolean"', ["64f7d4a478fc429f", "01f692b30d4a1c8a3e600b1440637f8f", "a5b031ab62bc416d720c0410d802ea46b910c4fbe85c50a946ccc658b74e677e"]), + ( + '{"type": "boolean"}', + ["64f7d4a478fc429f", "01f692b30d4a1c8a3e600b1440637f8f", "a5b031ab62bc416d720c0410d802ea46b910c4fbe85c50a946ccc658b74e677e"], + ), + ('"null"', ["8a8f25cce724dd63", "9b41ef67651c18488a8b08bb67c75699", "f072cbec3bf8841871d4284230c5e983dc211a56837aed862487148f947d1a1f"]), + ( + '{"type": "null"}', + ["8a8f25cce724dd63", "9b41ef67651c18488a8b08bb67c75699", "f072cbec3bf8841871d4284230c5e983dc211a56837aed862487148f947d1a1f"], + ), + ( + '{"type": "fixed", "name": "Test", "size": 1}', + ["6869897b4049355b", "db01bc515fcfcd2d4be82ed385288261", "f527116a6f44455697e935afc31dc60ad0f95caf35e1d9c9db62edb3ffeb9170"], + ), + ( + json.dumps({"type": "fixed", "name": "MyFixed", "namespace": "org.apache.hadoop.avro", "size": 1}), + ["fadbd138e85bdf45", "d74b3726484422711c465d49e857b1ba", "28e493a44771cecc5deca4bd938cdc3d5a24cfe1f3760bc938fa1057df6334fc"], + ), + ( + '{"type": "enum", "name": "Test", "symbols": ["A", "B"]}', + ["03a2f2c2e27f7a16", "d883f2a9b16ed085fcc5e4ca6c8f6ed1", "9b51286144f87ce5aebdc61ca834379effa5a41ce6ac0938630ff246297caca8"], + ), + ( + '{"type": "array", "items": "long"}', + ["715e2ea28bc91654", "c1c387e8d6a58f0df749b698991b1f43", "f78e954167feb23dcb1ce01e8463cebf3408e0a4259e16f24bd38f6d0f1d578b"], + ), + ( + json.dumps({"type": "array", "items": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), + ["10d9ade1fa3a0387", "cfc7b861c7cfef082a6ef082948893fa", "0d8edd49d7f7e9553668f133577bc99f842852b55d9f84f1f7511e4961aa685c"], + ), + ( + '{"type": "map", "values": "long"}', + ["6f74f4e409b1334e", "32b3f1a3177a0e73017920f00448b56e", "b8fad07d458971a07692206b8a7cf626c86c62fe6bcff7c1b11bc7295de34853"], + ), + ( + json.dumps({"type": "map", "values": {"type": "enum", "name": "Test", "symbols": ["A", "B"]}}), + ["df2ab0626f6b812d", "c588da6ba99701c41e73fd30d23f994e", "3886747ed1669a8af476b549e97b34222afb2fed5f18bb27c6f367ea0351a576"], + ), + ( + '["string", "null", "long"]', + ["65a5be410d687566", "b11cf95f0a55dd55f9ee515a37bf937a", "ed8d254116441bb35e237ad0563cf5432b8c975334bd222c1ee84609435d95bb"], + ), + ( + json.dumps({"type": "record", "name": "Test", "fields": [{"name": "f", "type": "long"}]}), + ["ed94e5f5e6eb588e", "69531a03db788afe353244cd049b1e6d", "9670f15a8f96d23e92830d00b8bd57275e02e3e173ffef7c253c170b6beabeb8"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, {"name": "children", "type": {"type": "array", "items": "Node"}}], + } + ), + ["52cba544c3e756b7", "99625b0cc02050363e89ef66b0f406c9", "65d80dc8c95c98a9671d92cf0415edfabfee2cb058df2138606656cd6ae4dc59"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Lisp", + "fields": [ + { + "name": "value", + "type": 
[ + "null", + "string", + {"type": "record", "name": "Cons", "fields": [{"name": "car", "type": "Lisp"}, {"name": "cdr", "type": "Lisp"}]}, + ], + } + ], + } + ), + ["68d91a23eda0b306", "9e1d0d15b52789fcb8e3a88b53059d5f", "e5ce4f4a15ce19fa1047cfe16a3b0e13a755db40f00f23284fdd376fc1c7dd21"], + ), + ( + json.dumps( + { + "type": "record", + "name": "HandshakeRequest", + "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}, + ], + } + ), + ["43818703b7b5d769", "16ded8b5027e80a17704c6565c0c3f1b", "6c317314687da52a85c813a7f0c92298a60b79625b9acc072e4d9e4256a1d800"], + ), + ( + json.dumps( + { + "type": "record", + "name": "HandshakeResponse", + "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", "type": {"type": "enum", "name": "HandshakeMatch", "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": ["null", {"name": "MD5", "size": 16, "type": "fixed"}]}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]}, + ], + } + ), + ["00feee01de4ea50e", "afe529d01132daab7f4e2a6663e7a2f5", "a303cbbfe13958f880605d70c521a4b7be34d9265ac5a848f25916a67b11d889"], + ), + ( + json.dumps( + { + "type": "record", + "name": "Interop", + "namespace": "org.apache.avro", + "fields": [ + {"name": "intField", "type": "int"}, + {"name": "longField", "type": "long"}, + {"name": "stringField", "type": "string"}, + {"name": "boolField", "type": "boolean"}, + {"name": "floatField", "type": "float"}, + {"name": "doubleField", "type": "double"}, + {"name": "bytesField", "type": "bytes"}, + {"name": "nullField", "type": "null"}, + {"name": "arrayField", "type": {"type": "array", "items": "double"}}, + { + "name": "mapField", + "type": {"type": "map", "values": {"name": "Foo", "type": "record", "fields": [{"name": "label", "type": "string"}]}}, + }, + {"name": "unionField", "type": ["boolean", "double", {"type": "array", "items": "bytes"}]}, + {"name": "enumField", "type": {"type": "enum", "name": "Kind", "symbols": ["A", "B", "C"]}}, + {"name": "fixedField", "type": {"type": "fixed", "name": "MD5", "size": 16}}, + { + "name": "recordField", + "type": { + "type": "record", + "name": "Node", + "fields": [{"name": "label", "type": "string"}, {"name": "children", "type": {"type": "array", "items": "Node"}}], + }, + }, + ], + } + ), + ["e82c0a93a6a0b5a4", "994fea1a1be7ff8603cbe40c3bc7e4ca", "cccfd6e3f917cf53b0f90c206342e6703b0d905071f724a1c1f85b731c74058d"], + ), + ( + json.dumps( + { + "type": "record", + "name": "ipAddr", + "fields": [{"name": "addr", "type": [{"name": "IPv6", "type": "fixed", "size": 16}, {"name": "IPv4", "type": "fixed", "size": 4}]}], + } + ), + ["8d961b4e298a1844", "45d85c69b353a99b93d7c4f2fcf0c30d", "6f6fc8f685a4f07d99734946565d63108806d55a8620febea047cf52cb0ac181"], + ), + ( + json.dumps({"type": "record", "name": "TestDoc", "doc": "Doc string", "fields": [{"name": "name", "type": "string", "doc": "Doc String"}]}), + ["0e6660f02bcdc109", "f2da75f5131f5ab80629538287b8beb2", "0b3644f7aa5ca2fc4bad93ca2d3609c12aa9dbda9c15e68b34c120beff08e7b9"], + ), + ( + '{"type": "enum", "name": "Test", "symbols": ["A", "B"], "doc": "Doc String"}', + ["03a2f2c2e27f7a16", "d883f2a9b16ed085fcc5e4ca6c8f6ed1", 
"9b51286144f87ce5aebdc61ca834379effa5a41ce6ac0938630ff246297caca8"], + ), +] + EXAMPLES = PRIMITIVE_EXAMPLES EXAMPLES += FIXED_EXAMPLES EXAMPLES += ENUM_EXAMPLES @@ -634,13 +832,25 @@ def test_fixed_decimal_invalid_max_precision(self): def test_parse_invalid_symbol(self): """Disabling enumschema symbol validation should allow invalid symbols to pass.""" test_schema_string = json.dumps({"type": "enum", "name": "AVRO2174", "symbols": ["white space"]}) + with self.assertRaises(avro.errors.InvalidName, msg="When enum symbol validation is enabled, an invalid symbol should raise InvalidName."): avro.schema.parse(test_schema_string, validate_enum_symbols=True) + try: avro.schema.parse(test_schema_string, validate_enum_symbols=False) except avro.errors.InvalidName: # pragma: no coverage self.fail("When enum symbol validation is disabled, an invalid symbol should not raise InvalidName.") + def test_unsupported_fingerprint_algorithm(self): + s = avro.schema.parse('"int"') + self.assertRaises(avro.errors.UnknownFingerprintAlgorithmException, s.fingerprint, "foo") + + def test_less_popular_fingerprint_algorithm(self): + s = avro.schema.parse('"int"') + fingerprint = s.fingerprint("sha384") + hex_fingerprint = "".join(format(b, "02x") for b in fingerprint).zfill(16) + self.assertEqual(hex_fingerprint, "32ed5e4ac896570f044d1dab68f4c8ca9866ac06d22261f399316bf4799e16854750238085775107dfac905c82b2feaf") + class SchemaParseTestCase(unittest.TestCase): """Enable generating parse test cases over all the valid and invalid example schema.""" @@ -680,6 +890,31 @@ def parse_invalid(self): self.test_schema.parse() +class HashableTestCase(unittest.TestCase): + """Ensure that Schema are hashable. + + While hashability is implemented with parsing canonical form fingerprinting, + this test should be kept distinct to avoid coupling.""" + + def __init__(self, test_schema): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. + """ + super().__init__("parse_and_hash") + self.test_schema = test_schema + + def parse_and_hash(self): + """Ensure that every schema can be hashed.""" + try: + hash(self.test_schema.parse()) + except TypeError as e: + if "unhashable type" in str(e): + self.fail(f"{self.test_schema} is not hashable") + raise + + class RoundTripParseTestCase(unittest.TestCase): """Enable generating round-trip parse test cases over all the valid test schema.""" @@ -1181,6 +1416,39 @@ def test_large_record_interop(self): ) +class FingerprintTestCase(unittest.TestCase): + """ + Enable generating fingerprint test cases across algorithms. + + Fingerprint examples are in the form of tuples: + - Value in Position 0 is schema + - Value in Position 1 is an array of fingerprints: + - Position 0 is CRC-64-AVRO fingerprint + - Position 0 is MD5 fingerprint + - Position 0 is SHA256 fingerprint + """ + + def __init__(self, test_schema, fingerprints): + """Ignore the normal signature for unittest.TestCase because we are generating + many test cases from this one class. This is safe as long as the autoloader + ignores this class. The autoloader will ignore this class as long as it has + no methods starting with `test_`. 
+ """ + super(FingerprintTestCase, self).__init__("validate_fingerprint") + self.test_schema = test_schema + self.fingerprints = fingerprints + + def _hex_fingerprint(self, fingerprint): + return "".join(format(b, "02x") for b in fingerprint).zfill(16) + + def validate_fingerprint(self): + """The string of a Schema should be parseable to the same Schema.""" + s = avro.schema.parse(self.test_schema) + self.assertEqual(self._hex_fingerprint(s.fingerprint()), self.fingerprints[0]) + self.assertEqual(self._hex_fingerprint(s.fingerprint("md5")), self.fingerprints[1]) + self.assertEqual(self._hex_fingerprint(s.fingerprint("sha256")), self.fingerprints[2]) + + def load_tests(loader, default_tests, pattern): """Generate test cases across many test schema.""" suite = unittest.TestSuite() @@ -1190,6 +1458,8 @@ def load_tests(loader, default_tests, pattern): suite.addTests(DocAttributesTestCase(ex) for ex in DOC_EXAMPLES) suite.addTests(OtherAttributesTestCase(ex) for ex in OTHER_PROP_EXAMPLES) suite.addTests(loader.loadTestsFromTestCase(CanonicalFormTestCase)) + suite.addTests(FingerprintTestCase(ex[0], ex[1]) for ex in FINGERPRINT_EXAMPLES) + suite.addTests(HashableTestCase(ex) for ex in VALID_EXAMPLES) return suite diff --git a/lang/py/avro/test/test_tether_task.py b/lang/py/avro/test/test_tether_task.py index 5a4e2b26dbe..35be22437dd 100644 --- a/lang/py/avro/test/test_tether_task.py +++ b/lang/py/avro/test/test_tether_task.py @@ -18,7 +18,6 @@ # limitations under the License. import io -import os import subprocess import sys import time diff --git a/lang/py/avro/test/test_tether_task_runner.py b/lang/py/avro/test/test_tether_task_runner.py index 7696161d3c9..8e52c985edd 100644 --- a/lang/py/avro/test/test_tether_task_runner.py +++ b/lang/py/avro/test/test_tether_task_runner.py @@ -19,7 +19,6 @@ import io import logging -import os import subprocess import sys import time @@ -47,7 +46,6 @@ def test1(self): pyfile = avro.test.mock_tether_parent.__file__ proc = subprocess.Popen([sys.executable, pyfile, "start_server", f"{parent_port}"]) - input_port = avro.tether.util.find_port() print(f"Mock server started process pid={proc.pid}") # Possible race condition? 
open tries to connect to the subprocess before the subprocess is fully started diff --git a/lang/py/avro/tether/__init__.py b/lang/py/avro/tether/__init__.py index 4875581f292..68df39ec4d4 100644 --- a/lang/py/avro/tether/__init__.py +++ b/lang/py/avro/tether/__init__.py @@ -27,3 +27,13 @@ ) from avro.tether.tether_task_runner import TaskRunner from avro.tether.util import find_port + +__all__ = ( + "HTTPRequestor", + "TaskRunner", + "TaskType", + "TetherTask", + "find_port", + "inputProtocol", + "outputProtocol", +) diff --git a/lang/py/avro/tether/tether_task.py b/lang/py/avro/tether/tether_task.py index c521fa56b4c..6caac6abe90 100644 --- a/lang/py/avro/tether/tether_task.py +++ b/lang/py/avro/tether/tether_task.py @@ -285,7 +285,7 @@ def configure(self, taskType, inSchemaText, outSchemaText): try: inSchema = avro.schema.parse(inSchemaText) - outSchema = avro.schema.parse(outSchemaText) + avro.schema.parse(outSchemaText) if taskType == TaskType.MAP: self.inReader = avro.io.DatumReader(writers_schema=inSchema, readers_schema=self.inschema) @@ -299,7 +299,7 @@ def configure(self, taskType, inSchemaText, outSchemaText): # determine which fields in the input record are they keys for the reducer self._red_fkeys = [f.name for f in self.midschema.fields if not (f.order == "ignore")] - except Exception as e: + except Exception: estr = traceback.format_exc() self.fail(estr) @@ -345,7 +345,7 @@ def input(self, data, count): self.reduceFlush(prev, self.outCollector) self.reduce(self.midRecord, self.outCollector) - except Exception as e: + except Exception: estr = traceback.format_exc() self.log.warning("failing: %s", estr) self.fail(estr) @@ -357,7 +357,7 @@ def complete(self): if (self.taskType == TaskType.REDUCE) and not (self.midRecord is None): try: self.reduceFlush(self.midRecord, self.outCollector) - except Exception as e: + except Exception: estr = traceback.format_exc() self.log.warning("failing: %s", estr) self.fail(estr) @@ -430,7 +430,7 @@ def fail(self, message): try: self.outputClient.request("fail", {"message": message}) - except Exception as e: + except Exception: self.log.exception("TetherTask.fail: an exception occured while trying to send the fail message to the output server.") self.close() @@ -441,7 +441,7 @@ def close(self): try: self.clienTransciever.close() - except Exception as e: + except Exception: # ignore exceptions pass diff --git a/lang/py/avro/tether/tether_task_runner.py b/lang/py/avro/tether/tether_task_runner.py index c1533b33353..410f6c00e4c 100644 --- a/lang/py/avro/tether/tether_task_runner.py +++ b/lang/py/avro/tether/tether_task_runner.py @@ -66,7 +66,7 @@ def invoke(self, message, request): self.log.info("TetherTaskRunner: Received partitions") try: self.task.partitions = request["partitions"] - except Exception as e: + except Exception: self.log.error("Exception occured while processing the partitions message: Message:\n%s", traceback.format_exc()) raise elif message.name == "input": diff --git a/lang/py/avro/utils.py b/lang/py/avro/utils.py index 76d8f6ec293..32e7f5aa36a 100644 --- a/lang/py/avro/utils.py +++ b/lang/py/avro/utils.py @@ -36,3 +36,5 @@ def _randbytes(n: int) -> bytes: randbytes = getattr(random, "randbytes", _randbytes) + +__all__ = ("randbytes", "TypedDict") diff --git a/lang/py/build.sh b/lang/py/build.sh index 7412889ec5a..d022d67a630 100755 --- a/lang/py/build.sh +++ b/lang/py/build.sh @@ -18,7 +18,7 @@ set -e usage() { - echo "Usage: $0 {clean|dist|interop-data-generate|interop-data-test|lint|test}" + echo "Usage: $0 
{clean|dist|doc|interop-data-generate|interop-data-test|lint|test}" exit 1 } @@ -27,7 +27,6 @@ clean() { '*.avsc' \ '*.egg-info' \ '*.py[co]' \ - 'VERSION.txt' \ '__pycache__' \ '.tox' \ 'avro/test/interop' \ @@ -51,6 +50,15 @@ dist() ( "$virtualenv/bin/python3" -m build --outdir "$destination" ) +doc() { + local doc_dir + local version=$(cat ../../share/VERSION.txt) + doc_dir="../../build/avro-doc-$version/api/py" + python3 -m tox -e docs + mkdir -p "$doc_dir" + cp -a docs/build/* "$doc_dir" +} + interop-data-generate() { ./setup.py generate_interop_data cp -r avro/test/interop/data ../../build/interop @@ -76,6 +84,7 @@ main() { case "$target" in clean) clean;; dist) dist;; + doc) doc;; interop-data-generate) interop-data-generate;; interop-data-test) interop-data-test;; lint) lint;; diff --git a/lang/py/docs/Makefile b/lang/py/docs/Makefile new file mode 100644 index 00000000000..f0710c99b4f --- /dev/null +++ b/lang/py/docs/Makefile @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/lang/py/docs/make.bat b/lang/py/docs/make.bat new file mode 100644 index 00000000000..93900e3e37b --- /dev/null +++ b/lang/py/docs/make.bat @@ -0,0 +1,50 @@ +REM Licensed to the Apache Software Foundation (ASF) under one or more +REM contributor license agreements. See the NOTICE file distributed with +REM this work for additional information regarding copyright ownership. +REM The ASF licenses this file to You under the Apache License, Version 2.0 +REM (the "License"); you may not use this file except in compliance with +REM the License. You may obtain a copy of the License at +REM +REM https://www.apache.org/licenses/LICENSE-2.0 +REM +REM Unless required by applicable law or agreed to in writing, software +REM distributed under the License is distributed on an "AS IS" BASIS, +REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +REM See the License for the specific language governing permissions and +REM limitations under the License. 
+ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/lang/py/docs/source/_static/.gitignore b/lang/py/docs/source/_static/.gitignore new file mode 100644 index 00000000000..e69de29bb2d diff --git a/lang/py/docs/source/automodule.rst b/lang/py/docs/source/automodule.rst new file mode 100644 index 00000000000..bfd53e79252 --- /dev/null +++ b/lang/py/docs/source/automodule.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. automodule:: avro + :members: +.. automodule:: avro.compatibility + :members: +.. automodule:: avro.datafile + :members: +.. automodule:: avro.ipc + :members: +.. automodule:: avro.protocol + :members: +.. automodule:: avro.codecs + :members: +.. automodule:: avro.constants + :members: +.. automodule:: avro.io + :members: +.. automodule:: avro.tether + :members: +.. automodule:: avro.tether.tether_task_runner + :members: +.. automodule:: avro.tether.util + :members: +.. automodule:: avro.tether.tether_task + :members: +.. automodule:: avro.utils + :members: +.. automodule:: avro.errors + :members: +.. automodule:: avro.name + :members: +.. automodule:: avro.tool + :members: +.. automodule:: avro.timezones + :members: +.. automodule:: avro.__main__ + :members: +.. automodule:: avro.schema + :members: diff --git a/lang/py/docs/source/conf.py b/lang/py/docs/source/conf.py new file mode 100644 index 00000000000..85ba2827ad9 --- /dev/null +++ b/lang/py/docs/source/conf.py @@ -0,0 +1,47 @@ +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information +import sys + +sys.path.append("..") + +project = "Apache Avro" +copyright = "2023, Apache" +author = "Apache" +release = "1.12.0" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["sphinx.ext.autodoc"] + +templates_path = ["_templates"] +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_static_path = ["_static"] diff --git a/lang/py/docs/source/index.rst b/lang/py/docs/source/index.rst new file mode 100644 index 00000000000..ec66916da4e --- /dev/null +++ b/lang/py/docs/source/index.rst @@ -0,0 +1,31 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + + intro + automodule + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/lang/py/docs/source/intro.rst b/lang/py/docs/source/intro.rst new file mode 100644 index 00000000000..ca480a15bb7 --- /dev/null +++ b/lang/py/docs/source/intro.rst @@ -0,0 +1,29 @@ +.. Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ +Welcome to Avro's Python documentation! +======================================= + +Avro is a data serialization system. See `avro.apache.org <https://avro.apache.org/>`_ for background information. + +Avro Python is a Python library that implements parts of the `Avro Specification <https://avro.apache.org/docs/current/specification/>`_. + +The library includes the following functionality: + +* Assembling schemas programmatically. +* A schema parser, which can parse Avro schema (written in JSON) into a Schema object. +* Binary encoders and decoders to encode data into Avro format and decode it back using primitive functions. +* Streams for storing and reading data, which Encoders and Decoders use. +* Support for Avro DataFile. diff --git a/lang/py/pyproject.toml b/lang/py/pyproject.toml index 675e976379a..bc4e5d4bce8 100644 --- a/lang/py/pyproject.toml +++ b/lang/py/pyproject.toml @@ -25,3 +25,12 @@ line-length = 150 [tool.isort] profile = 'black' + +[tool.autoflake] +expand-star-imports = true +recursive = true +# Put a name in __all__ to explicitly export something a module imports. +# This is clearer and will keep autoflake from trying to remove it. +remove-all-unused-imports = true +remove-duplicate-keys = true +remove-unused-variables = true diff --git a/lang/py/setup.cfg b/lang/py/setup.cfg index ad50a763da2..5b2a6c78080 100644 --- a/lang/py/setup.cfg +++ b/lang/py/setup.cfg @@ -33,7 +33,6 @@ license_files = avro/LICENSE license = Apache License 2.0 classifiers = License :: OSI Approved :: Apache Software License - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 @@ -57,7 +56,7 @@ include_package_data = true install_requires = typing-extensions;python_version<"3.8" zip_safe = true -python_requires = >=3.6 +python_requires = >=3.7 [options.entry_points] console_scripts = @@ -81,3 +80,8 @@ zstandard = zstandard [aliases] dist = sdist --dist-dir ../../dist/py + +[flake8] +# Flake8 doesn't support configuration in pyproject.toml +# Otherwise, configure it to be compatible with black in that file. +max-line-length = 150 diff --git a/lang/py/setup.py b/lang/py/setup.py index dee0e7f2ee9..d68073a7b6d 100755 --- a/lang/py/setup.py +++ b/lang/py/setup.py @@ -19,9 +19,7 @@ import distutils.errors -import glob import os -import subprocess import setuptools # type: ignore diff --git a/lang/py/tox.ini b/lang/py/tox.ini index 07e4d498d95..38c5e6d8350 100644 --- a/lang/py/tox.ini +++ b/lang/py/tox.ini @@ -18,15 +18,18 @@ envlist = build # Build the wheel # Fastest checks first + docs lint typechecks - py36 py37 py38 py39 py310 - pypy3.6 + py311 pypy3.7 + pypy3.8 + pypy3.9 + pypy3.10 [coverage:run] @@ -66,12 +69,22 @@ commands_pre = commands = commands_post = +[testenv:docs] +deps = + sphinx +commands_pre = +commands = + sphinx-build -b html docs/source/ docs/build/html +commands_post = + [testenv:lint] deps = + autoflake black isort commands_pre = commands = + autoflake --check-diff . black --check . isort --check-only . commands_post = diff --git a/lang/ruby/lib/avro/io.rb b/lang/ruby/lib/avro/io.rb index e6e3b326d2d..0d2f3135850 100644 --- a/lang/ruby/lib/avro/io.rb +++ b/lang/ruby/lib/avro/io.rb @@ -390,31 +390,31 @@ def read_record(writers_schema, readers_schema, decoder) def read_default_value(field_schema, default_value) # Basically a JSON Decoder?
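The hunk below rewrites read_default_value so that `case` is used as an expression: each branch yields a value into `datum`, and the logical-type conversion is applied once at the end instead of being bypassed by per-branch returns. A condensed sketch of the pattern, with illustrative branch values (not part of the patch):

    # Every `when` branch produces a value rather than returning early...
    datum = case field_schema.type_sym
            when :null then nil
            when :int, :long then Integer(default_value)
            else default_value
            end
    # ...so the logical type adapter can decode the result uniformly.
    field_schema.type_adapter.decode(datum)
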
- case field_schema.type_sym + datum = case field_schema.type_sym when :null - return nil + nil when :int, :long - return Integer(default_value) + Integer(default_value) when :float, :double - return Float(default_value) + Float(default_value) when :boolean, :enum, :fixed, :string, :bytes - return default_value + default_value when :array read_array = [] default_value.each do |json_val| item_val = read_default_value(field_schema.items, json_val) read_array << item_val end - return read_array + read_array when :map read_map = {} default_value.each do |key, json_val| map_val = read_default_value(field_schema.values, json_val) read_map[key] = map_val end - return read_map + read_map when :union - return read_default_value(field_schema.schemas[0], default_value) + read_default_value(field_schema.schemas[0], default_value) when :record, :error read_record = {} field_schema.fields.each do |field| @@ -423,11 +423,13 @@ def read_default_value(field_schema, default_value) field_val = read_default_value(field.type, json_val) read_record[field.name] = field_val end - return read_record + read_record else fail_msg = "Unknown type: #{field_schema.type}" raise AvroError, fail_msg end + + field_schema.type_adapter.decode(datum) end def skip_data(writers_schema, decoder) diff --git a/lang/ruby/lib/avro/schema.rb b/lang/ruby/lib/avro/schema.rb index 4485d5e9ec4..37e1d21031c 100644 --- a/lang/ruby/lib/avro/schema.rb +++ b/lang/ruby/lib/avro/schema.rb @@ -111,7 +111,7 @@ def self.real_parse(json_obj, names=nil, default_namespace=nil) elsif PRIMITIVE_TYPES.include? json_obj return PrimitiveSchema.new(json_obj) else - raise UnknownSchemaError.new(json_obj) + raise UnknownSchemaError.new(json_obj, default_namespace) end end @@ -601,8 +601,16 @@ def validate_default! else type end - - Avro::SchemaValidator.validate!(type_for_default, default) + case type_for_default.logical_type + when DECIMAL_LOGICAL_TYPE + # https://avro.apache.org/docs/1.11.1/specification/#schema-record + # Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255 + options = SchemaValidator::DEFAULT_VALIDATION_OPTIONS.dup + options[:encoded] = true + Avro::SchemaValidator.validate!(type_for_default, default, options) + else + Avro::SchemaValidator.validate!(type_for_default, default) + end rescue Avro::SchemaValidator::ValidationError => e raise Avro::SchemaParseError, "Error validating default for #{name}: #{e.message}" end @@ -613,9 +621,11 @@ class SchemaParseError < AvroError; end class UnknownSchemaError < SchemaParseError attr_reader :type_name + attr_reader :default_namespace - def initialize(type) + def initialize(type, default_namespace) @type_name = type + @default_namespace = default_namespace super("#{type.inspect} is not a schema we know about.") end end diff --git a/lang/ruby/test/test_logical_types.rb b/lang/ruby/test/test_logical_types.rb index f42b5fe2e6a..3aeb3498397 100644 --- a/lang/ruby/test/test_logical_types.rb +++ b/lang/ruby/test/test_logical_types.rb @@ -124,6 +124,113 @@ def test_bytes_decimal end end + def test_logical_type_default_value + sales_schema = Avro::Schema.parse('{ + "type": "record", + "name": "Order", + "fields" : [ + { + "name": "sales", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + } + ] + }') + + sales_tax_schema = Avro::Schema.parse('{ + "type": "record", + "name": "Order", + "fields" : [ + { + "name": "sales", + "type": [ + { 
+ "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + }, + { + "name": "tax", + "type": [ + { + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 + }, + "null" + ], + "default": "\u0000" + }, + { + "name": "invoice_date", + "type": [ + { + "type": "int", + "logicalType": "date" + }, + "null" + ], + "default": 0 + }, + { + "name": "invoice_time", + "type": [ + { + "type": "int", + "logicalType": "time-millis" + }, + "null" + ], + "default": 0 + }, + { + "name": "created_at", + "type": [ + { + "type": "long", + "logicalType": "timestamp-millis" + }, + "null" + ], + "default": 0 + } + ] + }') + + sales_record = {"sales" => BigDecimal("12.34")} + sales_tax_record = { + "sales" => BigDecimal("12.34"), + "tax" => BigDecimal("0.000"), + "invoice_date" => Time.at(0).to_date, + # time-millis is not supported + "invoice_time" => 0, + "created_at" => Time.at(0).utc, + } + encoded = encode(sales_record, sales_schema) + assert_equal sales_record, decode(encoded, sales_schema) + # decode with different schema applies default + assert_equal sales_tax_record, decode(encoded, sales_tax_schema, writer_schema: sales_schema) + + # decode with same schema does not apply default, since it is nullable during encode + encoded = encode(sales_record, sales_tax_schema) + tax_nil_record = {"sales" => BigDecimal("12.34"), "tax" => nil, "invoice_date" => nil, "invoice_time" => nil, "created_at" => nil} + assert_equal tax_nil_record, decode(encoded, sales_tax_schema) + end + def test_bytes_decimal_range_errors schema = Avro::Schema.parse <<-SCHEMA { "type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2 } @@ -243,11 +350,12 @@ def encode(datum, schema) buffer.string end - def decode(encoded, schema) + def decode(encoded, schema, writer_schema: nil) + writer_schema ||= schema buffer = StringIO.new(encoded) decoder = Avro::IO::BinaryDecoder.new(buffer) - datum_reader = Avro::IO::DatumReader.new(schema, schema) + datum_reader = Avro::IO::DatumReader.new(writer_schema, schema) datum_reader.read(decoder) end diff --git a/lang/ruby/test/test_schema.rb b/lang/ruby/test/test_schema.rb index 802653010e9..182f7dd8bca 100644 --- a/lang/ruby/test/test_schema.rb +++ b/lang/ruby/test/test_schema.rb @@ -176,6 +176,8 @@ def test_unknown_named_type end assert_equal '"MissingType" is not a schema we know about.', error.message + assert_equal "MissingType", error.type_name + assert_equal "my.name.space", error.default_namespace end def test_invalid_name @@ -612,6 +614,37 @@ def test_fixed_decimal_to_without_precision_scale assert_equal schema_hash, schema.to_avro end + def test_bytes_decimal_in_record + assert_nothing_raised do + hash_to_schema( + type: 'record', + name: 'account', + fields: [ + { name: 'balance', type: 'bytes', logicalType: 'decimal', precision: 9, scale: 2 } + ] + ) + end + end + + def test_bytes_decimal_with_default_in_record + assert_nothing_raised do + hash_to_schema( + type: 'record', + name: 'account', + fields: [ + { + name: 'balance', + type: [ + { type: 'bytes', logicalType: 'decimal', precision: 9, scale: 2 }, + 'null' + ], + default: '\u00ff' + } + ] + ) + end + end + def test_bytes_decimal_to_include_precision_scale schema = Avro::Schema.parse <<-SCHEMA { diff --git a/lang/rust/.cargo-rdme.toml b/lang/rust/.cargo-rdme.toml new file mode 100644 index 00000000000..3f27313be86 --- /dev/null +++ b/lang/rust/.cargo-rdme.toml @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or 
more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +readme-path = "avro/README.md" +workspace-project = "apache-avro" \ No newline at end of file diff --git a/lang/rust/Cargo.lock b/lang/rust/Cargo.lock index 054005b47bd..0fe0f0d8f28 100644 --- a/lang/rust/Cargo.lock +++ b/lang/rust/Cargo.lock @@ -23,11 +23,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "aho-corasick" -version = "0.7.20" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +checksum = "67fc08ce920c31afb70f013dcce1bfc3a3195de6a228474e45e1f145b36f8d04" dependencies = [ "memchr", ] @@ -38,20 +49,25 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + [[package]] name = "anyhow" -version = "1.0.69" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224afbd727c3d6e4b90103ece64b8d1b67fbb1973b1046c2281eed3f3803f800" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" [[package]] name = "apache-avro" -version = "0.15.0" +version = "0.16.0" dependencies = [ "anyhow", "apache-avro-derive", "apache-avro-test-helper", - "byteorder", "bzip2", "crc32fast", "criterion", @@ -65,9 +81,10 @@ dependencies = [ "pretty_assertions", "quad-rand", "rand", - "regex", + "regex-lite", "serde", "serde_json", + "serial_test", "sha2", "snap", "strum", @@ -76,13 +93,12 @@ dependencies = [ "typed-builder", "uuid", "xz2", - "zerocopy", "zstd", ] [[package]] name = "apache-avro-derive" -version = "0.15.0" +version = "0.16.0" dependencies = [ "apache-avro", "darling", @@ -96,8 +112,9 @@ dependencies = [ [[package]] name = "apache-avro-test-helper" -version = "0.15.0" +version = "0.16.0" dependencies = [ + "anyhow", "color-backtrace", "ctor", "env_logger", @@ -112,7 +129,7 @@ version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ - "hermit-abi", + "hermit-abi 0.1.19", "libc", "winapi", ] @@ -144,6 +161,12 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitflags" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" + [[package]] name = "block-buffer" version = "0.10.2" @@ -236,25 +259,29 @@ dependencies = [ [[package]] name = "clap" -version = "3.2.23" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "1640e5cc7fb47dbb8338fd471b105e7ed6c3cb2aeb00c2e067127ffd3764a05d" dependencies = [ - "bitflags", - "clap_lex", - "indexmap", - "textwrap", + "clap_builder", ] [[package]] -name = "clap_lex" -version = "0.2.4" +name = "clap_builder" +version = "4.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +checksum = "98c59138d527eeaf9b53f35a77fcc1fad9d883116070c63d5de1c7dc7b00c72b" dependencies = [ - "os_str_bytes", + "anstyle", + "clap_lex", ] +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + [[package]] name = "color-backtrace" version = "0.5.1" @@ -276,6 +303,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.2" @@ -296,19 +332,19 @@ dependencies = [ [[package]] name = "criterion" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7c76e09c1aae2bc52b3d2f29e13c6572553b30c4aa1b8a49fd70de6412654cb" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", - "atty", "cast", "ciborium", "clap", "criterion-plot", + "is-terminal", "itertools", - "lazy_static", "num-traits", + "once_cell", "oorandom", "regex", "serde", @@ -340,9 +376,9 @@ dependencies = [ [[package]] name = "ctor" -version = "0.1.26" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" +checksum = "1f34ba9a9bcb8645379e9de8cb3ecfcf4d1c85ba66d90deb3259206fa5aa193b" dependencies = [ "quote", "syn", @@ -350,9 +386,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.14.4" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" dependencies = [ "darling_core", "darling_macro", @@ -360,9 +396,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.14.4" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" +checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" dependencies = [ "fnv", "ident_case", @@ -373,15 +409,34 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.14.4" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" +checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core", "quote", "syn", ] +[[package]] +name = "dary_heap" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7762d17f1241643615821a8455a0b2c3e803784b058693d990b11f2dce25a0ca" + +[[package]] +name = "dashmap" +version = "5.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd72493923899c6f10c641bdbdeddc7183d6396641d99c1a0d1597f37f92e28" +dependencies = [ + "cfg-if", + "hashbrown 0.14.0", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "diff" version = "0.1.13" @@ -390,9 +445,9 @@ checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8" [[package]] name = "digest" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8168378f4e5023e7218c89c891c0fd8ecdb5e5e4f18cb78f38cf245dd021e76f" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ "block-buffer", "crypto-common", @@ -413,12 +468,110 @@ dependencies = [ "log", ] +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "generic-array" version = "0.14.6" @@ -454,15 +607,24 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "hashbrown" -version = "0.12.3" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hello-wasm" @@ -484,11 +646,17 @@ dependencies = [ "libc", ] +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + [[package]] name = "hex-literal" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ebdb29d2ea9ed0083cd8cece49bbd968021bd99b0849edb4a9a7ee0fdf6a4e0" +checksum = "6fe2267d4ed49bc07b63801559be28c718ea06c4738b7a03c94df7386d2cde46" [[package]] name = "ident_case" @@ -497,13 +665,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" [[package]] -name = "indexmap" -version = "1.9.2" +name = "is-terminal" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ - "autocfg", - "hashbrown", + "hermit-abi 0.3.2", + "rustix", + "windows-sys", ] [[package]] @@ -532,9 +701,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.61" +version = "0.3.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "445dde2150c55e483f3d8416706b97ec8e8237c307e5b7b4b8dd15e6af2a0730" +checksum = "c5f195fe497f702db0f318b07fdd68edb16955aed830df8363d837542f8f935a" dependencies = [ "wasm-bindgen", ] @@ -547,27 +716,31 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.127" +version = "0.2.147" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "505e71a4706fa491e9b1b55f51b95d4037d0821ee40131190475f692b35b009b" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] name = "libflate" -version = "1.2.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"05605ab2bce11bcfc0e9c635ff29ef8b2ea83f29be257ee7d730cac3ee373093" +checksum = "9f7d5654ae1795afc7ff76f4365c2c8791b0feb18e8996a96adad8ffd7c3b2bf" dependencies = [ "adler32", + "core2", "crc32fast", + "dary_heap", "libflate_lz77", ] [[package]] name = "libflate_lz77" -version = "1.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39a734c0493409afcd49deee13c006a04e3586b9761a03543c6272c9c51f2f5a" +checksum = "be5f52fb8c451576ec6b79d3f4deb327398bc05bbdbd99021a6e77a4c855d524" dependencies = [ + "core2", + "hashbrown 0.13.2", "rle-decode-fast", ] @@ -578,14 +751,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" [[package]] -name = "log" -version = "0.4.17" +name = "linux-raw-sys" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +checksum = "09fc20d2ca12cb9f044c93e3bd6d32d523e6e2ec3db4f7b2939cd99026ecd3f0" + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" dependencies = [ - "cfg-if", + "autocfg", + "scopeguard", ] +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + [[package]] name = "lzma-sys" version = "0.1.19" @@ -599,10 +785,11 @@ dependencies = [ [[package]] name = "md-5" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6365506850d44bff6e2fbcb5176cf63650e48bd45ef2fe2665ae1570e0f4b9ca" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" dependencies = [ + "cfg-if", "digest", ] @@ -623,9 +810,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" dependencies = [ "autocfg", "num-integer", @@ -644,9 +831,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +checksum = "f30b0abd723be7e2ffca1272140fac1a2f084c77ec3e123c192b66af1ee9e6c2" dependencies = [ "autocfg", "libm", @@ -663,9 +850,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.13.0" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" [[package]] name = "oorandom" @@ -674,20 +861,40 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" [[package]] -name = "os_str_bytes" -version = "6.4.1" +name = "parking_lot" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + 
"lock_api", + "parking_lot_core", +] [[package]] -name = "output_vt100" -version = "0.1.3" +name = "parking_lot_core" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" dependencies = [ - "winapi", + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", ] +[[package]] +name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.25" @@ -702,40 +909,37 @@ checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "pretty_assertions" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" +checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" dependencies = [ - "ctor", "diff", - "output_vt100", "yansi", ] [[package]] name = "proc-macro2" -version = "1.0.51" +version = "1.0.67" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" +checksum = "3d433d9f1a3e8c1263d9456598b16fec66f4acc9a74dacffd35c7bb09b3a1328" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29f1b898011ce9595050a68e60f90bad083ff2987a695a42357134c8381fba70" +checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" dependencies = [ - "bitflags", + "bitflags 1.3.2", "byteorder", "lazy_static", "num-traits", - "quick-error", "rand", "rand_chacha", "rand_xorshift", - "regex-syntax", + "regex-syntax 0.6.29", "unarray", ] @@ -745,17 +949,11 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658fa1faf7a4cc5f057c9ee5ef560f717ad9d8dc66d975267f709624d6e1ab88" -[[package]] -name = "quick-error" -version = "2.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" - [[package]] name = "quote" -version = "1.0.23" +version = "1.0.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" dependencies = [ "proc-macro2", ] @@ -799,6 +997,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "ref_thread_local" version = "0.1.1" @@ -807,20 +1014,44 @@ checksum = "a0d51660a68078997855ba5602f73ab3a5031bd7ad480a9d4c90fbbf04e1fff0" [[package]] name = "regex" -version = "1.7.1" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" +checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-automata", + "regex-syntax 0.7.4", ] +[[package]] +name = "regex-automata" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax 0.7.4", +] + +[[package]] +name = "regex-lite" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f96ede7f386ba6e910092e7ccdc04176cface62abebea07ed6b46d870ed95ca2" + +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" -version = "0.6.27" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" +checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2" [[package]] name = "rle-decode-fast" @@ -834,6 +1065,19 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "rustix" +version = "0.38.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac5ffa1efe7548069688cd7028f32591853cd7b5b756d41bcffd2353e4fc75b4" +dependencies = [ + "bitflags 2.3.3", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + [[package]] name = "rustversion" version = "1.0.9" @@ -861,20 +1105,26 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + [[package]] name = "serde" -version = "1.0.154" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cdd151213925e7f1ab45a9bbfb129316bd00799784b174b7cc7bcd16961c49e" +checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.154" +version = "1.0.188" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fc80d722935453bcafdc2c9a73cd6fac4dc1938f0346035d84bf99fa9e33217" +checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2", "quote", @@ -883,26 +1133,66 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.94" +version = "1.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea" +checksum = "6b420ce6e3d8bd882e9b243c6eed35dbc9a6110c9769e74b584e0d68d1f20c65" dependencies = [ "itoa", "ryu", "serde", ] +[[package]] +name = "serial_test" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e56dd856803e253c8f298af3f4d7eb0ae5e23a737252cd90bb4f3b435033b2d" +dependencies = [ + "dashmap", + "futures", + "lazy_static", + "log", + "parking_lot", + "serial_test_derive", +] + 
+[[package]] +name = "serial_test_derive" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91d129178576168c589c9ec973feedf7d3126c01ac2bf08795109aa35b69fb8f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "sha2" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0" +checksum = "479fb9d862239e610720565ca91403019f2f00410f1864c5aa7479b950a76ed8" dependencies = [ "cfg-if", "cpufeatures", "digest", ] +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + [[package]] name = "snap" version = "1.1.0" @@ -911,15 +1201,15 @@ checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "strum" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" [[package]] name = "strum_macros" -version = "0.24.3" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e385be0d24f186b4ce2f9982191e7101bb737312ad61c1f2f984f34bcf85d59" +checksum = "ad8d03b598d3d0fff69bf533ee3ef19b8eeb342729596df84bcc7e1f96ec4059" dependencies = [ "heck", "proc-macro2", @@ -930,27 +1220,15 @@ dependencies = [ [[package]] name = "syn" -version = "1.0.109" +version = "2.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +checksum = "7303ef2c05cd654186cb250d29049a24840ca25d2747c25c0381c8d9e2f582e8" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "synstructure" -version = "0.12.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "unicode-xid", -] - [[package]] name = "termcolor" version = "1.1.3" @@ -960,26 +1238,20 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "thiserror" -version = "1.0.39" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.39" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2", "quote", @@ -998,9 +1270,18 @@ dependencies = [ [[package]] name = "typed-builder" -version = "0.14.0" +version 
= "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cba322cb9b7bc6ca048de49e83918223f35e7a86311267013afff257004870" +checksum = "34085c17941e36627a879208083e25d357243812c30e7d7387c3b954f30ade16" +dependencies = [ + "typed-builder-macro", +] + +[[package]] +name = "typed-builder-macro" +version = "0.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", @@ -1025,17 +1306,11 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" -[[package]] -name = "unicode-xid" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" - [[package]] name = "uuid" -version = "1.3.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79" +checksum = "79daa5ed5740825c40b389c5e50312b9c86df53fccd33f281df655642b43869d" dependencies = [ "serde", ] @@ -1065,9 +1340,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31f8dcbc21f30d9b8f2ea926ecb58f6b91192c17e9d33594b3df58b2007ca53b" +checksum = "7706a72ab36d8cb1f80ffbf0e071533974a60d0a308d01a5d0375bf60499a342" dependencies = [ "cfg-if", "wasm-bindgen-macro", @@ -1075,9 +1350,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95ce90fd5bcc06af55a641a86428ee4229e44e07033963a2290a8e241607ccb9" +checksum = "5ef2b6d3c510e9625e5fe6f509ab07d66a760f0885d858736483c32ed7809abd" dependencies = [ "bumpalo", "log", @@ -1090,9 +1365,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.34" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +checksum = "c02dbc21516f9f1f04f187958890d7e6026df8d16540b7ad9492bc34a67cea03" dependencies = [ "cfg-if", "js-sys", @@ -1102,9 +1377,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c21f77c0bedc37fd5dc21f897894a5ca01e7bb159884559461862ae90c0b4c5" +checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1112,9 +1387,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" +checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", @@ -1125,15 +1400,15 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.84" +version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0046fef7e28c3804e5e38bfa31ea2a0f73905319b677e57ebe37e49358989b5d" +checksum = 
"ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-bindgen-test" -version = "0.3.34" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db36fc0f9fb209e88fb3642590ae0205bb5a56216dabd963ba15879fe53a30b" +checksum = "6e6e302a7ea94f83a6d09e78e7dc7d9ca7b186bc2829c24a22d0753efd680671" dependencies = [ "console_error_panic_hook", "js-sys", @@ -1145,9 +1420,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.34" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0734759ae6b3b1717d661fe4f016efcfb9828f5edb4520c18eaee05af3b43be9" +checksum = "ecb993dd8c836930ed130e020e77d9b2e65dd0fbab1b67c790b0f5d80b11a575" dependencies = [ "proc-macro2", "quote", @@ -1195,46 +1470,91 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] -name = "xz2" -version = "0.1.7" +name = "windows-sys" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ - "lzma-sys", + "windows-targets", ] [[package]] -name = "yansi" -version = "0.5.1" +name = "windows-targets" +version = "0.48.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "05d4b17490f70499f20b9e791dcf6a299785ce8af4d709018206dc5b4953e95f" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] [[package]] -name = "zerocopy" -version = "0.6.1" +name = "windows_aarch64_gnullvm" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "332f188cc1bcf1fe1064b8c58d150f497e697f49774aa846f2dc949d9a25f236" -dependencies = [ - "byteorder", - "zerocopy-derive", -] +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] -name = "zerocopy-derive" -version = "0.3.1" +name = "windows_aarch64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0fbc82b82efe24da867ee52e015e58178684bd9dd64c34e66bdf21da2582a9f" +checksum = 
"1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" dependencies = [ - "proc-macro2", - "syn", - "synstructure", + "lzma-sys", ] +[[package]] +name = "yansi" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + [[package]] name = "zstd" -version = "0.12.3+zstd.1.5.2" +version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" dependencies = [ "zstd-safe", ] diff --git a/lang/rust/Cargo.toml b/lang/rust/Cargo.toml index 8a6dfaa0328..ba89acf8806 100644 --- a/lang/rust/Cargo.toml +++ b/lang/rust/Cargo.toml @@ -25,3 +25,21 @@ members = [ exclude = [ "fuzz" ] + +resolver = "2" + +[workspace.package] +version = "0.16.0" +authors = ["Apache Avro team "] +license = "Apache-2.0" +readme = "README.md" +repository = "https://github.com/apache/avro" +edition = "2021" +rust-version = "1.65.0" +keywords = ["avro", "data", "serialization"] +categories = ["encoding"] +documentation = "https://docs.rs/apache-avro" + +[profile.release.package.hello-wasm] +# Tell `rustc` to optimize for small code size. +opt-level = "s" diff --git a/lang/rust/Makefile b/lang/rust/Makefile index c948b851101..4a903c1c6d7 100644 --- a/lang/rust/Makefile +++ b/lang/rust/Makefile @@ -85,7 +85,7 @@ doc-local: .PHONY: readme readme: - cargo readme > README.md + cargo rdme # BUILDING diff --git a/lang/rust/avro/Cargo.toml b/lang/rust/avro/Cargo.toml index d83bd45f080..9c441457159 100644 --- a/lang/rust/avro/Cargo.toml +++ b/lang/rust/avro/Cargo.toml @@ -17,17 +17,17 @@ [package] name = "apache-avro" -version = "0.15.0" -authors = ["Apache Avro team "] description = "A library for working with Apache Avro in Rust" -license = "Apache-2.0" -readme = "README.md" -repository = "https://github.com/apache/avro" -edition = "2021" -rust-version = "1.60.0" -keywords = ["avro", "data", "serialization"] -categories = ["encoding"] -documentation = "https://docs.rs/apache-avro" +version.workspace = true +authors.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true +keywords.workspace = true +categories.workspace = true +documentation.workspace = true [features] bzip = ["bzip2"] @@ -54,27 +54,25 @@ harness = false name = "single" [dependencies] -apache-avro-derive = { default-features = false, version = "0.15.0", path = "../avro_derive", optional = true } -byteorder = { default-features = false, version = "1.4.3" } +apache-avro-derive = { default-features = false, version = "0.16.0", path = "../avro_derive", optional = true } bzip2 = { default-features = false, version = "0.4.4", optional = true } crc32fast = { default-features = false, version = "1.3.2", optional = true } -digest = { default-features = false, version = "0.10.6", features = ["core-api"] } +digest = { default-features = false, version = "0.10.7", features = ["core-api"] } lazy_static = { default-features = false, version = "1.4.0" } -libflate = { default-features = false, version = "1.2.0" } -log = { default-features = false, version = "0.4.17" } -num-bigint = { default-features = false, 
version = "0.4.3" } -regex = { default-features = false, version = "1.7.1", features = ["std", "perf"] } -serde = { default-features = false, version = "1.0.154", features = ["derive"] } -serde_json = { default-features = false, version = "1.0.94", features = ["std"] } +libflate = { default-features = false, version = "2.0.0", features = ["std"] } +log = { default-features = false, version = "0.4.20" } +num-bigint = { default-features = false, version = "0.4.4" } +regex-lite = { default-features = false, version = "0.1.0", features = ["std", "string"] } +serde = { default-features = false, version = "1.0.188", features = ["derive"] } +serde_json = { default-features = false, version = "1.0.107", features = ["std"] } snap = { default-features = false, version = "1.1.0", optional = true } -strum = { default-features = false, version = "0.24.1" } -strum_macros = { default-features = false, version = "0.24.3" } -thiserror = { default-features = false, version = "1.0.39" } -typed-builder = { default-features = false, version = "0.14.0" } -uuid = { default-features = false, version = "1.3.0", features = ["serde", "std"] } +strum = { default-features = false, version = "0.25.0" } +strum_macros = { default-features = false, version = "0.25.2" } +thiserror = { default-features = false, version = "1.0.48" } +typed-builder = { default-features = false, version = "0.16.2" } +uuid = { default-features = false, version = "1.4.1", features = ["serde", "std"] } xz2 = { default-features = false, version = "0.1.7", optional = true } -zerocopy = { default-features = false, version = "0.6.1" } -zstd = { default-features = false, version = "0.12.3+zstd.1.5.2", optional = true } +zstd = { default-features = false, version = "0.12.4+zstd.1.5.2", optional = true } [target.'cfg(target_arch = "wasm32")'.dependencies] quad-rand = { default-features = false, version = "0.2.1" } @@ -83,10 +81,11 @@ quad-rand = { default-features = false, version = "0.2.1" } rand = { default-features = false, version = "0.8.5", features = ["default"] } [dev-dependencies] -anyhow = { default-features = false, version = "1.0.69", features = ["std"] } -apache-avro-test-helper = { default-features = false, version = "0.15.0", path = "../avro_test_helper" } -criterion = { default-features = false, version = "0.4.0" } -hex-literal = { default-features = false, version = "0.3.4" } -md-5 = { default-features = false, version = "0.10.5" } -pretty_assertions = { default-features = false, version = "1.3.0", features = ["std"] } -sha2 = { default-features = false, version = "0.10.6" } +anyhow = { default-features = false, version = "1.0.75", features = ["std"] } +apache-avro-test-helper = { default-features = false, version = "0.16.0", path = "../avro_test_helper" } +criterion = { default-features = false, version = "0.5.1" } +hex-literal = { default-features = false, version = "0.4.1" } +md-5 = { default-features = false, version = "0.10.6" } +pretty_assertions = { default-features = false, version = "1.4.0", features = ["std"] } +serial_test = "2.0.0" +sha2 = { default-features = false, version = "0.10.7" } diff --git a/lang/rust/avro/README.md b/lang/rust/avro/README.md index 7ae6fc77ceb..ad5ec70689f 100644 --- a/lang/rust/avro/README.md +++ b/lang/rust/avro/README.md @@ -24,7 +24,9 @@ [![Latest Documentation](https://docs.rs/apache-avro/badge.svg)](https://docs.rs/apache-avro) [![Apache License 2.0](https://img.shields.io/badge/license-Apache%202-blue.svg)](https://github.com/apache/avro/blob/master/LICENSE.txt) -A library for working with 
[Apache Avro](https://avro.apache.org/) in Rust language. +<!-- cargo-rdme start --> + +A library for working with [Apache Avro](https://avro.apache.org/) in Rust. Please check our [documentation](https://docs.rs/apache-avro) for examples, tutorials and API reference. @@ -33,7 +35,7 @@ data structures and a compact, fast, binary data format. All data in Avro is schematized, as in the following example: -``` +```json { "type": "record", "name": "test", @@ -95,11 +97,10 @@ version = "x.y" features = ["xz"] ``` - ## Upgrading to a newer minor version The library is still in beta, so there might be backward-incompatible changes between minor -versions. If you have troubles upgrading, check the [version upgrade guide](migration_guide.md). +versions. If you have trouble upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/master/lang/rust/migration_guide.md). ## Defining a schema @@ -189,7 +190,6 @@ associated type provided by the library to specify the data we want to serialize ```rust use apache_avro::types::Record; use apache_avro::Writer; -# // a writer needs a schema and something to write to let mut writer = Writer::new(&schema, Vec::new()); @@ -276,12 +276,10 @@ You must enable the `bzip` feature to use this codec. * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. You must enable the `xz` feature to use this codec. - To specify a codec to use to compress data, just specify it while creating a `Writer`: ```rust use apache_avro::Writer; use apache_avro::Codec; -# let mut writer = Writer::with_codec(&schema, Vec::new(), Codec::Deflate); ``` @@ -293,7 +291,6 @@ codec: ```rust use apache_avro::Reader; -# // reader creation can fail in case the input to read from is not Avro-compatible or malformed let reader = Reader::new(&input[..]).unwrap(); ``` @@ -303,7 +300,6 @@ the data has been written with, we can just do as the following: ```rust use apache_avro::Schema; use apache_avro::Reader; -# let reader_raw_schema = r#" { @@ -342,7 +338,6 @@ We can just read directly instances of `Value` out of the `Reader` iterator: ```rust use apache_avro::Reader; -# let reader = Reader::new(&input[..]).unwrap(); // value is a Result of an Avro Value in case the read operation fails @@ -434,9 +429,10 @@ fn main() -> Result<(), Error> { `apache-avro` also supports the logical types listed in the [Avro specification](https://avro.apache.org/docs/current/spec.html#Logical+Types): 1. `Decimal` using the [`num_bigint`](https://docs.rs/num-bigint/0.2.6/num_bigint) crate -1. UUID using the [`uuid`](https://docs.rs/uuid/0.8.1/uuid) crate +1. UUID using the [`uuid`](https://docs.rs/uuid/1.0.0/uuid) crate 1. Date, Time (milli) as `i32` and Time (micro) as `i64` 1. Timestamp (milli and micro) as `i64` +1. Local timestamp (milli and micro) as `i64` 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` Note that the on-disk representation is identical to the underlying primitive/complex type.
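That wire-format note can be sanity-checked with a minimal sketch. It assumes the crate's public `to_avro_datum` raw-datum encoder as exposed by the 0.16 API in this patch; the schema literals and the value `1_234` are illustrative only, not part of the change:

```rust
use apache_avro::{to_avro_datum, types::Value, Schema};

fn main() -> Result<(), apache_avro::Error> {
    // A plain long and a timestamp-millis datum share the same wire format:
    // the logical type annotates the schema but does not change the bytes.
    let long_schema = Schema::parse_str(r#""long""#)?;
    let ts_schema =
        Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-millis"}"#)?;

    let as_long = to_avro_datum(&long_schema, Value::Long(1_234))?;
    let as_timestamp = to_avro_datum(&ts_schema, Value::TimestampMillis(1_234))?;
    assert_eq!(as_long, as_timestamp); // identical zig-zag varint encoding
    Ok(())
}
```

The same identity holds for the local-timestamp values added by this patch: a `Value::LocalTimestampMillis(n)` is written as the plain zig-zag long `n`.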
@@ -504,6 +500,16 @@ fn main() -> Result<(), Error> { "type": "long", "logicalType": "timestamp-micros" }, + { + "name": "local_timestamp_millis", + "type": "long", + "logicalType": "local-timestamp-millis" + }, + { + "name": "local_timestamp_micros", + "type": "long", + "logicalType": "local-timestamp-micros" + }, { "name": "duration", "type": { @@ -532,6 +538,8 @@ fn main() -> Result<(), Error> { record.put("time_micros", Value::TimeMicros(3)); record.put("timestamp_millis", Value::TimestampMillis(4)); record.put("timestamp_micros", Value::TimestampMicros(5)); + record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); + record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); writer.append(record)?; @@ -642,9 +650,11 @@ let readers_schema = Schema::parse_str(r#"{"type": "array", "items":"int"}"#).un assert_eq!(false, SchemaCompatibility::can_read(&writers_schema, &readers_schema)); ``` + +<!-- cargo-rdme end --> ## Minimal supported Rust version -1.60.0 +1.65.0 ## License This project is licensed under [Apache License 2.0](https://github.com/apache/avro/blob/master/LICENSE.txt). diff --git a/lang/rust/avro/examples/benchmark.rs b/lang/rust/avro/examples/benchmark.rs index 4cf0f1365f7..c3eac431393 100644 --- a/lang/rust/avro/examples/benchmark.rs +++ b/lang/rust/avro/examples/benchmark.rs @@ -20,6 +20,7 @@ use apache_avro::{ types::{Record, Value}, Reader, Writer, }; +use apache_avro_test_helper::TestResult; use std::{ io::{BufReader, BufWriter}, time::{Duration, Instant}, @@ -45,7 +46,7 @@ fn benchmark( big_or_small: &str, count: usize, runs: usize, -) -> anyhow::Result<()> { +) -> TestResult { let mut records = Vec::new(); for __ in 0..count { records.push(record.clone()); @@ -100,7 +101,7 @@ fn benchmark( Ok(()) } -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let raw_small_schema = r#" {"namespace": "test", "type": "record", "name": "Test", "fields": [{"type": {"type": "string"}, "name": "field"}]} "#; diff --git a/lang/rust/avro/examples/generate_interop_data.rs b/lang/rust/avro/examples/generate_interop_data.rs index 72b4d10b220..35a6dc7c090 100644 --- a/lang/rust/avro/examples/generate_interop_data.rs +++ b/lang/rust/avro/examples/generate_interop_data.rs @@ -20,6 +20,7 @@ use apache_avro::{ types::{Record, Value}, Codec, Writer, }; +use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, io::{BufWriter, Write}, @@ -74,7 +75,7 @@ fn create_datum(schema: &Schema) -> Record { datum } -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let schema_str = std::fs::read_to_string("../../share/test/schemas/interop.avsc") .expect("Unable to read the interop Avro schema"); let schema = Schema::parse_str(schema_str.as_str())?; @@ -104,7 +105,7 @@ fn main() -> anyhow::Result<()> { Ok(()) } -fn write_user_metadata(writer: &mut Writer>) -> anyhow::Result<()> { +fn write_user_metadata(writer: &mut Writer>) -> TestResult { writer.add_user_metadata("user_metadata".to_string(), b"someByteArray")?; Ok(()) diff --git a/lang/rust/avro/examples/test_interop_data.rs b/lang/rust/avro/examples/test_interop_data.rs index 611c0e19238..736b1fd7d03 100644 --- a/lang/rust/avro/examples/test_interop_data.rs +++ b/lang/rust/avro/examples/test_interop_data.rs @@ -16,13 +16,14 @@ // under the License.
use apache_avro::Reader; +use apache_avro_test_helper::TestResult; use std::{ collections::HashMap, ffi::OsStr, io::{BufReader, Read}, }; -fn main() -> anyhow::Result<()> { +fn main() -> TestResult { let mut expected_user_metadata: HashMap<String, Vec<u8>> = HashMap::new(); expected_user_metadata.insert("user_metadata".to_string(), b"someByteArray".to_vec()); diff --git a/lang/rust/avro/src/codec.rs b/lang/rust/avro/src/codec.rs index 0866ff62d53..a394cad2545 100644 --- a/lang/rust/avro/src/codec.rs +++ b/lang/rust/avro/src/codec.rs @@ -82,8 +82,6 @@ impl Codec { } #[cfg(feature = "snappy")] Codec::Snappy => { - use byteorder::ByteOrder; - let mut encoded: Vec<u8> = vec![0; snap::raw::max_compress_len(stream.len())]; let compressed_size = snap::raw::Encoder::new() .compress(&stream[..], &mut encoded[..]) @@ -92,8 +90,10 @@ let mut hasher = Hasher::new(); hasher.update(&stream[..]); let checksum = hasher.finalize(); - byteorder::BigEndian::write_u32(&mut encoded[compressed_size..], checksum); - encoded.truncate(compressed_size + 4); + let checksum_as_bytes = checksum.to_be_bytes(); + let checksum_len = checksum_as_bytes.len(); + encoded.truncate(compressed_size + checksum_len); + encoded[compressed_size..].copy_from_slice(&checksum_as_bytes); *stream = encoded; } @@ -137,8 +137,6 @@ impl Codec { } #[cfg(feature = "snappy")] Codec::Snappy => { - use byteorder::ByteOrder; - let decompressed_size = snap::raw::decompress_len(&stream[..stream.len() - 4]) .map_err(Error::GetSnappyDecompressLen)?; let mut decoded = vec![0; decompressed_size]; @@ -146,7 +144,10 @@ .decompress(&stream[..stream.len() - 4], &mut decoded[..]) .map_err(Error::SnappyDecompress)?; - let expected = byteorder::BigEndian::read_u32(&stream[stream.len() - 4..]); + let mut last_four: [u8; 4] = [0; 4]; + last_four.copy_from_slice(&stream[(stream.len() - 4)..]); + let expected: u32 = u32::from_be_bytes(last_four); + let mut hasher = Hasher::new(); hasher.update(&decoded); let actual = hasher.finalize(); @@ -185,56 +186,59 @@ impl Codec { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::{assert_eq, assert_ne}; const INPUT: &[u8] = b"theanswertolifetheuniverseandeverythingis42theanswertolifetheuniverseandeverythingis4theanswertolifetheuniverseandeverythingis2"; #[test] - fn null_compress_and_decompress() { + fn null_compress_and_decompress() -> TestResult { let codec = Codec::Null; let mut stream = INPUT.to_vec(); - codec.compress(&mut stream).unwrap(); + codec.compress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); - codec.decompress(&mut stream).unwrap(); + codec.decompress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); + Ok(()) } #[test] - fn deflate_compress_and_decompress() { - compress_and_decompress(Codec::Deflate); + fn deflate_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Deflate) } #[cfg(feature = "snappy")] #[test] - fn snappy_compress_and_decompress() { - compress_and_decompress(Codec::Snappy); + fn snappy_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Snappy) } #[cfg(feature = "zstandard")] #[test] - fn zstd_compress_and_decompress() { - compress_and_decompress(Codec::Zstandard); + fn zstd_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Zstandard) } #[cfg(feature = "bzip")] #[test] - fn bzip_compress_and_decompress() { - compress_and_decompress(Codec::Bzip2); + fn bzip_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Bzip2) }
#[cfg(feature = "xz")] #[test] - fn xz_compress_and_decompress() { - compress_and_decompress(Codec::Xz); + fn xz_compress_and_decompress() -> TestResult { + compress_and_decompress(Codec::Xz) } - fn compress_and_decompress(codec: Codec) { + fn compress_and_decompress(codec: Codec) -> TestResult { let mut stream = INPUT.to_vec(); - codec.compress(&mut stream).unwrap(); + codec.compress(&mut stream)?; assert_ne!(INPUT, stream.as_slice()); assert!(INPUT.len() > stream.len()); - codec.decompress(&mut stream).unwrap(); + codec.decompress(&mut stream)?; assert_eq!(INPUT, stream.as_slice()); + Ok(()) } #[test] diff --git a/lang/rust/avro/src/de.rs b/lang/rust/avro/src/de.rs index a5bc14b4f00..6600564489a 100644 --- a/lang/rust/avro/src/de.rs +++ b/lang/rust/avro/src/de.rs @@ -244,7 +244,9 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Long(i) | Value::TimeMicros(i) | Value::TimestampMillis(i) - | Value::TimestampMicros(i) => visitor.visit_i64(*i), + | Value::TimestampMicros(i) + | Value::LocalTimestampMillis(i) + | Value::LocalTimestampMicros(i) => visitor.visit_i64(*i), &Value::Float(f) => visitor.visit_f32(f), &Value::Double(d) => visitor.visit_f64(d), Value::Union(_i, u) => match **u { @@ -254,7 +256,9 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { Value::Long(i) | Value::TimeMicros(i) | Value::TimestampMillis(i) - | Value::TimestampMicros(i) => visitor.visit_i64(i), + | Value::TimestampMicros(i) + | Value::LocalTimestampMillis(i) + | Value::LocalTimestampMicros(i) => visitor.visit_i64(i), Value::Float(f) => visitor.visit_f32(f), Value::Double(d) => visitor.visit_f64(d), Value::Record(ref fields) => visitor.visit_map(RecordDeserializer::new(fields)), @@ -515,6 +519,7 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { match *self.input { // This branch can be anything... 
Value::Record(ref fields) => visitor.visit_enum(EnumDeserializer::new(fields)), + Value::String(ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), // This has to be a unit Enum Value::Enum(_index, ref field) => visitor.visit_enum(EnumUnitDeserializer::new(field)), _ => Err(de::Error::custom(format!( @@ -537,6 +542,10 @@ impl<'a, 'de> de::Deserializer<'de> for &'a Deserializer<'de> { { self.deserialize_any(visitor) } + + fn is_human_readable(&self) -> bool { + crate::util::is_human_readable() + } } impl<'de> de::SeqAccess<'de> for SeqDeserializer<'de> { @@ -647,8 +656,12 @@ pub fn from_value<'de, D: Deserialize<'de>>(value: &'de Value) -> Result TestResult { let test = Value::Record(vec![ ("a".to_owned(), Value::Long(27)), ("b".to_owned(), Value::String("foo".to_owned())), @@ -753,7 +766,7 @@ mod tests { a: 27, b: "foo".to_owned(), }; - let final_value: Test = from_value(&test).unwrap(); + let final_value: Test = from_value(&test)?; assert_eq!(final_value, expected); let test_inner = Value::Record(vec![ @@ -768,18 +781,20 @@ mod tests { ]); let expected_inner = TestInner { a: expected, b: 35 }; - let final_value: TestInner = from_value(&test_inner).unwrap(); - assert_eq!(final_value, expected_inner) + let final_value: TestInner = from_value(&test_inner)?; + assert_eq!(final_value, expected_inner); + + Ok(()) } #[test] - fn test_from_value_unit_enum() { + fn test_from_value_unit_enum() -> TestResult { let expected = TestUnitExternalEnum { a: UnitExternalEnum::Val1, }; let test = Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]); - let final_value: TestUnitExternalEnum = from_value(&test).unwrap(); + let final_value: TestUnitExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit external enum" @@ -793,7 +808,7 @@ mod tests { "a".to_owned(), Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), )]); - let final_value: TestUnitInternalEnum = from_value(&test).unwrap(); + let final_value: TestUnitInternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit internal enum" @@ -806,7 +821,7 @@ mod tests { "a".to_owned(), Value::Record(vec![("t".to_owned(), Value::String("Val1".to_owned()))]), )]); - let final_value: TestUnitAdjacentEnum = from_value(&test).unwrap(); + let final_value: TestUnitAdjacentEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit adjacent enum" @@ -816,15 +831,16 @@ mod tests { }; let test = Value::Record(vec![("a".to_owned(), Value::Null)]); - let final_value: TestUnitUntaggedEnum = from_value(&test).unwrap(); + let final_value: TestUnitUntaggedEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing unit untagged enum" ); + Ok(()) } #[test] - fn avro_3645_3646_test_from_value_enum() { + fn avro_3645_3646_test_from_value_enum() -> TestResult { #[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] struct TestNullExternalEnum { a: NullExternalEnum, @@ -896,13 +912,15 @@ mod tests { ]; for (expected, test) in data.iter() { - let actual: TestNullExternalEnum = from_value(test).unwrap(); + let actual: TestNullExternalEnum = from_value(test)?; assert_eq!(actual, *expected); } + + Ok(()) } #[test] - fn test_from_value_single_value_enum() { + fn test_from_value_single_value_enum() -> TestResult { let expected = TestSingleValueExternalEnum { a: SingleValueExternalEnum::Double(64.0), }; @@ -917,15 +935,17 @@ mod tests { ), ]), )]); - let final_value: TestSingleValueExternalEnum = 
from_value(&test).unwrap(); + let final_value: TestSingleValueExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "Error deserializing single value external enum(union)" ); + + Ok(()) } #[test] - fn test_from_value_struct_enum() { + fn test_from_value_struct_enum() -> TestResult { let expected = TestStructExternalEnum { a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, }; @@ -946,15 +966,17 @@ mod tests { ), ]), )]); - let final_value: TestStructExternalEnum = from_value(&test).unwrap(); + let final_value: TestStructExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "error deserializing struct external enum(union)" ); + + Ok(()) } #[test] - fn test_avro_3692_from_value_struct_flatten() { + fn test_avro_3692_from_value_struct_flatten() -> TestResult { #[derive(Deserialize, PartialEq, Debug)] struct S1 { f1: String, @@ -976,12 +998,14 @@ mod tests { ("f1".to_owned(), "Hello".into()), ("f2".to_owned(), "World".into()), ]); - let final_value: S1 = from_value(&test).unwrap(); + let final_value: S1 = from_value(&test)?; assert_eq!(final_value, expected); + + Ok(()) } #[test] - fn test_from_value_tuple_enum() { + fn test_from_value_tuple_enum() -> TestResult { let expected = TestTupleExternalEnum { a: TupleExternalEnum::Val1(1.0, 2.0), }; @@ -999,17 +1023,17 @@ mod tests { ), ]), )]); - let final_value: TestTupleExternalEnum = from_value(&test).unwrap(); + let final_value: TestTupleExternalEnum = from_value(&test)?; assert_eq!( final_value, expected, "error serializing tuple external enum(union)" ); - } - type TestResult<T> = Result<T, Box<dyn std::error::Error>>; + Ok(()) + } #[test] - fn test_date() -> TestResult<()> { + fn test_date() -> TestResult { let raw_value = 1; let value = Value::Date(raw_value); let result = crate::from_value::<i32>(&value)?; @@ -1018,7 +1042,7 @@ mod tests { } #[test] - fn test_time_millis() -> TestResult<()> { + fn test_time_millis() -> TestResult { let raw_value = 1; let value = Value::TimeMillis(raw_value); let result = crate::from_value::<i32>(&value)?; @@ -1027,7 +1051,7 @@ mod tests { } #[test] - fn test_time_micros() -> TestResult<()> { + fn test_time_micros() -> TestResult { let raw_value = 1; let value = Value::TimeMicros(raw_value); let result = crate::from_value::<i64>(&value)?; @@ -1036,7 +1060,7 @@ mod tests { } #[test] - fn test_timestamp_millis() -> TestResult<()> { + fn test_timestamp_millis() -> TestResult { let raw_value = 1; let value = Value::TimestampMillis(raw_value); let result = crate::from_value::<i64>(&value)?; @@ -1045,7 +1069,7 @@ mod tests { } #[test] - fn test_timestamp_micros() -> TestResult<()> { + fn test_timestamp_micros() -> TestResult { let raw_value = 1; let value = Value::TimestampMicros(raw_value); let result = crate::from_value::<i64>(&value)?; @@ -1054,16 +1078,34 @@ mod tests { } #[test] - fn test_from_value_uuid_str() -> TestResult<()> { + fn test_avro_3853_local_timestamp_millis() -> TestResult { + let raw_value = 1; + let value = Value::LocalTimestampMillis(raw_value); + let result = crate::from_value::<i64>(&value)?; + assert_eq!(result, raw_value); + Ok(()) + } + + #[test] + fn test_avro_3853_local_timestamp_micros() -> TestResult { + let raw_value = 1; + let value = Value::LocalTimestampMicros(raw_value); + let result = crate::from_value::<i64>(&value)?; + assert_eq!(result, raw_value); + Ok(()) + } + + #[test] + fn test_from_value_uuid_str() -> TestResult { let raw_value = "9ec535ff-3e2a-45bd-91d3-0a01321b5a49"; - let value = Value::Uuid(Uuid::parse_str(raw_value).unwrap()); + let value = Value::Uuid(Uuid::parse_str(raw_value)?); let result =
crate::from_value::<Uuid>(&value)?; assert_eq!(result.to_string(), raw_value); Ok(()) } #[test] - fn test_from_value_uuid_slice() -> TestResult<()> { + fn test_from_value_uuid_slice() -> TestResult { let raw_value = &[4, 54, 67, 12, 43, 2, 2, 76, 32, 50, 87, 5, 1, 33, 43, 87]; let value = Value::Uuid(Uuid::from_slice(raw_value)?); let result = crate::from_value::<Uuid>(&value)?; @@ -1072,7 +1114,7 @@ mod tests { } #[test] - fn test_from_value_with_union() -> TestResult<()> { + fn test_from_value_with_union() -> TestResult { // AVRO-3232 test for deserialize_any on missing fields on the destination struct: // Error: DeserializeValue("Unsupported union") // Error: DeserializeValue("incorrect value of type: String") @@ -1096,6 +1138,8 @@ mod tests { ("time_micros_a".to_string(), 123), ("timestamp_millis_b".to_string(), 234), ("timestamp_micros_c".to_string(), 345), + ("local_timestamp_millis_d".to_string(), 678), + ("local_timestamp_micros_e".to_string(), 789), ] .iter() .cloned() .collect(); @@ -1112,6 +1156,12 @@ mod tests { key if key.starts_with("timestamp_micros_") => { (k.clone(), Value::TimestampMicros(*v)) } + key if key.starts_with("local_timestamp_millis_") => { + (k.clone(), Value::LocalTimestampMillis(*v)) + } + key if key.starts_with("local_timestamp_micros_") => { + (k.clone(), Value::LocalTimestampMicros(*v)) + } _ => unreachable!("unexpected key: {:?}", k), }) .collect(); @@ -1161,6 +1211,22 @@ mod tests { "a_non_existing_timestamp_micros".to_string(), Value::Union(0, Box::new(Value::TimestampMicros(-345))), ), + ( + "a_local_timestamp_millis".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMillis(678))), + ), + ( + "a_non_existing_local_timestamp_millis".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMillis(-678))), + ), + ( + "a_local_timestamp_micros".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMicros(789))), + ), + ( + "a_non_existing_local_timestamp_micros".to_string(), + Value::Union(0, Box::new(Value::LocalTimestampMicros(-789))), + ), ( "a_record".to_string(), Value::Union( @@ -1220,4 +1286,33 @@ mod tests { assert_eq!(deserialized, reference); Ok(()) } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_false() -> TestResult { + use serde::de::Deserializer as SerdeDeserializer; + + let is_human_readable = false; + crate::util::SERDE_HUMAN_READABLE.store(is_human_readable, Ordering::Release); + + let deser = &Deserializer::new(&Value::Null); + + assert_eq!(deser.is_human_readable(), is_human_readable); + + Ok(()) + } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_true() -> TestResult { + use serde::de::Deserializer as SerdeDeserializer; + + crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release); + + let deser = &Deserializer::new(&Value::Null); + + assert!(deser.is_human_readable()); + + Ok(()) + } } diff --git a/lang/rust/avro/src/decimal.rs b/lang/rust/avro/src/decimal.rs index e67430384eb..a06ab45a6ca 100644 --- a/lang/rust/avro/src/decimal.rs +++ b/lang/rust/avro/src/decimal.rs @@ -55,6 +55,12 @@ impl Decimal { } } +impl From<Decimal> for BigInt { + fn from(decimal: Decimal) -> Self { + decimal.value + } +} + /// Gets the internal byte array representation of a referenced decimal.
/// Usage: /// ``` @@ -102,24 +108,29 @@ impl> From for Decimal { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::convert::TryFrom; #[test] - fn test_decimal_from_bytes_from_ref_decimal() { + fn test_decimal_from_bytes_from_ref_decimal() -> TestResult { let input = vec![1, 24]; let d = Decimal::from(&input); - let output = >::try_from(&d).unwrap(); + let output = >::try_from(&d)?; assert_eq!(output, input); + + Ok(()) } #[test] - fn test_decimal_from_bytes_from_owned_decimal() { + fn test_decimal_from_bytes_from_owned_decimal() -> TestResult { let input = vec![1, 24]; let d = Decimal::from(&input); - let output = >::try_from(d).unwrap(); + let output = >::try_from(d)?; assert_eq!(output, input); + + Ok(()) } } diff --git a/lang/rust/avro/src/decode.rs b/lang/rust/avro/src/decode.rs index 4f9e7e94556..b13c76739b9 100644 --- a/lang/rust/avro/src/decode.rs +++ b/lang/rust/avro/src/decode.rs @@ -18,7 +18,10 @@ use crate::{ decimal::Decimal, duration::Duration, - schema::{Name, Namespace, ResolvedSchema, Schema}, + schema::{ + DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, + Schema, + }, types::Value, util::{safe_len, zag_i32, zag_i64}, AvroResult, Error, @@ -98,7 +101,7 @@ pub(crate) fn decode_internal>( } } } - Schema::Decimal { ref inner, .. } => match &**inner { + Schema::Decimal(DecimalSchema { ref inner, .. }) => match &**inner { Schema::Fixed { .. } => { match decode_internal(inner, names, enclosing_namespace, reader)? { Value::Fixed(_, bytes) => Ok(Value::Decimal(Decimal::from(bytes))), @@ -127,6 +130,8 @@ pub(crate) fn decode_internal>( Schema::TimeMicros => zag_i64(reader).map(Value::TimeMicros), Schema::TimestampMillis => zag_i64(reader).map(Value::TimestampMillis), Schema::TimestampMicros => zag_i64(reader).map(Value::TimestampMicros), + Schema::LocalTimestampMillis => zag_i64(reader).map(Value::LocalTimestampMillis), + Schema::LocalTimestampMicros => zag_i64(reader).map(Value::LocalTimestampMicros), Schema::Duration => { let mut buf = [0u8; 12]; reader.read_exact(&mut buf).map_err(Error::ReadDuration)?; @@ -164,7 +169,7 @@ pub(crate) fn decode_internal>( } } } - Schema::Fixed { size, .. } => { + Schema::Fixed(FixedSchema { size, .. }) => { let mut buf = vec![0u8; size]; reader .read_exact(&mut buf) @@ -232,11 +237,11 @@ pub(crate) fn decode_internal>( } Err(io_err) => Err(io_err), }, - Schema::Record { + Schema::Record(RecordSchema { ref name, ref fields, .. - } => { + }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); // Benchmarks indicate ~10% improvement using this method. let mut items = Vec::with_capacity(fields.len()); @@ -254,7 +259,7 @@ pub(crate) fn decode_internal>( } Ok(Value::Record(items)) } - Schema::Enum { ref symbols, .. } => { + Schema::Enum(EnumSchema { ref symbols, .. }) => { Ok(if let Value::Int(raw_index) = decode_int(reader)? 
{ let index = usize::try_from(raw_index) .map_err(|e| Error::ConvertI32ToUsize(e, raw_index))?; @@ -293,64 +298,73 @@ mod tests { use crate::{ decode::decode, encode::{encode, tests::success}, - schema::Schema, + schema::{DecimalSchema, FixedSchema, Schema}, types::{ Value, Value::{Array, Int, Map}, }, Decimal, }; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use std::collections::HashMap; #[test] - fn test_decode_array_without_size() { + fn test_decode_array_without_size() -> TestResult { let mut input: &[u8] = &[6, 2, 4, 6, 0]; let result = decode(&Schema::Array(Box::new(Schema::Int)), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result.unwrap()); + assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); + + Ok(()) } #[test] - fn test_decode_array_with_size() { + fn test_decode_array_with_size() -> TestResult { let mut input: &[u8] = &[5, 6, 2, 4, 6, 0]; let result = decode(&Schema::Array(Box::new(Schema::Int)), &mut input); - assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result.unwrap()); + assert_eq!(Array(vec!(Int(1), Int(2), Int(3))), result?); + + Ok(()) } #[test] - fn test_decode_map_without_size() { + fn test_decode_map_without_size() -> TestResult { let mut input: &[u8] = &[0x02, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; let result = decode(&Schema::Map(Box::new(Schema::Int)), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result.unwrap()); + assert_eq!(Map(expected), result?); + + Ok(()) } #[test] - fn test_decode_map_with_size() { + fn test_decode_map_with_size() -> TestResult { let mut input: &[u8] = &[0x01, 0x0C, 0x08, 0x74, 0x65, 0x73, 0x74, 0x02, 0x00]; let result = decode(&Schema::Map(Box::new(Schema::Int)), &mut input); let mut expected = HashMap::new(); expected.insert(String::from("test"), Int(1)); - assert_eq!(Map(expected), result.unwrap()); + assert_eq!(Map(expected), result?); + + Ok(()) } #[test] - fn test_negative_decimal_value() { + fn test_negative_decimal_value() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed { + let inner = Box::new(Schema::Fixed(FixedSchema { size: 2, doc: None, - name: Name::new("decimal").unwrap(), + name: Name::new("decimal")?, aliases: None, attributes: Default::default(), - }); - let schema = Schema::Decimal { + })); + let schema = Schema::Decimal(DecimalSchema { inner, precision: 4, scale: 2, - }; + }); let bigint = (-423).to_bigint().unwrap(); let value = Value::Decimal(Decimal::from(bigint.to_signed_bytes_be())); @@ -358,26 +372,28 @@ mod tests { encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); let mut bytes = &buffer[..]; - let result = decode(&schema, &mut bytes).unwrap(); + let result = decode(&schema, &mut bytes)?; assert_eq!(result, value); + + Ok(()) } #[test] - fn test_decode_decimal_with_bigger_than_necessary_size() { + fn test_decode_decimal_with_bigger_than_necessary_size() -> TestResult { use crate::{encode::encode, schema::Name}; use num_bigint::ToBigInt; - let inner = Box::new(Schema::Fixed { + let inner = Box::new(Schema::Fixed(FixedSchema { size: 13, - name: Name::new("decimal").unwrap(), + name: Name::new("decimal")?, aliases: None, doc: None, attributes: Default::default(), - }); - let schema = Schema::Decimal { + })); + let schema = Schema::Decimal(DecimalSchema { inner, precision: 4, scale: 2, - }; + }); let value = Value::Decimal(Decimal::from( 
((-423).to_bigint().unwrap()).to_signed_bytes_be(), )); @@ -385,12 +401,14 @@ mod tests { encode(&value, &schema, &mut buffer).expect(&success(&value, &schema)); let mut bytes: &[u8] = &buffer[..]; - let result = decode(&schema, &mut bytes).unwrap(); + let result = decode(&schema, &mut bytes)?; assert_eq!(result, value); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_union() { + fn test_avro_3448_recursive_definition_decode_union() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = Schema::parse_str( r#" @@ -415,8 +433,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -450,10 +467,12 @@ mod tests { &schema )) ); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_array() { + fn test_avro_3448_recursive_definition_decode_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -480,8 +499,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -498,11 +516,13 @@ mod tests { "Failed to decode using recursive definitions with schema:\n {:?}\n", &schema )) - ) + ); + + Ok(()) } #[test] - fn test_avro_3448_recursive_definition_decode_map() { + fn test_avro_3448_recursive_definition_decode_map() -> TestResult { let schema = Schema::parse_str( r#" { @@ -529,8 +549,7 @@ mod tests { } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -550,11 +569,13 @@ mod tests { "Failed to decode using recursive definitions with schema:\n {:?}\n", &schema )) - ) + ); + + Ok(()) } #[test] - fn test_avro_3448_proper_multi_level_decoding_middle_namespace() { + fn test_avro_3448_proper_multi_level_decoding_middle_namespace() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = r#" { @@ -598,7 +619,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -665,10 +686,12 @@ mod tests { &schema )) ); + + Ok(()) } #[test] - fn test_avro_3448_proper_multi_level_decoding_inner_namespace() { + fn test_avro_3448_proper_multi_level_decoding_inner_namespace() -> TestResult { // if encoding fails in this test check the corresponding test in encode let schema = r#" { @@ -713,7 +736,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -780,5 +803,7 @@ mod tests { &schema )) ); + + Ok(()) } } diff --git a/lang/rust/avro/src/duration.rs b/lang/rust/avro/src/duration.rs index 3bdfe4d23ef..4aa6bd53a0c 100644 --- a/lang/rust/avro/src/duration.rs +++ b/lang/rust/avro/src/duration.rs @@ -14,10 +14,6 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
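The decimal decode tests above now build schemas through the dedicated `FixedSchema`/`DecimalSchema` structs rather than the old struct-like enum variants. A compact sketch of that construction, assuming a crate version where these structs are public as introduced here:

```rust
use apache_avro::schema::{DecimalSchema, FixedSchema, Name, Schema};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // A decimal(4, 2) logical type backed by a 2-byte fixed, as in the
    // negative-decimal test above.
    let inner = Box::new(Schema::Fixed(FixedSchema {
        name: Name::new("decimal")?,
        aliases: None,
        doc: None,
        size: 2,
        attributes: Default::default(),
    }));
    let schema = Schema::Decimal(DecimalSchema {
        inner,
        precision: 4,
        scale: 2,
    });
    assert!(matches!(schema, Schema::Decimal(DecimalSchema { scale: 2, .. })));
    Ok(())
}
```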
- -use byteorder::LittleEndian; -use zerocopy::U32; - /// A struct representing duration that hides the details of endianness and conversion between /// platform-native u32 and byte arrays. #[derive(Debug, Copy, Clone, Eq, PartialEq)] @@ -28,83 +24,77 @@ pub struct Duration { } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Months(U32); +pub struct Months(u32); impl Months { pub fn new(months: u32) -> Self { - Self(U32::new(months)) + Self(months) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Months) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Months { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Months { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Days(U32); +pub struct Days(u32); impl Days { pub fn new(days: u32) -> Self { - Self(U32::new(days)) + Self(days) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Days) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Days { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Days { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct Millis(U32); +pub struct Millis(u32); impl Millis { pub fn new(millis: u32) -> Self { - Self(U32::new(millis)) + Self(millis) + } + + fn as_bytes(&self) -> [u8; 4] { + self.0.to_le_bytes() } } impl From for u32 { fn from(days: Millis) -> Self { - days.0.get() + days.0 } } impl From<[u8; 4]> for Millis { fn from(bytes: [u8; 4]) -> Self { - Self(U32::from(bytes)) - } -} - -impl AsRef<[u8; 4]> for Millis { - fn as_ref(&self) -> &[u8; 4] { - self.0.as_ref() + Self(u32::from_le_bytes(bytes)) } } @@ -137,9 +127,9 @@ impl Duration { impl From for [u8; 12] { fn from(duration: Duration) -> Self { let mut bytes = [0u8; 12]; - bytes[0..4].copy_from_slice(duration.months.as_ref()); - bytes[4..8].copy_from_slice(duration.days.as_ref()); - bytes[8..12].copy_from_slice(duration.millis.as_ref()); + bytes[0..4].copy_from_slice(&duration.months.as_bytes()); + bytes[4..8].copy_from_slice(&duration.days.as_bytes()); + bytes[8..12].copy_from_slice(&duration.millis.as_bytes()); bytes } } diff --git a/lang/rust/avro/src/encode.rs b/lang/rust/avro/src/encode.rs index 40f4ee0f777..6e52e0c3b1e 100644 --- a/lang/rust/avro/src/encode.rs +++ b/lang/rust/avro/src/encode.rs @@ -16,7 +16,10 @@ // under the License. use crate::{ - schema::{Name, Namespace, ResolvedSchema, Schema, SchemaKind}, + schema::{ + DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, RecordSchema, ResolvedSchema, + Schema, SchemaKind, + }, types::{Value, ValueKind}, util::{zig_i32, zig_i64}, AvroResult, Error, @@ -74,12 +77,14 @@ pub(crate) fn encode_internal>( Value::Long(i) | Value::TimestampMillis(i) | Value::TimestampMicros(i) + | Value::LocalTimestampMillis(i) + | Value::LocalTimestampMicros(i) | Value::TimeMicros(i) => encode_long(*i, buffer), Value::Float(x) => buffer.extend_from_slice(&x.to_le_bytes()), Value::Double(x) => buffer.extend_from_slice(&x.to_le_bytes()), Value::Decimal(decimal) => match schema { - Schema::Decimal { inner, .. } => match *inner.clone() { - Schema::Fixed { size, .. } => { + Schema::Decimal(DecimalSchema { inner, .. }) => match *inner.clone() { + Schema::Fixed(FixedSchema { size, .. 
}) => { let bytes = decimal.to_sign_extended_bytes_with_len(size).unwrap(); let num_bytes = bytes.len(); if num_bytes != size { @@ -125,7 +130,7 @@ pub(crate) fn encode_internal>( Schema::String | Schema::Uuid => { encode_bytes(s, buffer); } - Schema::Enum { ref symbols, .. } => { + Schema::Enum(EnumSchema { ref symbols, .. }) => { if let Some(index) = symbols.iter().position(|item| item == s) { encode_int(index as i32, buffer); } else { @@ -193,33 +198,39 @@ pub(crate) fn encode_internal>( }); } } - Value::Record(fields) => { - if let Schema::Record { + Value::Record(value_fields) => { + if let Schema::Record(RecordSchema { ref name, fields: ref schema_fields, - ref lookup, .. - } = *schema + }) = *schema { let record_namespace = name.fully_qualified_name(enclosing_namespace).namespace; - for (name, value) in fields.iter() { - match lookup.get(name) { - Some(idx) => { - encode_internal( - value, - &schema_fields[*idx].schema, - names, - &record_namespace, - buffer, - )?; - } + + let mut lookup = HashMap::new(); + value_fields.iter().for_each(|(name, field)| { + lookup.insert(name, field); + }); + + for schema_field in schema_fields.iter() { + let name = &schema_field.name; + let value = match lookup.get(name) { + Some(value) => value, None => { return Err(Error::NoEntryInLookupTable( name.clone(), format!("{lookup:?}"), )); } - } + }; + + encode_internal( + value, + &schema_field.schema, + names, + &record_namespace, + buffer, + )?; } } else { error!("invalid schema type for Record: {:?}", schema); diff --git a/lang/rust/avro/src/error.rs b/lang/rust/avro/src/error.rs index 0228600761a..bf066b8a5ee 100644 --- a/lang/rust/avro/src/error.rs +++ b/lang/rust/avro/src/error.rs @@ -19,9 +19,9 @@ use crate::{ schema::{Name, SchemaKind}, types::ValueKind, }; -use std::fmt; +use std::{error::Error as _, fmt}; -#[derive(thiserror::Error, Debug)] +#[derive(thiserror::Error)] pub enum Error { #[error("Bad Snappy CRC32; expected {expected:x} but got {actual:x}")] SnappyCrc32 { expected: u32, actual: u32 }, @@ -151,6 +151,12 @@ pub enum Error { #[error("TimestampMicros expected, got {0:?}")] GetTimestampMicros(ValueKind), + #[error("LocalTimestampMillis expected, got {0:?}")] + GetLocalTimestampMillis(ValueKind), + + #[error("LocalTimestampMicros expected, got {0:?}")] + GetLocalTimestampMicros(ValueKind), + #[error("Null expected, got {0:?}")] GetNull(ValueKind), @@ -235,6 +241,9 @@ pub enum Error { #[error("One union type {0:?} must match the `default`'s value type {1:?}")] GetDefaultUnion(SchemaKind, ValueKind), + #[error("`default`'s value type of field {0:?} in {1:?} must be {2:?}")] + GetDefaultRecordField(String, String, String), + #[error("JSON value {0} claims to be u64 but cannot be converted")] GetU64FromJson(serde_json::Number), @@ -259,6 +268,9 @@ pub enum Error { #[error("Failed to parse schema from JSON")] ParseSchemaJson(#[source] serde_json::Error), + #[error("Failed to read schema")] + ReadSchemaFromReader(#[source] std::io::Error), + #[error("Must be a JSON string, object or array")] ParseSchemaFromValidJson, @@ -304,19 +316,34 @@ pub enum Error { #[error("Invalid enum symbol name {0}")] EnumSymbolName(String), + #[error("Invalid field name {0}")] + FieldName(String), + + #[error("Duplicate field name {0}")] + FieldNameDuplicate(String), + #[error("Invalid schema name {0}. It must match the regex '{1}'")] InvalidSchemaName(String, &'static str), + #[error("Invalid namespace {0}. 
It must match the regex '{1}'")] + InvalidNamespace(String, &'static str), + #[error("Duplicate enum symbol {0}")] EnumSymbolDuplicate(String), + #[error("Default value for enum must be a string! Got: {0}")] + EnumDefaultWrongType(serde_json::Value), + #[error("No `items` in array")] GetArrayItemsField, #[error("No `values` in map")] GetMapValuesField, - #[error("No `size` in fixed")] + #[error("Fixed schema `size` value must be a positive integer: {0}")] + GetFixedSizeFieldPositive(serde_json::Value), + + #[error("Fixed schema has no `size`")] GetFixedSizeField, #[error("Failed to compress with flate")] @@ -452,3 +479,13 @@ impl serde::de::Error for Error { Error::DeserializeValue(msg.to_string()) } } + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut msg = self.to_string(); + if let Some(e) = self.source() { + msg.extend([": ", &e.to_string()]); + } + write!(f, "{}", msg) + } +} diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs index 2c6f46c07a3..35b1b431a11 100644 --- a/lang/rust/avro/src/lib.rs +++ b/lang/rust/avro/src/lib.rs @@ -24,7 +24,7 @@ //! //! All data in Avro is schematized, as in the following example: //! -//! ```text +//! ```json //! { //! "type": "record", //! "name": "test", @@ -62,10 +62,34 @@ //! features = ["snappy"] //! ``` //! +//! Or in case you want to leverage the **Zstandard** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["zstandard"] +//! ``` +//! +//! Or in case you want to leverage the **Bzip2** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["bzip"] +//! ``` +//! +//! Or in case you want to leverage the **Xz** codec: +//! +//! ```toml +//! [dependencies.apache-avro] +//! version = "x.y" +//! features = ["xz"] +//! ``` +//! //! # Upgrading to a newer minor version //! //! The library is still in beta, so there might be backward-incompatible changes between minor -//! versions. If you have troubles upgrading, check the [version upgrade guide](migration_guide.md). +//! versions. If you have troubles upgrading, check the [version upgrade guide](https://github.com/apache/avro/blob/master/lang/rust/migration_guide.md). //! //! # Defining a schema //! @@ -260,6 +284,12 @@ //! * **Snappy**: uses Google's [Snappy](http://google.github.io/snappy/) compression library. Each //! compressed block is followed by the 4-byte, big-endianCRC32 checksum of the uncompressed data in //! the block. You must enable the `snappy` feature to use this codec. +//! * **Zstandard**: uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. +//! You must enable the `zstandard` feature to use this codec. +//! * **Bzip2**: uses [BZip2](https://sourceware.org/bzip2/) compression library. +//! You must enable the `bzip` feature to use this codec. +//! * **Xz**: uses [xz2](https://github.com/alexcrichton/xz2-rs) compression library. +//! You must enable the `xz` feature to use this codec. //! //! To specify a codec to use to compress data, just specify it while creating a `Writer`: //! ``` @@ -515,6 +545,7 @@ //! 1. UUID using the [`uuid`](https://docs.rs/uuid/1.0.0/uuid) crate //! 1. Date, Time (milli) as `i32` and Time (micro) as `i64` //! 1. Timestamp (milli and micro) as `i64` +//! 1. Local timestamp (milli and micro) as `i64` //! 1. Duration as a custom type with `months`, `days` and `millis` accessor methods each of which returns an `i32` //! //! 
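The two new local-timestamp logical types slot into the same pattern as the existing timestamp support: plain `i64` on the wire, dedicated `Value` variants in memory. A trimmed sketch of writing them, assuming a crate version with the `LocalTimestampMillis`/`LocalTimestampMicros` variants added in this change:

```rust
use apache_avro::{
    types::{Record, Value},
    Schema, Writer,
};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Schema::parse_str(
        r#"{
            "type": "record",
            "name": "test",
            "fields": [
                {"name": "local_ts_ms", "type": "long", "logicalType": "local-timestamp-millis"},
                {"name": "local_ts_us", "type": "long", "logicalType": "local-timestamp-micros"}
            ]
        }"#,
    )?;
    let mut writer = Writer::new(&schema, Vec::new());
    let mut record = Record::new(writer.schema()).unwrap();
    record.put("local_ts_ms", Value::LocalTimestampMillis(4));
    record.put("local_ts_us", Value::LocalTimestampMicros(5));
    writer.append(record)?;
    let encoded = writer.into_inner()?;
    assert!(!encoded.is_empty());
    Ok(())
}
```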
Note that the on-disk representation is identical to the underlying primitive/complex type. @@ -583,6 +614,16 @@ //! "logicalType": "timestamp-micros" //! }, //! { +//! "name": "local_timestamp_millis", +//! "type": "long", +//! "logicalType": "local-timestamp-millis" +//! }, +//! { +//! "name": "local_timestamp_micros", +//! "type": "long", +//! "logicalType": "local-timestamp-micros" +//! }, +//! { //! "name": "duration", //! "type": { //! "type": "fixed", @@ -610,6 +651,8 @@ //! record.put("time_micros", Value::TimeMicros(3)); //! record.put("timestamp_millis", Value::TimestampMillis(4)); //! record.put("timestamp_micros", Value::TimestampMicros(5)); +//! record.put("local_timestamp_millis", Value::LocalTimestampMillis(4)); +//! record.put("local_timestamp_micros", Value::LocalTimestampMicros(5)); //! record.put("duration", Duration::new(Months::new(6), Days::new(7), Millis::new(8))); //! //! writer.append(record)?; @@ -748,7 +791,7 @@ pub use reader::{ }; pub use schema::{AvroSchema, Schema}; pub use ser::to_value; -pub use util::max_allocation_bytes; +pub use util::{max_allocation_bytes, set_serde_human_readable}; pub use writer::{ to_avro_datum, to_avro_datum_schemata, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer, @@ -866,61 +909,6 @@ mod tests { assert!(reader.next().is_none()); } - //TODO: move where it fits better - #[test] - fn test_enum_resolution() { - let writer_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "clubs", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let reader_raw_schema = r#" - { - "type": "record", - "name": "test", - "fields": [ - {"name": "a", "type": "long", "default": 42}, - {"name": "b", "type": "string"}, - { - "name": "c", - "type": { - "type": "enum", - "name": "suit", - "symbols": ["diamonds", "spades", "ninja", "hearts"] - }, - "default": "spades" - } - ] - } - "#; - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); - let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); - let mut record = Record::new(writer.schema()).unwrap(); - record.put("a", 27i64); - record.put("b", "foo"); - record.put("c", "clubs"); - writer.append(record).unwrap(); - let input = writer.into_inner().unwrap(); - let mut reader = Reader::with_schema(&reader_schema, &input[..]).unwrap(); - assert!(reader.next().unwrap().is_err()); - assert!(reader.next().is_none()); - } - //TODO: move where it fits better #[test] fn test_enum_no_reader_schema() { diff --git a/lang/rust/avro/src/rabin.rs b/lang/rust/avro/src/rabin.rs index ce5f0761f3b..fc63f8999b8 100644 --- a/lang/rust/avro/src/rabin.rs +++ b/lang/rust/avro/src/rabin.rs @@ -16,7 +16,6 @@ // under the License. //! Implementation of the Rabin fingerprint algorithm -use byteorder::{ByteOrder, LittleEndian}; use digest::{ consts::U8, core_api::OutputSizeUser, generic_array::GenericArray, FixedOutput, FixedOutputReset, HashMarker, Output, Reset, Update, @@ -61,7 +60,7 @@ lazy_static! 
{ /// assert_eq!(result[..], hex!("60335ba6d0415528")); /// ``` /// -/// To convert the digest to the commonly used 64-bit integer value, you can use the byteorder crate: +/// To convert the digest to the commonly used 64-bit integer value, you can use the i64::from_le_bytes() function /// /// ```rust /// # use apache_avro::rabin::Rabin; @@ -75,9 +74,8 @@ lazy_static! { /// # let result = hasher.finalize(); /// /// # assert_eq!(result[..], hex!("60335ba6d0415528")); -/// use byteorder::{ByteOrder, LittleEndian}; /// -/// let i = LittleEndian::read_i64(&result.to_vec()); +/// let i = i64::from_le_bytes(result.try_into().unwrap()); /// /// assert_eq!(i, 2906301498937520992) /// ``` @@ -103,7 +101,7 @@ impl Update for Rabin { impl FixedOutput for Rabin { fn finalize_into(self, out: &mut GenericArray) { - LittleEndian::write_i64(out, self.result); + out.copy_from_slice(&self.result.to_le_bytes()); } } @@ -123,7 +121,7 @@ impl HashMarker for Rabin {} impl FixedOutputReset for Rabin { fn finalize_into_reset(&mut self, out: &mut Output) { - LittleEndian::write_i64(out, self.result); + out.copy_from_slice(&self.result.to_le_bytes()); self.reset(); } } @@ -131,13 +129,13 @@ impl FixedOutputReset for Rabin { #[cfg(test)] mod tests { use super::Rabin; - use byteorder::{ByteOrder, LittleEndian}; + use apache_avro_test_helper::TestResult; use digest::Digest; use pretty_assertions::assert_eq; // See: https://github.com/apache/avro/blob/master/share/test/data/schema-tests.txt #[test] - fn test1() { + fn test1() -> TestResult { let data: &[(&str, i64)] = &[ (r#""null""#, 7195948357588979594), (r#""boolean""#, -6970731678124411036), @@ -155,8 +153,11 @@ mod tests { for (s, fp) in data { hasher.update(s.as_bytes()); - let result = LittleEndian::read_i64(&hasher.finalize_reset()); + let res: &[u8] = &hasher.finalize_reset(); + let result = i64::from_le_bytes(res.try_into()?); assert_eq!(*fp, result); } + + Ok(()) } } diff --git a/lang/rust/avro/src/reader.rs b/lang/rust/avro/src/reader.rs index 3489ccfb198..2ec0b84cb82 100644 --- a/lang/rust/avro/src/reader.rs +++ b/lang/rust/avro/src/reader.rs @@ -34,14 +34,14 @@ use std::{ str::FromStr, }; -// Internal Block reader. +/// Internal Block reader. #[derive(Debug, Clone)] struct Block<'r, R> { reader: R, - // Internal buffering to reduce allocation. + /// Internal buffering to reduce allocation. buf: Vec, buf_idx: usize, - // Number of elements expected to exist within this block. + /// Number of elements expected to exist within this block. 
message_count: usize, marker: [u8; 16], codec: Codec, @@ -529,6 +529,7 @@ pub fn read_marker(bytes: &[u8]) -> [u8; 16] { mod tests { use super::*; use crate::{encode::encode, from_value, types::Record, Reader}; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde::Deserialize; use std::io::Cursor; @@ -569,8 +570,8 @@ mod tests { ]; #[test] - fn test_from_avro_datum() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_from_avro_datum() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; let mut record = Record::new(&schema).unwrap(); @@ -578,14 +579,13 @@ mod tests { record.put("b", "foo"); let expected = record.into(); - assert_eq!( - from_avro_datum(&schema, &mut encoded, None).unwrap(), - expected - ); + assert_eq!(from_avro_datum(&schema, &mut encoded, None)?, expected); + + Ok(()) } #[test] - fn test_from_avro_datum_with_union_to_struct() { + fn test_from_avro_datum_with_union_to_struct() -> TestResult { const TEST_RECORD_SCHEMA_3240: &str = r#" { "type": "record", @@ -628,7 +628,7 @@ mod tests { a_nullable_string: Option, } - let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240).unwrap(); + let schema = Schema::parse_str(TEST_RECORD_SCHEMA_3240)?; let mut encoded: &'static [u8] = &[54, 6, 102, 111, 111]; let expected_record: TestRecord3240 = TestRecord3240 { @@ -638,32 +638,36 @@ mod tests { a_nullable_string: None, }; - let avro_datum = from_avro_datum(&schema, &mut encoded, None).unwrap(); + let avro_datum = from_avro_datum(&schema, &mut encoded, None)?; let parsed_record: TestRecord3240 = match &avro_datum { - Value::Record(_) => from_value::(&avro_datum).unwrap(), + Value::Record(_) => from_value::(&avro_datum)?, unexpected => { panic!("could not map avro data to struct, found unexpected: {unexpected:?}") } }; assert_eq!(parsed_record, expected_record); + + Ok(()) } #[test] - fn test_null_union() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_null_union() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let mut encoded: &'static [u8] = &[2, 0]; assert_eq!( - from_avro_datum(&schema, &mut encoded, None).unwrap(), + from_avro_datum(&schema, &mut encoded, None)?, Value::Union(1, Box::new(Value::Long(0))) ); + + Ok(()) } #[test] - fn test_reader_iterator() { - let schema = Schema::parse_str(SCHEMA).unwrap(); - let reader = Reader::with_schema(&schema, ENCODED).unwrap(); + fn test_reader_iterator() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; + let reader = Reader::with_schema(&schema, ENCODED)?; let mut record1 = Record::new(&schema).unwrap(); record1.put("a", 27i64); @@ -676,20 +680,24 @@ mod tests { let expected = vec![record1.into(), record2.into()]; for (i, value) in reader.enumerate() { - assert_eq!(value.unwrap(), expected[i]); + assert_eq!(value?, expected[i]); } + + Ok(()) } #[test] - fn test_reader_invalid_header() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_reader_invalid_header() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let invalid = ENCODED.iter().copied().skip(1).collect::>(); assert!(Reader::with_schema(&schema, &invalid[..]).is_err()); + + Ok(()) } #[test] - fn test_reader_invalid_block() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_reader_invalid_block() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let invalid = ENCODED .iter() .copied() @@ -699,32 +707,38 @@ mod tests { .into_iter() .rev() .collect::>(); - let reader = 
Reader::with_schema(&schema, &invalid[..]).unwrap(); + let reader = Reader::with_schema(&schema, &invalid[..])?; for value in reader { assert!(value.is_err()); } + + Ok(()) } #[test] - fn test_reader_empty_buffer() { + fn test_reader_empty_buffer() -> TestResult { let empty = Cursor::new(Vec::new()); assert!(Reader::new(empty).is_err()); + + Ok(()) } #[test] - fn test_reader_only_header() { + fn test_reader_only_header() -> TestResult { let invalid = ENCODED.iter().copied().take(165).collect::>(); - let reader = Reader::new(&invalid[..]).unwrap(); + let reader = Reader::new(&invalid[..])?; for value in reader { assert!(value.is_err()); } + + Ok(()) } #[test] - fn test_avro_3405_read_user_metadata_success() { + fn test_avro_3405_read_user_metadata_success() -> TestResult { use crate::writer::Writer; - let schema = Schema::parse_str(SCHEMA).unwrap(); + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut user_meta_data: HashMap> = HashMap::new(); @@ -736,20 +750,22 @@ mod tests { user_meta_data.insert("vecKey".to_string(), vec![1, 2, 3]); for (k, v) in user_meta_data.iter() { - writer.add_user_metadata(k.to_string(), v).unwrap(); + writer.add_user_metadata(k.to_string(), v)?; } let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - writer.append(record.clone()).unwrap(); - writer.append(record.clone()).unwrap(); - writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + writer.append(record.clone())?; + writer.append(record.clone())?; + writer.flush()?; + let result = writer.into_inner()?; - let reader = Reader::new(&result[..]).unwrap(); + let reader = Reader::new(&result[..])?; assert_eq!(reader.user_metadata(), &user_meta_data); + + Ok(()) } #[derive(Deserialize, Clone, PartialEq, Debug)] @@ -833,7 +849,7 @@ mod tests { } #[test] - fn test_avro_3507_single_object_reader() { + fn test_avro_3507_single_object_reader() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, @@ -860,17 +876,19 @@ mod tests { .expect("Should read"); let expected_value: Value = obj.into(); assert_eq!(expected_value, val); + + Ok(()) } #[test] - fn avro_3642_test_single_object_reader_incomplete_reads() { + fn avro_3642_test_single_object_reader_incomplete_reads() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, c: vec!["cat".into(), "dog".into()], }; // The two-byte marker, to show that the message uses this single-record format - let to_read_1 = vec![0xC3, 0x01]; + let to_read_1 = [0xC3, 0x01]; let mut to_read_2 = Vec::::new(); to_read_2.extend_from_slice( &TestSingleObjectReader::get_schema() @@ -892,10 +910,12 @@ mod tests { .expect("Should read"); let expected_value: Value = obj.into(); assert_eq!(expected_value, val); + + Ok(()) } #[test] - fn test_avro_3507_reader_parity() { + fn test_avro_3507_reader_parity() -> TestResult { let obj = TestSingleObjectReader { a: 42, b: 3.33, @@ -935,7 +955,9 @@ mod tests { let expected_value: Value = obj.clone().into(); assert_eq!(obj, read_obj1); assert_eq!(obj, read_obj2); - assert_eq!(val, expected_value) + assert_eq!(val, expected_value); + + Ok(()) } #[cfg(not(feature = "snappy"))] diff --git a/lang/rust/avro/src/schema.rs b/lang/rust/avro/src/schema.rs index 0aa0eed2c96..7c6b7dc9e43 100644 --- a/lang/rust/avro/src/schema.rs +++ b/lang/rust/avro/src/schema.rs @@ -19,18 +19,20 @@ use crate::{error::Error, types, util::MapHelper, AvroResult}; use digest::Digest; use lazy_static::lazy_static; -use regex::Regex; +use regex_lite::Regex; use 
serde::{ ser::{SerializeMap, SerializeSeq}, Deserialize, Serialize, Serializer, }; use serde_json::{Map, Value}; use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, collections::{BTreeMap, HashMap, HashSet}, convert::{TryFrom, TryInto}, fmt, + fmt::Debug, hash::Hash, + io::Read, str::FromStr, }; use strum_macros::{EnumDiscriminants, EnumString}; @@ -40,7 +42,11 @@ lazy_static! { // An optional namespace (with optional dots) followed by a name without any dots in it. static ref SCHEMA_NAME_R: Regex = - Regex::new(r"^((?P[A-Za-z_][A-Za-z0-9_\.]*)*\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$").unwrap(); + Regex::new(r"^((?P([A-Za-z_][A-Za-z0-9_\.]*)*)\.)?(?P[A-Za-z_][A-Za-z0-9_]*)$").unwrap(); + + static ref FIELD_NAME_R: Regex = Regex::new(r"^[A-Za-z_][A-Za-z0-9_]*$").unwrap(); + + static ref NAMESPACE_R: Regex = Regex::new(r"^([A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*)?$").unwrap(); } /// Represents an Avro schema fingerprint @@ -98,43 +104,14 @@ pub enum Schema { /// A `union` Avro schema. Union(UnionSchema), /// A `record` Avro schema. - /// - /// The `lookup` table maps field names to their position in the `Vec` - /// of `fields`. - Record { - name: Name, - aliases: Aliases, - doc: Documentation, - fields: Vec, - lookup: BTreeMap, - attributes: BTreeMap, - }, + Record(RecordSchema), /// An `enum` Avro schema. - Enum { - name: Name, - aliases: Aliases, - doc: Documentation, - symbols: Vec, - attributes: BTreeMap, - }, + Enum(EnumSchema), /// A `fixed` Avro schema. - Fixed { - name: Name, - aliases: Aliases, - doc: Documentation, - size: usize, - attributes: BTreeMap, - }, + Fixed(FixedSchema), /// Logical type which represents `Decimal` values. The underlying type is serialized and /// deserialized as `Schema::Bytes` or `Schema::Fixed`. - /// - /// `scale` defaults to 0 and is an integer greater than or equal to 0 and `precision` is an - /// integer greater than 0. - Decimal { - precision: DecimalMetadata, - scale: DecimalMetadata, - inner: Box, - }, + Decimal(DecimalSchema), /// A universally unique identifier, annotating a string. Uuid, /// Logical type which represents the number of days since the unix epoch. @@ -150,12 +127,14 @@ pub enum Schema { TimestampMillis, /// An instant in time represented as the number of microseconds after the UNIX epoch. TimestampMicros, + /// An instant in localtime represented as the number of milliseconds after the UNIX epoch. + LocalTimestampMillis, + /// An instant in local time represented as the number of microseconds after the UNIX epoch. + LocalTimestampMicros, /// An amount of time defined by a number of months, days and milliseconds. Duration, - // A reference to another schema. - Ref { - name: Name, - }, + /// A reference to another schema. + Ref { name: Name }, } impl PartialEq for Schema { @@ -216,6 +195,8 @@ impl From<&types::Value> for SchemaKind { Value::TimeMicros(_) => Self::TimeMicros, Value::TimestampMillis(_) => Self::TimestampMillis, Value::TimestampMicros(_) => Self::TimestampMicros, + Value::LocalTimestampMillis(_) => Self::LocalTimestampMillis, + Value::LocalTimestampMicros(_) => Self::LocalTimestampMicros, Value::Duration { .. } => Self::Duration, } } @@ -254,10 +235,13 @@ impl Name { /// `aliases` will not be defined. 
pub fn new(name: &str) -> AvroResult { let (name, namespace) = Name::get_name_and_namespace(name)?; - Ok(Self { name, namespace }) + Ok(Self { + name, + namespace: namespace.filter(|ns| !ns.is_empty()), + }) } - pub(crate) fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { + fn get_name_and_namespace(name: &str) -> AvroResult<(String, Namespace)> { let caps = SCHEMA_NAME_R .captures(name) .ok_or_else(|| Error::InvalidSchemaName(name.to_string(), SCHEMA_NAME_R.as_str()))?; @@ -268,7 +252,10 @@ impl Name { } /// Parse a `serde_json::Value` into a `Name`. - pub(crate) fn parse(complex: &Map) -> AvroResult { + pub(crate) fn parse( + complex: &Map, + enclosing_namespace: &Namespace, + ) -> AvroResult { let (name, namespace_from_name) = complex .name() .map(|name| Name::get_name_and_namespace(name.as_str()).unwrap()) @@ -279,9 +266,26 @@ impl Name { _ => None, }; + let namespace = namespace_from_name + .or_else(|| { + complex + .string("namespace") + .or_else(|| enclosing_namespace.clone()) + }) + .filter(|ns| !ns.is_empty()); + + if let Some(ref ns) = namespace { + if !NAMESPACE_R.is_match(ns) { + return Err(Error::InvalidNamespace( + ns.to_string(), + NAMESPACE_R.as_str(), + )); + } + } + Ok(Self { name: type_name.unwrap_or(name), - namespace: namespace_from_name.or_else(|| complex.string("namespace")), + namespace, }) } @@ -296,8 +300,10 @@ impl Name { let namespace = self.namespace.clone().or(default_namespace); match namespace { - Some(ref namespace) => format!("{}.{}", namespace, self.name), - None => self.name.clone(), + Some(ref namespace) if !namespace.is_empty() => { + format!("{}.{}", namespace, self.name) + } + _ => self.name.clone(), } } } @@ -307,12 +313,12 @@ impl Name { /// use apache_avro::schema::Name; /// /// assert_eq!( - /// Name::new("some_name").unwrap().fully_qualified_name(&Some("some_namespace".into())), - /// Name::new("some_namespace.some_name").unwrap() + /// Name::new("some_name")?.fully_qualified_name(&Some("some_namespace".into())), + /// Name::new("some_namespace.some_name")? /// ); /// assert_eq!( - /// Name::new("some_namespace.some_name").unwrap().fully_qualified_name(&Some("other_namespace".into())), - /// Name::new("some_namespace.some_name").unwrap() + /// Name::new("some_namespace.some_name")?.fully_qualified_name(&Some("other_namespace".into())), + /// Name::new("some_namespace.some_name")? 
/// ); /// ``` pub fn fully_qualified_name(&self, enclosing_namespace: &Namespace) -> Name { @@ -321,7 +327,7 @@ impl Name { namespace: self .namespace .clone() - .or_else(|| enclosing_namespace.clone()), + .or_else(|| enclosing_namespace.clone().filter(|ns| !ns.is_empty())), } } } @@ -346,7 +352,7 @@ impl<'de> Deserialize<'de> for Name { Value::deserialize(deserializer).and_then(|value| { use serde::de::Error; if let Value::Object(json) = value { - Name::parse(&json).map_err(Error::custom) + Name::parse(&json, &None).map_err(Error::custom) } else { Err(Error::custom(format!("Expected a JSON object: {value:?}"))) } @@ -411,7 +417,7 @@ impl<'s> TryFrom<&'s Schema> for ResolvedSchema<'s> { names_ref: names, schemata: vec![schema], }; - Self::from_internal(rs.get_schemata(), &mut rs.names_ref, &None)?; + rs.resolve(rs.get_schemata(), &None, None)?; Ok(rs) } } @@ -425,7 +431,7 @@ impl<'s> TryFrom> for ResolvedSchema<'s> { names_ref: names, schemata, }; - Self::from_internal(rs.get_schemata(), &mut rs.names_ref, &None)?; + rs.resolve(rs.get_schemata(), &None, None)?; Ok(rs) } } @@ -439,33 +445,53 @@ impl<'s> ResolvedSchema<'s> { &self.names_ref } - fn from_internal( + /// Creates `ResolvedSchema` with some already known schemas. + /// + /// Those schemata would be used to resolve references if needed. + pub fn new_with_known_schemata<'n>( + schemata_to_resolve: Vec<&'s Schema>, + enclosing_namespace: &Namespace, + known_schemata: &'n NamesRef<'n>, + ) -> AvroResult { + let names = HashMap::new(); + let mut rs = ResolvedSchema { + names_ref: names, + schemata: schemata_to_resolve, + }; + rs.resolve(rs.get_schemata(), enclosing_namespace, Some(known_schemata))?; + Ok(rs) + } + + fn resolve<'n>( + &mut self, schemata: Vec<&'s Schema>, - names_ref: &mut NamesRef<'s>, enclosing_namespace: &Namespace, + known_schemata: Option<&'n NamesRef<'n>>, ) -> AvroResult<()> { for schema in schemata { match schema { Schema::Array(schema) | Schema::Map(schema) => { - Self::from_internal(vec![schema], names_ref, enclosing_namespace)? + self.resolve(vec![schema], enclosing_namespace, known_schemata)? } Schema::Union(UnionSchema { schemas, .. }) => { for schema in schemas { - Self::from_internal(vec![schema], names_ref, enclosing_namespace)? + self.resolve(vec![schema], enclosing_namespace, known_schemata)? } } - Schema::Enum { name, .. } | Schema::Fixed { name, .. } => { + Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names_ref + if self + .names_ref .insert(fully_qualified_name.clone(), schema) .is_some() { return Err(Error::AmbiguousSchemaDefinition(fully_qualified_name)); } } - Schema::Record { name, fields, .. } => { + Schema::Record(RecordSchema { name, fields, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names_ref + if self + .names_ref .insert(fully_qualified_name.clone(), schema) .is_some() { @@ -473,14 +499,21 @@ impl<'s> ResolvedSchema<'s> { } else { let record_namespace = fully_qualified_name.namespace; for field in fields { - Self::from_internal(vec![&field.schema], names_ref, &record_namespace)? + self.resolve(vec![&field.schema], &record_namespace, known_schemata)? 
} } } Schema::Ref { name } => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); - if names_ref.get(&fully_qualified_name).is_none() { - return Err(Error::SchemaResolutionError(fully_qualified_name)); + // first search for reference in current schemata, then look into external references. + if !self.names_ref.contains_key(&fully_qualified_name) { + let is_resolved_with_known_schemas = known_schemata + .as_ref() + .map(|names| names.contains_key(&fully_qualified_name)) + .unwrap_or(false); + if !is_resolved_with_known_schemas { + return Err(Error::SchemaResolutionError(fully_qualified_name)); + } } } _ => (), @@ -532,7 +565,7 @@ impl ResolvedOwnedSchema { } Ok(()) } - Schema::Enum { name, .. } | Schema::Fixed { name, .. } => { + Schema::Enum(EnumSchema { name, .. }) | Schema::Fixed(FixedSchema { name, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if names .insert(fully_qualified_name.clone(), schema.clone()) @@ -543,7 +576,7 @@ impl ResolvedOwnedSchema { Ok(()) } } - Schema::Record { name, fields, .. } => { + Schema::Record(RecordSchema { name, fields, .. }) => { let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if names .insert(fully_qualified_name.clone(), schema.clone()) @@ -610,14 +643,25 @@ impl RecordField { field: &Map, position: usize, parser: &mut Parser, - enclosing_namespace: &Namespace, + enclosing_record: &Name, ) -> AvroResult { let name = field.name().ok_or(Error::GetNameFieldFromRecord)?; + if !FIELD_NAME_R.is_match(&name) { + return Err(Error::FieldName(name)); + } + // TODO: "type" = "" - let schema = parser.parse_complex(field, enclosing_namespace)?; + let schema = parser.parse_complex(field, &enclosing_record.namespace)?; let default = field.get("default").cloned(); + Self::resolve_default_value( + &schema, + &name, + &enclosing_record.fullname(None), + &parser.parsed_schemas, + &default, + )?; let aliases = field.get("aliases").and_then(|aliases| { aliases.as_array().map(|aliases| { @@ -647,6 +691,55 @@ impl RecordField { }) } + fn resolve_default_value( + field_schema: &Schema, + field_name: &str, + record_name: &str, + names: &Names, + default: &Option, + ) -> AvroResult<()> { + if let Some(value) = default { + let avro_value = types::Value::from(value.clone()); + match field_schema { + Schema::Union(union_schema) => { + let schemas = &union_schema.schemas; + let resolved = schemas.iter().any(|schema| { + avro_value + .to_owned() + .resolve_internal(schema, names, &schema.namespace(), &None) + .is_ok() + }); + + if !resolved { + let schema: Option<&Schema> = schemas.get(0); + return match schema { + Some(first_schema) => Err(Error::GetDefaultUnion( + SchemaKind::from(first_schema), + types::ValueKind::from(avro_value), + )), + None => Err(Error::EmptyUnion), + }; + } + } + _ => { + let resolved = avro_value + .resolve_internal(field_schema, names, &field_schema.namespace(), &None) + .is_ok(); + + if !resolved { + return Err(Error::GetDefaultRecordField( + field_name.to_string(), + record_name.to_string(), + field_schema.canonical_form(), + )); + } + } + }; + } + + Ok(()) + } + fn get_field_custom_attributes(field: &Map) -> BTreeMap { let mut custom_attributes: BTreeMap = BTreeMap::new(); for (key, value) in field { @@ -668,8 +761,74 @@ impl RecordField { } } +/// A description of an Enum schema. 
+/// A description of a Record schema. +#[derive(Debug, Clone)] +pub struct RecordSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of fields of the schema + pub fields: Vec<RecordField>, + /// The `lookup` table maps field names to their position in the `Vec` + /// of `fields`. + pub lookup: BTreeMap<String, usize>, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of an Enum schema. +#[derive(Debug, Clone)] +pub struct EnumSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The set of symbols of the schema + pub symbols: Vec<String>, + /// An optional default symbol used for compatibility + pub default: Option<String>, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of a Fixed schema. +#[derive(Debug, Clone)] +pub struct FixedSchema { + /// The name of the schema + pub name: Name, + /// The aliases of the schema + pub aliases: Aliases, + /// The documentation of the schema + pub doc: Documentation, + /// The size of the fixed schema + pub size: usize, + /// The custom attributes of the schema + pub attributes: BTreeMap<String, Value>, +} + +/// A description of a Decimal schema. +/// +/// `scale` defaults to 0 and must be an integer greater than or equal to 0; `precision` must be an +/// integer greater than 0. +#[derive(Debug, Clone)] +pub struct DecimalSchema { + /// The number of digits in the unscaled value + pub precision: DecimalMetadata, + /// The number of digits to the right of the decimal point + pub scale: DecimalMetadata, + /// The inner schema of the decimal (fixed or bytes) + pub inner: Box<Schema>, +} + +/// A description of a Union schema. #[derive(Debug, Clone)] pub struct UnionSchema { + /// The schemas that make up this union pub(crate) schemas: Vec<Schema>, // Used to ensure uniqueness of schema inputs, and provide constant time finding of the // schema index given a value. @@ -709,19 +868,59 @@ impl UnionSchema { /// Optionally returns a reference to the schema matched by this value, as well as its position /// within this union. + #[deprecated( + since = "0.15.0", + note = "Please use `find_schema_with_known_schemata` instead" + )] pub fn find_schema(&self, value: &types::Value) -> Option<(usize, &Schema)> { + self.find_schema_with_known_schemata::<Schema>(value, None, &None) + }
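// Editor's note: a brief sketch of the pattern change implied by the new
// structs; matches on the named variants now destructure a tuple payload.
fn sketch_match(schema: &Schema) {
    // Before this refactor: `Schema::Record { name, fields, .. } => ...`
    if let Schema::Record(RecordSchema { name, fields, .. }) = schema {
        println!("record {} has {} field(s)", name.fullname(None), fields.len());
    }
}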
+ + /// Optionally returns a reference to the schema matched by this value, as well as its position + /// within this union. + /// + /// Extra arguments: + /// - `known_schemata` - mapping between `Name` and `Schema` - if passed, additional external schemas will be used to resolve references. + pub fn find_schema_with_known_schemata<S: Borrow<Schema> + Debug>( + &self, + value: &types::Value, + known_schemata: Option<&HashMap<Name, S>>, + enclosing_namespace: &Namespace, + ) -> Option<(usize, &Schema)> { let schema_kind = SchemaKind::from(value); if let Some(&i) = self.variant_index.get(&schema_kind) { // fast path Some((i, &self.schemas[i])) } else { // slow path (required for matching logical or named types) + + // first collect the schemas we already know + let mut collected_names: HashMap<Name, &Schema> = known_schemata + .map(|names| { + names + .iter() + .map(|(name, schema)| (name.clone(), schema.borrow())) + .collect() + }) + .unwrap_or_default(); + self.schemas.iter().enumerate().find(|(_, schema)| { - let rs = - ResolvedSchema::try_from(*schema).expect("Schema didn't successfully parse"); + let resolved_schema = ResolvedSchema::new_with_known_schemata( + vec![*schema], + enclosing_namespace, + &collected_names, + ) + .expect("Schema didn't successfully parse"); + let resolved_names = resolved_schema.names_ref; + + // extend the known schemas with the names resolved just now + collected_names.extend(resolved_names); + let namespace = &schema.namespace().or_else(|| enclosing_namespace.clone()); + value - .validate_internal(schema, rs.get_names(), &schema.namespace()) - .is_none() + .clone() + .resolve_internal(schema, &collected_names, namespace, &None) + .is_ok() }) } } @@ -759,13 +958,13 @@ fn parse_json_integer_for_decimal(value: &serde_json::Number) -> Result<DecimalMetadata, Error> { - // A map of name -> Schema::Ref - // Used to resolve cyclic references, i.e. when a - // field's type is a reference to its record's type + /// A map of name -> Schema::Ref + /// Used to resolve cyclic references, i.e. when a + /// field's type is a reference to its record's type resolving_schemas: Names, input_order: Vec<Name>, - // A map of name -> fully parsed Schema - // Used to avoid parsing the same schema twice + /// A map of name -> fully parsed Schema + /// Used to avoid parsing the same schema twice parsed_schemas: Names, } @@ -807,13 +1006,13 @@ impl Schema { /// during parsing. /// /// If two of the input schemas have the same fullname, an Error will be returned. - pub fn parse_list(input: &[&str]) -> Result<Vec<Schema>, Error> { + pub fn parse_list(input: &[&str]) -> AvroResult<Vec<Schema>> { let mut input_schemas: HashMap<Name, Value> = HashMap::with_capacity(input.len()); let mut input_order: Vec<Name> = Vec::with_capacity(input.len()); for js in input { let schema: Value = serde_json::from_str(js).map_err(Error::ParseSchemaJson)?; if let Value::Object(inner) = &schema { - let name = Name::parse(inner)?; + let name = Name::parse(inner, &None)?; let previous_value = input_schemas.insert(name.clone(), schema); if previous_value.is_some() { return Err(Error::NameCollision(name.fullname(None))); @@ -832,6 +1031,15 @@ parser.parse_list() } + /// Create a `Schema` from a reader which implements [`Read`]. + pub fn parse_reader(reader: &mut (impl Read + ?Sized)) -> AvroResult<Schema> { + let mut buf = String::new(); + match reader.read_to_string(&mut buf) { + Ok(_) => Self::parse_str(&buf), + Err(e) => Err(Error::ReadSchemaFromReader(e)), + } + } +
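// Editor's note: a small sketch of the new reader-based entry point; any
// `impl Read` works (a `File`, a network stream, or, as here, a byte slice).
fn sketch_parse_reader() -> AvroResult<Schema> {
    let mut bytes: &[u8] = br#"{"type": "map", "values": "double"}"#;
    Schema::parse_reader(&mut bytes)
}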
/// Parses an Avro schema from JSON. pub fn parse(value: &Value) -> AvroResult<Schema> { let mut parser = Parser::default(); @@ -853,9 +1061,9 @@ /// Returns the custom attributes (metadata) if the schema supports them. pub fn custom_attributes(&self) -> Option<&BTreeMap<String, Value>> { match self { - Schema::Record { attributes, .. } - | Schema::Enum { attributes, .. } - | Schema::Fixed { attributes, .. } => Some(attributes), + Schema::Record(RecordSchema { attributes, .. }) + | Schema::Enum(EnumSchema { attributes, .. }) + | Schema::Fixed(FixedSchema { attributes, .. }) => Some(attributes), _ => None, } } @@ -863,10 +1071,10 @@ /// Returns the name of the schema if it has one. pub fn name(&self) -> Option<&Name> { match self { - Schema::Ref { ref name, .. } - | Schema::Record { ref name, .. } - | Schema::Enum { ref name, .. } - | Schema::Fixed { ref name, .. } => Some(name), + Schema::Ref { name, .. } + | Schema::Record(RecordSchema { name, .. }) + | Schema::Enum(EnumSchema { name, .. }) + | Schema::Fixed(FixedSchema { name, .. }) => Some(name), _ => None, } } @@ -875,6 +1083,26 @@ pub fn namespace(&self) -> Namespace { self.name().and_then(|n| n.namespace.clone()) } + + /// Returns the aliases of the schema if it has any. + pub fn aliases(&self) -> Option<&Vec<Alias>> { + match self { + Schema::Record(RecordSchema { aliases, .. }) + | Schema::Enum(EnumSchema { aliases, .. }) + | Schema::Fixed(FixedSchema { aliases, .. }) => aliases.as_ref(), + _ => None, + } + } + + /// Returns the doc of the schema if it has one. + pub fn doc(&self) -> Option<&String> { + match self { + Schema::Record(RecordSchema { doc, .. }) + | Schema::Enum(EnumSchema { doc, .. }) + | Schema::Fixed(FixedSchema { doc, .. }) => doc.as_ref(), + _ => None, + } + } }
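// Editor's note: hypothetical usage of the new accessors; callers previously
// had to destructure each named variant to reach these fields.
fn sketch_accessors(schema: &Schema) {
    if let Some(doc) = schema.doc() {
        println!("doc: {doc}");
    }
    if let Some(aliases) = schema.aliases() {
        println!("{} alias(es)", aliases.len());
    }
}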
impl Parser { @@ -920,7 +1148,7 @@ match *value { Value::String(ref t) => self.parse_known_schema(t.as_str(), enclosing_namespace), Value::Object(ref data) => self.parse_complex(data, enclosing_namespace), - Value::Array(ref data) => self.parse_union(data, enclosing_namespace, None), + Value::Array(ref data) => self.parse_union(data, enclosing_namespace), _ => Err(Error::ParseSchemaFromValidJson), } } @@ -962,9 +1190,9 @@ ) -> AvroResult<Schema> { fn get_schema_ref(parsed: &Schema) -> Schema { match &parsed { - Schema::Record { ref name, .. } - | Schema::Enum { ref name, .. } - | Schema::Fixed { ref name, .. } => Schema::Ref { name: name.clone() }, + Schema::Record(RecordSchema { ref name, .. }) + | Schema::Enum(EnumSchema { ref name, .. }) + | Schema::Fixed(FixedSchema { ref name, .. }) => Schema::Ref { name: name.clone() }, _ => parsed.clone(), } } @@ -973,7 +1201,9 @@ let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); if self.parsed_schemas.get(&fully_qualified_name).is_some() { - return Ok(Schema::Ref { name }); + return Ok(Schema::Ref { + name: fully_qualified_name, + }); } if let Some(resolving_schema) = self.resolving_schemas.get(&fully_qualified_name) { return Ok(resolving_schema.clone()); @@ -1047,7 +1277,12 @@ ) -> AvroResult<Schema> { match complex.get("type") { Some(value) => { - let ty = parser.parse(value, enclosing_namespace)?; + let ty = match value { + Value::String(s) if s == "fixed" => { + parser.parse_fixed(complex, enclosing_namespace)? + } + _ => parser.parse(value, enclosing_namespace)?, + }; if kinds .iter() @@ -1118,11 +1353,11 @@ let (precision, scale) = Self::parse_precision_and_scale(complex)?; - return Ok(Schema::Decimal { + return Ok(Schema::Decimal(DecimalSchema { precision, scale, inner, - }); + })); } "uuid" => { logical_verify_type(complex, &[SchemaKind::String], self, enclosing_namespace)?; @@ -1178,6 +1413,26 @@ enclosing_namespace, ); } + "local-timestamp-millis" => { + return try_logical_type( + "local-timestamp-millis", + complex, + &[SchemaKind::Long], + Schema::LocalTimestampMillis, + self, + enclosing_namespace, + ); + } + "local-timestamp-micros" => { + return try_logical_type( + "local-timestamp-micros", + complex, + &[SchemaKind::Long], + Schema::LocalTimestampMicros, + self, + enclosing_namespace, + ); + } "duration" => { logical_verify_type(complex, &[SchemaKind::Fixed], self, enclosing_namespace)?; return Ok(Schema::Duration); @@ -1202,10 +1457,7 @@ other => self.parse_known_schema(other, enclosing_namespace), }, Some(Value::Object(data)) => self.parse_complex(data, enclosing_namespace), - Some(Value::Array(variants)) => { - let default = complex.get("default"); - self.parse_union(variants, enclosing_namespace, default) - } + Some(Value::Array(variants)) => self.parse_union(variants, enclosing_namespace), Some(unknown) => Err(Error::GetComplexType(unknown.clone())), None => Err(Error::GetComplexTypeField), } } @@ -1284,11 +1536,11 @@ } } - let name = Name::parse(complex)?; - let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); + let fully_qualified_name = Name::parse(complex, enclosing_namespace)?; + let aliases = fix_aliases_namespace(complex.aliases(), &fully_qualified_name.namespace); let mut lookup = BTreeMap::new(); - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + self.register_resolving_schema(&fully_qualified_name, &aliases); let fields: Vec<RecordField> = fields_opt @@ -1300,13 +1552,15 @@ .filter_map(|field| field.as_object()) .enumerate() .map(|(position, field)| { - RecordField::parse(field, position, self, &fully_qualified_name.namespace) + RecordField::parse(field, position, self, &fully_qualified_name) }) .collect::<Result<_, _>>() })?; for field in &fields { - lookup.insert(field.name.clone(), field.position); + if let Some(_old) = lookup.insert(field.name.clone(), field.position) { + return Err(Error::FieldNameDuplicate(field.name.clone())); + } if let Some(ref field_aliases) = field.aliases { for alias in field_aliases { @@ -1315,14 +1569,14 @@ } } - let schema = Schema::Record { - name, + let schema = Schema::Record(RecordSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc: complex.doc(), fields, lookup, attributes: self.get_custom_attributes(complex, vec!["fields"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); Ok(schema) }
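// Editor's note: an illustrative (hypothetical) consequence of the new
// duplicate-field check above; a record declaring two fields named "a" now
// fails at parse time with `Error::FieldNameDuplicate` instead of silently
// overwriting the lookup entry.
fn sketch_duplicate_field() {
    let result = Schema::parse_str(
        r#"{"type": "record", "name": "r", "fields": [
            {"name": "a", "type": "int"},
            {"name": "a", "type": "string"}
        ]}"#,
    );
    assert!(result.is_err());
}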
@@ -1359,8 +1613,8 @@ } } - let name = Name::parse(complex)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let name = Name::parse(complex, enclosing_namespace)?; + let fully_qualified_name = name.clone(); let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); let symbols: Vec<String> = symbols_opt @@ -1389,13 +1643,35 @@ existing_symbols.insert(symbol); } - let schema = Schema::Enum { - name, + let mut default: Option<String> = None; + if let Some(value) = complex.get("default") { + if let Value::String(ref s) = *value { + default = Some(s.clone()); + } else { + return Err(Error::EnumDefaultWrongType(value.clone())); + } + } + + if let Some(ref value) = default { + let resolved = types::Value::from(value.clone()) + .resolve_enum(&symbols, &Some(value.to_string()), &None) + .is_ok(); + if !resolved { + return Err(Error::GetEnumDefault { + symbol: value.to_string(), + symbols, + }); + } + } + + let schema = Schema::Enum(EnumSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc: complex.doc(), symbols, + default, attributes: self.get_custom_attributes(complex, vec!["symbols"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); @@ -1436,32 +1712,11 @@ &mut self, items: &[Value], enclosing_namespace: &Namespace, - default: Option<&Value>, ) -> AvroResult<Schema> { items .iter() .map(|v| self.parse(v, enclosing_namespace)) .collect::<Result<Vec<_>, _>>() - .and_then(|schemas| { - if let Some(default_value) = default.cloned() { - let avro_value = types::Value::from(default_value); - let resolved = schemas - .iter() - .any(|schema| avro_value.to_owned().resolve(schema).is_ok()); - - if !resolved { - let schema: Option<&Schema> = schemas.get(0); - return match schema { - Some(first_schema) => Err(Error::GetDefaultUnion( - SchemaKind::from(first_schema), - types::ValueKind::from(avro_value), - )), - None => Err(Error::EmptyUnion), - }; - } - } - Ok(schemas) - }) .and_then(|schemas| Ok(Schema::Union(UnionSchema::new(schemas)?))) } @@ -1484,21 +1739,24 @@ _ => None, }); - let size = size_opt - .and_then(|v| v.as_i64()) - .ok_or(Error::GetFixedSizeField)?; + let size = match size_opt { + Some(size) => size + .as_u64() + .ok_or_else(|| Error::GetFixedSizeFieldPositive(size.clone())), + None => Err(Error::GetFixedSizeField), + }?; - let name = Name::parse(complex)?; - let fully_qualified_name = name.fully_qualified_name(enclosing_namespace); + let name = Name::parse(complex, enclosing_namespace)?; + let fully_qualified_name = name.clone(); let aliases = fix_aliases_namespace(complex.aliases(), &name.namespace); - let schema = Schema::Fixed { - name, + let schema = Schema::Fixed(FixedSchema { + name: fully_qualified_name.clone(), aliases: aliases.clone(), doc, size: size as usize, attributes: self.get_custom_attributes(complex, vec!["size"]), - }; + }); self.register_parsed_schema(&fully_qualified_name, &schema, &aliases); @@ -1575,13 +1833,13 @@ impl Serialize for Schema { } seq.end() } - Schema::Record { + Schema::Record(RecordSchema { ref name, ref aliases, ref doc, ref fields, .. - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "record")?; if let Some(ref n) = name.namespace { @@ -1597,12 +1855,12 @@ map.serialize_entry("fields", fields)?; map.end() } - Schema::Enum { + Schema::Enum(EnumSchema { ref name, ref symbols, ref aliases, .. - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "enum")?; if let Some(ref n) = name.namespace { @@ -1616,13 +1874,13 @@ } map.end() } - Schema::Fixed { + Schema::Fixed(FixedSchema { ref name, ref doc, ref size, ref aliases, ..
- } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", "fixed")?; if let Some(ref n) = name.namespace { @@ -1639,11 +1897,11 @@ impl Serialize for Schema { } map.end() } - Schema::Decimal { + Schema::Decimal(DecimalSchema { ref scale, ref precision, ref inner, - } => { + }) => { let mut map = serializer.serialize_map(None)?; map.serialize_entry("type", &*inner.clone())?; map.serialize_entry("logicalType", "decimal")?; @@ -1687,18 +1945,30 @@ impl Serialize for Schema { map.serialize_entry("logicalType", "timestamp-micros")?; map.end() } + Schema::LocalTimestampMillis => { + let mut map = serializer.serialize_map(None)?; + map.serialize_entry("type", "long")?; + map.serialize_entry("logicalType", "local-timestamp-millis")?; + map.end() + } + Schema::LocalTimestampMicros => { + let mut map = serializer.serialize_map(None)?; + map.serialize_entry("type", "long")?; + map.serialize_entry("logicalType", "local-timestamp-micros")?; + map.end() + } Schema::Duration => { let mut map = serializer.serialize_map(None)?; // the Avro doesn't indicate what the name of the underlying fixed type of a // duration should be or typically is. - let inner = Schema::Fixed { + let inner = Schema::Fixed(FixedSchema { name: Name::new("duration").unwrap(), aliases: None, doc: None, size: 12, attributes: Default::default(), - }; + }); map.serialize_entry("type", &inner)?; map.serialize_entry("logicalType", "duration")?; map.end() @@ -2038,6 +2308,7 @@ pub mod derive { #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; use serde_json::json; @@ -2047,31 +2318,35 @@ mod tests { } #[test] - fn test_primitive_schema() { - assert_eq!(Schema::Null, Schema::parse_str("\"null\"").unwrap()); - assert_eq!(Schema::Int, Schema::parse_str("\"int\"").unwrap()); - assert_eq!(Schema::Double, Schema::parse_str("\"double\"").unwrap()); + fn test_primitive_schema() -> TestResult { + assert_eq!(Schema::Null, Schema::parse_str("\"null\"")?); + assert_eq!(Schema::Int, Schema::parse_str("\"int\"")?); + assert_eq!(Schema::Double, Schema::parse_str("\"double\"")?); + Ok(()) } #[test] - fn test_array_schema() { - let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#).unwrap(); + fn test_array_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "array", "items": "string"}"#)?; assert_eq!(Schema::Array(Box::new(Schema::String)), schema); + Ok(()) } #[test] - fn test_map_schema() { - let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#).unwrap(); + fn test_map_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "map", "values": "double"}"#)?; assert_eq!(Schema::Map(Box::new(Schema::Double)), schema); + Ok(()) } #[test] - fn test_union_schema() { - let schema = Schema::parse_str(r#"["null", "int"]"#).unwrap(); + fn test_union_schema() -> TestResult { + let schema = Schema::parse_str(r#"["null", "int"]"#)?; assert_eq!( - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), schema ); + Ok(()) } #[test] @@ -2081,10 +2356,10 @@ mod tests { } #[test] - fn test_multi_union_schema() { + fn test_multi_union_schema() -> TestResult { let schema = Schema::parse_str(r#"["null", "int", "float", "string", "bytes"]"#); assert!(schema.is_ok()); - let schema = schema.unwrap(); + let schema = schema?; assert_eq!(SchemaKind::from(&schema), SchemaKind::Union); let union_schema = match schema { 
Schema::Union(u) => u, @@ -2107,28 +2382,26 @@ mod tests { SchemaKind::Bytes ); assert_eq!(variants.next(), None); + + Ok(()) } - // AVRO-3621 #[test] - fn test_avro_3621_nullable_record_field() { + fn test_avro_3621_nullable_record_field() -> TestResult { let nullable_record_field = RecordField { name: "next".to_string(), doc: None, default: None, aliases: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name { + name: "LongList".to_owned(), + namespace: None, }, - ]) - .unwrap(), - ), + }, + ])?), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2148,11 +2421,12 @@ mod tests { }; assert!(!non_nullable_record_field.is_nullable()); + Ok(()) } // AVRO-3248 #[test] - fn test_union_of_records() { + fn test_union_of_records() -> TestResult { use std::iter::FromIterator; // A and B are the same except the name. @@ -2181,14 +2455,13 @@ mod tests { ] }"#; - let schema_c = Schema::parse_list(&[schema_str_a, schema_str_b, schema_str_c]) - .unwrap() + let schema_c = Schema::parse_list(&[schema_str_a, schema_str_b, schema_str_c])? .last() .unwrap() .clone(); - let schema_c_expected = Schema::Record { - name: Name::new("C").unwrap(), + let schema_c_expected = Schema::Record(RecordSchema { + name: Name::new("C")?, aliases: None, doc: None, fields: vec![RecordField { @@ -2196,31 +2469,28 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Ref { - name: Name::new("A").unwrap(), - }, - Schema::Ref { - name: Name::new("B").unwrap(), - }, - ]) - .unwrap(), - ), + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Ref { + name: Name::new("A")?, + }, + Schema::Ref { + name: Name::new("B")?, + }, + ])?), order: RecordFieldOrder::Ignore, position: 0, custom_attributes: Default::default(), }], lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), attributes: Default::default(), - }; + }); assert_eq!(schema_c, schema_c_expected); + Ok(()) } - // AVRO-3584 : recursion in type definitions #[test] - fn avro_3584_test_recursion_records() { + fn avro_3584_test_recursion_records() -> TestResult { // A and B are the same except the name. let schema_str_a = r#"{ "name": "A", @@ -2234,26 +2504,27 @@ mod tests { "fields": [ {"name": "field_one", "type": "A"} ] }"#; - let list = Schema::parse_list(&[schema_str_a, schema_str_b]).unwrap(); + let list = Schema::parse_list(&[schema_str_a, schema_str_b])?; let schema_a = list.first().unwrap().clone(); match schema_a { - Schema::Record { fields, .. } => { + Schema::Record(RecordSchema { fields, .. }) => { let f1 = fields.get(0); let ref_schema = Schema::Ref { - name: Name::new("B").unwrap(), + name: Name::new("B")?, }; assert_eq!(ref_schema, f1.unwrap().schema); } _ => panic!("Expected a record schema!"), } + + Ok(()) } - // AVRO-3248 #[test] - fn test_nullable_record() { + fn test_avro_3248_nullable_record() -> TestResult { use std::iter::FromIterator; let schema_str_a = r#"{ @@ -2273,14 +2544,13 @@ mod tests { ] }"#; - let schema_option_a = Schema::parse_list(&[schema_str_a, schema_str_option_a]) - .unwrap() + let schema_option_a = Schema::parse_list(&[schema_str_a, schema_str_option_a])? 
.last() .unwrap() .clone(); - let schema_option_a_expected = Schema::Record { - name: Name::new("OptionA").unwrap(), + let schema_option_a_expected = Schema::Record(RecordSchema { + name: Name::new("OptionA")?, aliases: None, doc: None, fields: vec![RecordField { @@ -2288,28 +2558,27 @@ mod tests { doc: None, default: Some(Value::Null), aliases: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name::new("A").unwrap(), - }, - ]) - .unwrap(), - ), + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name::new("A")?, + }, + ])?), order: RecordFieldOrder::Ignore, position: 0, custom_attributes: Default::default(), }], lookup: BTreeMap::from_iter(vec![("field_one".to_string(), 0)]), attributes: Default::default(), - }; + }); assert_eq!(schema_option_a, schema_option_a_expected); + + Ok(()) } #[test] - fn test_record_schema() { + fn test_record_schema() -> TestResult { let parsed = Schema::parse_str( r#" { @@ -2321,15 +2590,14 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("a".to_owned(), 0); lookup.insert("b".to_owned(), 1); - let expected = Schema::Record { - name: Name::new("test").unwrap(), + let expected = Schema::Record(RecordSchema { + name: Name::new("test")?, aliases: None, doc: None, fields: vec![ @@ -2356,14 +2624,15 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(parsed, expected); + + Ok(()) } - // AVRO-3302 #[test] - fn test_record_schema_with_currently_parsing_schema() { + fn test_avro_3302_record_schema_with_currently_parsing_schema() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2382,8 +2651,7 @@ mod tests { }] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("recordField".to_owned(), 0); @@ -2392,8 +2660,8 @@ mod tests { node_lookup.insert("children".to_owned(), 1); node_lookup.insert("label".to_owned(), 0); - let expected = Schema::Record { - name: Name::new("test").unwrap(), + let expected = Schema::Record(RecordSchema { + name: Name::new("test")?, aliases: None, doc: None, fields: vec![RecordField { @@ -2401,8 +2669,8 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Record { - name: Name::new("Node").unwrap(), + schema: Schema::Record(RecordSchema { + name: Name::new("Node")?, aliases: None, doc: None, fields: vec![ @@ -2422,7 +2690,7 @@ mod tests { default: None, aliases: None, schema: Schema::Array(Box::new(Schema::Ref { - name: Name::new("Node").unwrap(), + name: Name::new("Node")?, })), order: RecordFieldOrder::Ascending, position: 1, @@ -2431,24 +2699,26 @@ mod tests { ], lookup: node_lookup, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), }], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"test","type":"record","fields":[{"name":"recordField","type":{"name":"Node","type":"record","fields":[{"name":"label","type":"string"},{"name":"children","type":{"type":"array","items":"Node"}}]}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } // https://github.com/flavray/avro-rs/pull/99#issuecomment-1016948451 #[test] - fn test_parsing_of_recursive_type_enum() { + fn test_parsing_of_recursive_type_enum() -> TestResult { let schema = r#" { "type": "record", @@ -2492,14 +2762,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = 
Schema::parse_str(schema)?; let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"Employee","type":"record","fields":[{"name":"gender","type":{"name":"Gender","type":"enum","symbols":["male","female"]}}]},{"name":"Manager","type":"record","fields":[{"name":"gender","type":"Gender"}]}]}]}"#; + let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"gender","type":{"name":"office.Gender","type":"enum","symbols":["male","female"]}}]},{"name":"office.Manager","type":"record","fields":[{"name":"gender","type":"office.Gender"}]}]}]}"#; assert_eq!(schema_str, expected); + + Ok(()) } #[test] - fn test_parsing_of_recursive_type_fixed() { + fn test_parsing_of_recursive_type_fixed() -> TestResult { let schema = r#" { "type": "record", @@ -2540,15 +2812,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let schema_str = schema.canonical_form(); - let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"Employee","type":"record","fields":[{"name":"id","type":{"name":"EmployeeId","type":"fixed","size":16}}]},{"name":"Manager","type":"record","fields":[{"name":"id","type":"EmployeeId"}]}]}]}"#; + let expected = r#"{"name":"office.User","type":"record","fields":[{"name":"details","type":[{"name":"office.Employee","type":"record","fields":[{"name":"id","type":{"name":"office.EmployeeId","type":"fixed","size":16}}]},{"name":"office.Manager","type":"record","fields":[{"name":"id","type":"office.EmployeeId"}]}]}]}"#; assert_eq!(schema_str, expected); + + Ok(()) } - // AVRO-3302 #[test] - fn test_record_schema_with_currently_parsing_schema_aliases() { + fn test_avro_3302_record_schema_with_currently_parsing_schema_aliases() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2561,14 +2834,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("value".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "LongList".to_owned(), namespace: None, @@ -2591,18 +2863,15 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Union( - UnionSchema::new(vec![ - Schema::Null, - Schema::Ref { - name: Name { - name: "LongList".to_owned(), - namespace: None, - }, + schema: Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::Ref { + name: Name { + name: "LongList".to_owned(), + namespace: None, }, - ]) - .unwrap(), - ), + }, + ])?), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2610,17 +2879,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"LongList","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":["null","LongList"]}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_record() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2632,14 +2902,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("value".to_owned(), 0); 
lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2675,17 +2944,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"record","type":"record","fields":[{"name":"value","type":"long"},{"name":"next","type":"record"}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_enum() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2701,14 +2971,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("enum".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2721,7 +2990,7 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Enum { + schema: Schema::Enum(EnumSchema { name: Name { name: "enum".to_owned(), namespace: None, @@ -2729,8 +2998,9 @@ mod tests { aliases: None, doc: None, symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], + default: None, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), @@ -2740,7 +3010,7 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Enum { + schema: Schema::Enum(EnumSchema { name: Name { name: "enum".to_owned(), namespace: None, @@ -2748,8 +3018,9 @@ mod tests { aliases: None, doc: None, symbols: vec!["one".to_string(), "two".to_string(), "three".to_string()], + default: None, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2757,17 +3028,18 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"record","type":"record","fields":[{"name":"enum","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}},{"name":"next","type":{"name":"enum","type":"enum","symbols":["one","two","three"]}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } - // AVRO-3370 #[test] - fn test_record_schema_with_currently_parsing_schema_named_fixed() { + fn test_avro_3370_record_schema_with_currently_parsing_schema_named_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2783,14 +3055,13 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let mut lookup = BTreeMap::new(); lookup.insert("fixed".to_owned(), 0); lookup.insert("next".to_owned(), 1); - let expected = Schema::Record { + let expected = Schema::Record(RecordSchema { name: Name { name: "record".to_owned(), namespace: None, @@ -2803,7 +3074,7 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Fixed { + schema: Schema::Fixed(FixedSchema { name: Name { name: "fixed".to_owned(), namespace: None, @@ -2812,7 +3083,7 @@ mod tests { doc: None, size: 456, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 0, custom_attributes: Default::default(), @@ -2822,7 +3093,7 @@ mod tests { doc: None, default: None, aliases: None, - schema: Schema::Fixed { + schema: Schema::Fixed(FixedSchema { name: Name { name: 
"fixed".to_owned(), namespace: None, @@ -2831,7 +3102,7 @@ mod tests { doc: None, size: 456, attributes: Default::default(), - }, + }), order: RecordFieldOrder::Ascending, position: 1, custom_attributes: Default::default(), @@ -2839,22 +3110,24 @@ mod tests { ], lookup, attributes: Default::default(), - }; + }); assert_eq!(schema, expected); let canonical_form = &schema.canonical_form(); let expected = r#"{"name":"record","type":"record","fields":[{"name":"fixed","type":{"name":"fixed","type":"fixed","size":456}},{"name":"next","type":{"name":"fixed","type":"fixed","size":456}}]}"#; assert_eq!(canonical_form, &expected); + + Ok(()) } #[test] - fn test_enum_schema() { + fn test_enum_schema() -> TestResult { let schema = Schema::parse_str( r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "hearts"]}"#, - ).unwrap(); + )?; - let expected = Schema::Enum { - name: Name::new("Suit").unwrap(), + let expected = Schema::Enum(EnumSchema { + name: Name::new("Suit")?, aliases: None, doc: None, symbols: vec![ @@ -2863,89 +3136,103 @@ mod tests { "clubs".to_owned(), "hearts".to_owned(), ], + default: None, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_enum_schema_duplicate() { + fn test_enum_schema_duplicate() -> TestResult { // Duplicate "diamonds" let schema = Schema::parse_str( r#"{"type": "enum", "name": "Suit", "symbols": ["diamonds", "spades", "clubs", "diamonds"]}"#, ); assert!(schema.is_err()); + + Ok(()) } #[test] - fn test_enum_schema_name() { + fn test_enum_schema_name() -> TestResult { // Invalid name "0000" does not match [A-Za-z_][A-Za-z0-9_]* let schema = Schema::parse_str( r#"{"type": "enum", "name": "Enum", "symbols": ["0000", "variant"]}"#, ); assert!(schema.is_err()); + + Ok(()) } #[test] - fn test_fixed_schema() { - let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#).unwrap(); + fn test_fixed_schema() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "fixed", "name": "test", "size": 16}"#)?; - let expected = Schema::Fixed { - name: Name::new("test").unwrap(), + let expected = Schema::Fixed(FixedSchema { + name: Name::new("test")?, aliases: None, doc: None, size: 16usize, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_fixed_schema_with_documentation() { + fn test_fixed_schema_with_documentation() -> TestResult { let schema = Schema::parse_str( r#"{"type": "fixed", "name": "test", "size": 16, "doc": "FixedSchema documentation"}"#, - ) - .unwrap(); + )?; - let expected = Schema::Fixed { - name: Name::new("test").unwrap(), + let expected = Schema::Fixed(FixedSchema { + name: Name::new("test")?, aliases: None, doc: Some(String::from("FixedSchema documentation")), size: 16usize, attributes: Default::default(), - }; + }); assert_eq!(expected, schema); + + Ok(()) } #[test] - fn test_no_documentation() { - let schema = - Schema::parse_str(r#"{"type": "enum", "name": "Coin", "symbols": ["heads", "tails"]}"#) - .unwrap(); + fn test_no_documentation() -> TestResult { + let schema = Schema::parse_str( + r#"{"type": "enum", "name": "Coin", "symbols": ["heads", "tails"]}"#, + )?; let doc = match schema { - Schema::Enum { doc, .. } => doc, - _ => return, + Schema::Enum(EnumSchema { doc, .. 
}) => doc, + _ => unreachable!(), }; assert!(doc.is_none()); + + Ok(()) } #[test] - fn test_documentation() { + fn test_documentation() -> TestResult { let schema = Schema::parse_str( - r#"{"type": "enum", "name": "Coin", "doc": "Some documentation", "symbols": ["heads", "tails"]}"# - ).unwrap(); + r#"{"type": "enum", "name": "Coin", "doc": "Some documentation", "symbols": ["heads", "tails"]}"#, + )?; let doc = match schema { - Schema::Enum { doc, .. } => doc, + Schema::Enum(EnumSchema { doc, .. }) => doc, _ => None, }; assert_eq!("Some documentation".to_owned(), doc.unwrap()); + + Ok(()) } // Tests to ensure Schema is Send + Sync. These tests don't need to _do_ anything, if they can @@ -2968,8 +3255,7 @@ mod tests { } #[test] - #[cfg_attr(miri, ignore)] // Sha256 uses an inline assembly instructions which is not supported by miri - fn test_schema_fingerprint() { + fn test_schema_fingerprint() -> TestResult { use crate::rabin::Rabin; use md5::Md5; use sha2::Sha256; @@ -2986,7 +3272,7 @@ mod tests { } "#; - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; assert_eq!( "abf662f831715ff78f88545a05a9262af75d6406b54e1a8a174ff1d2b75affc4", format!("{}", schema.fingerprint::()) @@ -2999,33 +3285,40 @@ mod tests { assert_eq!( "28cf0a67d9937bb3", format!("{}", schema.fingerprint::()) - ) + ); + + Ok(()) } #[test] - fn test_logical_types() { - let schema = Schema::parse_str(r#"{"type": "int", "logicalType": "date"}"#).unwrap(); + fn test_logical_types() -> TestResult { + let schema = Schema::parse_str(r#"{"type": "int", "logicalType": "date"}"#)?; assert_eq!(schema, Schema::Date); - let schema = - Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-micros"}"#).unwrap(); + let schema = Schema::parse_str(r#"{"type": "long", "logicalType": "timestamp-micros"}"#)?; assert_eq!(schema, Schema::TimestampMicros); + + Ok(()) } #[test] - fn test_nullable_logical_type() { + fn test_nullable_logical_type() -> TestResult { let schema = Schema::parse_str( r#"{"type": ["null", {"type": "long", "logicalType": "timestamp-micros"}]}"#, - ) - .unwrap(); + )?; assert_eq!( schema, - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::TimestampMicros]).unwrap()) + Schema::Union(UnionSchema::new(vec![ + Schema::Null, + Schema::TimestampMicros + ])?) 
); + + Ok(()) } #[test] - fn record_field_order_from_str() { + fn record_field_order_from_str() -> TestResult { use std::str::FromStr; assert_eq!( @@ -3041,11 +3334,12 @@ mod tests { RecordFieldOrder::Ignore ); assert!(RecordFieldOrder::from_str("not an ordering").is_err()); + + Ok(()) } - /// AVRO-3374 #[test] - fn test_avro_3374_preserve_namespace_for_primitive() { + fn test_avro_3374_preserve_namespace_for_primitive() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3057,18 +3351,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; let json = schema.canonical_form(); assert_eq!( json, r#"{"name":"ns.int","type":"record","fields":[{"name":"value","type":"int"},{"name":"next","type":["null","ns.int"]}]}"# ); + + Ok(()) } #[test] - fn test_avro_3433_preserve_schema_refs_in_json() { + fn test_avro_3433_preserve_schema_refs_in_json() -> TestResult { let schema = r#" { "name": "test.test", @@ -3083,14 +3378,16 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let expected = r#"{"name":"test.test","type":"record","fields":[{"name":"bar","type":{"name":"test.foo","type":"record","fields":[{"name":"id","type":"long"}]}},{"name":"baz","type":"test.foo"}]}"#; assert_eq!(schema.canonical_form(), expected); + + Ok(()) } #[test] - fn test_read_namespace_from_name() { + fn test_read_namespace_from_name() -> TestResult { let schema = r#" { "name": "space.name", @@ -3104,17 +3401,19 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.name, "name"); assert_eq!(name.namespace, Some("space".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] - fn test_namespace_from_name_has_priority_over_from_field() { + fn test_namespace_from_name_has_priority_over_from_field() -> TestResult { let schema = r#" { "name": "space1.name", @@ -3129,16 +3428,18 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.namespace, Some("space1".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] - fn test_namespace_from_field() { + fn test_namespace_from_field() -> TestResult { let schema = r#" { "name": "name", @@ -3153,20 +3454,24 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { name, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { name, .. }) = schema { assert_eq!(name.namespace, Some("space2".to_string())); } else { panic!("Expected a record schema!"); } + + Ok(()) } #[test] /// Zero-length namespace is considered as no-namespace. 
- fn test_namespace_from_name_with_empty_value() { - let name = Name::new(".name").unwrap(); + fn test_namespace_from_name_with_empty_value() -> TestResult { + let name = Name::new(".name")?; assert_eq!(name.name, "name"); assert_eq!(name.namespace, None); + + Ok(()) } #[test] @@ -3188,7 +3493,7 @@ mod tests { } #[test] - fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_record_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3218,16 +3523,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_record_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_record_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3257,16 +3564,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3291,16 +3600,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3325,16 +3636,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inherited_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3359,16 +3672,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { - 
assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_qualified_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_qualified_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3393,16 +3708,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_record_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_record_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3433,16 +3750,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_record_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_enum_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_enum_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3468,16 +3787,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_resolution_inner_fixed_inner_namespace() { + fn avro_3448_test_proper_resolution_inner_fixed_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3503,16 +3824,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_outer_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_outer_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3554,7 +3877,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3562,12 +3885,14 @@ mod tests { "space.middle_record_name", "space.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn 
avro_3448_test_proper_multi_level_resolution_inner_record_middle_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_middle_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3610,7 +3935,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3618,12 +3943,14 @@ mod tests { "middle_namespace.middle_record_name", "middle_namespace.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_multi_level_resolution_inner_record_inner_namespace() { + fn avro_3448_test_proper_multi_level_resolution_inner_record_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3667,7 +3994,7 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 3); for s in &[ @@ -3675,12 +4002,14 @@ mod tests { "middle_namespace.middle_record_name", "inner_namespace.inner_record_name", ] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_in_array_resolution_inherited_namespace() { + fn avro_3448_test_proper_in_array_resolution_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3710,16 +4039,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_array_record"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3448_test_proper_in_map_resolution_inherited_namespace() { + fn avro_3448_test_proper_in_map_resolution_inherited_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3749,16 +4080,18 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "space.in_map_record"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } + + Ok(()) } #[test] - fn avro_3466_test_to_json_inner_enum_inner_namespace() { + fn avro_3466_test_to_json_inner_enum_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3784,23 +4117,25 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_enum_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } // convert Schema back to JSON string let schema_str = 
serde_json::to_string(&schema).expect("test failed"); let _schema = Schema::parse_str(&schema_str).expect("test failed"); assert_eq!(schema, _schema); + + Ok(()) } #[test] - fn avro_3466_test_to_json_inner_fixed_inner_namespace() { + fn avro_3466_test_to_json_inner_fixed_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -3826,19 +4161,21 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let rs = ResolvedSchema::try_from(&schema).expect("Schema didn't successfully parse"); // confirm we have expected 2 full-names assert_eq!(rs.get_names().len(), 2); for s in &["space.record_name", "inner_space.inner_fixed_name"] { - assert!(rs.get_names().contains_key(&Name::new(s).unwrap())); + assert!(rs.get_names().contains_key(&Name::new(s)?)); } // convert Schema back to JSON string let schema_str = serde_json::to_string(&schema).expect("test failed"); let _schema = Schema::parse_str(&schema_str).expect("test failed"); assert_eq!(schema, _schema); + + Ok(()) } fn assert_avro_3512_aliases(aliases: &Aliases) { @@ -3856,7 +4193,7 @@ mod tests { } #[test] - fn avro_3512_alias_with_null_namespace_record() { + fn avro_3512_alias_with_null_namespace_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3869,18 +4206,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - if let Schema::Record { ref aliases, .. } = schema { + if let Schema::Record(RecordSchema { ref aliases, .. }) = schema { assert_avro_3512_aliases(aliases); } else { panic!("The Schema should be a record: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3512_alias_with_null_namespace_enum() { + fn avro_3512_alias_with_null_namespace_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3893,18 +4231,19 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - if let Schema::Enum { ref aliases, .. } = schema { + if let Schema::Enum(EnumSchema { ref aliases, .. }) = schema { assert_avro_3512_aliases(aliases); } else { panic!("The Schema should be an enum: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3512_alias_with_null_namespace_fixed() { + fn avro_3512_alias_with_null_namespace_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3915,18 +4254,19 @@ mod tests { "size" : 12 } "#, - ) - .unwrap(); + )?; - if let Schema::Fixed { ref aliases, .. } = schema { + if let Schema::Fixed(FixedSchema { ref aliases, .. 
}) = schema { assert_avro_3512_aliases(aliases); } else { panic!("The Schema should be a fixed: {schema:?}"); } + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_record() { + fn avro_3518_serialize_aliases_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3945,20 +4285,21 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( r#"{"aliases":["space.b","x.y","c"],"fields":[{"aliases":["time1","ns.time2"],"default":123,"name":"time","type":"long"}],"name":"a","namespace":"space","type":"record"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_enum() { + fn avro_3518_serialize_aliases_enum() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3971,20 +4312,21 @@ mod tests { ] } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","symbols":["symbol1","symbol2"],"type":"enum"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3518_serialize_aliases_fixed() { + fn avro_3518_serialize_aliases_fixed() -> TestResult { let schema = Schema::parse_str( r#" { @@ -3995,20 +4337,21 @@ mod tests { "size" : 12 } "#, - ) - .unwrap(); + )?; - let value = serde_json::to_value(&schema).unwrap(); - let serialized = serde_json::to_string(&value).unwrap(); + let value = serde_json::to_value(&schema)?; + let serialized = serde_json::to_string(&value)?; assert_eq!( r#"{"aliases":["space.b","x.y","c"],"name":"a","namespace":"space","size":12,"type":"fixed"}"#, &serialized ); - assert_eq!(schema, Schema::parse_str(&serialized).unwrap()); + assert_eq!(schema, Schema::parse_str(&serialized)?); + + Ok(()) } #[test] - fn avro_3130_parse_anonymous_union_type() { + fn avro_3130_parse_anonymous_union_type() -> TestResult { let schema_str = r#" { "type": "record", @@ -4028,10 +4371,10 @@ mod tests { ] } "#; - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; - if let Schema::Record { name, fields, .. } = schema { - assert_eq!(name, Name::new("AccountEvent").unwrap()); + if let Schema::Record(RecordSchema { name, fields, .. 
}) = schema {
+ assert_eq!(name, Name::new("AccountEvent")?);
let field = &fields[0];
assert_eq!(&field.name, "NullableLongArray");
@@ -4054,10 +4397,12 @@ mod tests {
} else {
panic!("Expected Schema::Record");
}
+
+ Ok(())
}
#[test]
- fn avro_custom_attributes_schema_without_attributes() {
+ fn avro_custom_attributes_schema_without_attributes() -> TestResult {
let schemata_str = [
r#"
{
@@ -4085,9 +4430,11 @@ mod tests {
"#,
];
for schema_str in schemata_str.iter() {
- let schema = Schema::parse_str(schema_str).unwrap();
+ let schema = Schema::parse_str(schema_str)?;
assert_eq!(schema.custom_attributes(), Some(&Default::default()));
}
+
+ Ok(())
}
const CUSTOM_ATTRS_SUFFIX: &str = r#"
@@ -4101,7 +4448,7 @@ mod tests {
"#;
#[test]
- fn avro_3609_custom_attributes_schema_with_attributes() {
+ fn avro_3609_custom_attributes_schema_with_attributes() -> TestResult {
let schemata_str = [
r#"
{
@@ -4141,14 +4488,15 @@ mod tests {
.to_owned()
.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX)
.as_str(),
- )
- .unwrap();
+ )?;
assert_eq!(
schema.custom_attributes(),
Some(&expected_custom_attibutes())
);
}
+
+ Ok(())
}
fn expected_custom_attibutes() -> BTreeMap<String, Value> {
@@ -4167,7 +4515,7 @@ mod tests {
}
#[test]
- fn avro_3609_custom_attributes_record_field_without_attributes() {
+ fn avro_3609_custom_attributes_record_field_without_attributes() -> TestResult {
let schema_str = String::from(
r#"
{
@@ -4185,12 +4533,11 @@ mod tests {
"#,
);
- let schema =
- Schema::parse_str(schema_str.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX).as_str()).unwrap();
+ let schema = Schema::parse_str(schema_str.replace("{{{}}}", CUSTOM_ATTRS_SUFFIX).as_str())?;
match schema {
- Schema::Record { name, fields, .. } => {
- assert_eq!(name, Name::new("Rec").unwrap());
+ Schema::Record(RecordSchema { name, fields, .. }) => {
+ assert_eq!(name, Name::new("Rec")?);
assert_eq!(fields.len(), 1);
let field = &fields[0];
assert_eq!(&field.name, "field_one");
@@ -4198,10 +4545,12 @@ mod tests {
}
_ => panic!("Expected Schema::Record"),
}
+
+ Ok(())
}
#[test]
- fn avro_3625_null_is_first() {
+ fn avro_3625_null_is_first() -> TestResult {
let schema_str = String::from(
r#"
{
@@ -4214,11 +4563,11 @@ mod tests {
"#,
);
- let schema = Schema::parse_str(&schema_str).unwrap();
+ let schema = Schema::parse_str(&schema_str)?;
match schema {
- Schema::Record { name, fields, .. } => {
- assert_eq!(name, Name::new("union_schema_test").unwrap());
+ Schema::Record(RecordSchema { name, fields, .. }) => {
+ assert_eq!(name, Name::new("union_schema_test")?);
assert_eq!(fields.len(), 1);
let field = &fields[0];
assert_eq!(&field.name, "a");
@@ -4235,10 +4584,12 @@ mod tests {
}
_ => panic!("Expected Schema::Record"),
}
+
+ Ok(())
}
#[test]
- fn avro_3625_null_is_last() {
+ fn avro_3625_null_is_last() -> TestResult {
let schema_str = String::from(
r#"
{
@@ -4251,11 +4602,11 @@ mod tests {
"#,
);
- let schema = Schema::parse_str(&schema_str).unwrap();
+ let schema = Schema::parse_str(&schema_str)?;
match schema {
- Schema::Record { name, fields, .. } => {
- assert_eq!(name, Name::new("union_schema_test").unwrap());
+ Schema::Record(RecordSchema { name, fields, ..
}) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4271,10 +4622,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3625_null_is_the_middle() { + fn avro_3625_null_is_the_middle() -> TestResult { let schema_str = String::from( r#" { @@ -4287,11 +4640,11 @@ mod tests { "#, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("union_schema_test").unwrap()); + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4308,10 +4661,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3649_default_notintfirst() { + fn avro_3649_default_notintfirst() -> TestResult { let schema_str = String::from( r#" { @@ -4324,11 +4679,11 @@ mod tests { "#, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; match schema { - Schema::Record { name, fields, .. } => { - assert_eq!(name, Name::new("union_schema_test").unwrap()); + Schema::Record(RecordSchema { name, fields, .. }) => { + assert_eq!(name, Name::new("union_schema_test")?); assert_eq!(fields.len(), 1); let field = &fields[0]; assert_eq!(&field.name, "a"); @@ -4344,10 +4699,12 @@ mod tests { } _ => panic!("Expected Schema::Record"), } + + Ok(()) } #[test] - fn avro_3709_parsing_of_record_field_aliases() { + fn avro_3709_parsing_of_record_field_aliases() -> TestResult { let schema = r#" { "name": "rec", @@ -4362,13 +4719,1372 @@ mod tests { } "#; - let schema = Schema::parse_str(schema).unwrap(); - if let Schema::Record { fields, .. } = schema { + let schema = Schema::parse_str(schema)?; + if let Schema::Record(RecordSchema { fields, .. 
}) = schema { let num_field = &fields[0]; assert_eq!(num_field.name, "num"); assert_eq!(num_field.aliases, Some(vec!("num1".into(), "num2".into()))); } else { panic!("Expected a record schema!"); } + + Ok(()) + } + + #[test] + fn avro_3735_parse_enum_namespace() -> TestResult { + let schema = r#" + { + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + } + "#; + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + let schema = Schema::parse_str(schema)?; + + let foo = Foo { + bar_init: Bar::Bar0, + bar_use: Bar::Bar1, + }; + + let avro_value = crate::to_value(foo)?; + assert!(avro_value.validate(&schema)); + + let mut writer = crate::Writer::new(&schema, Vec::new()); + + // schema validation happens here + writer.append(avro_value)?; + + Ok(()) + } + + #[test] + fn avro_3755_deserialize() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ] + } + }, + { + "name": "barUse", + "type": "Bar" + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo = Foo { + bar_init: Bar::Bar0, + bar_use: Bar::Bar1, + }; + let avro_value = crate::to_value(foo)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + assert_eq!(fields[1].0, "barUse"); + assert_eq!(fields[1].1, types::Value::Enum(1, "bar1".to_string())); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) + } + + #[test] + fn test_avro_3780_decimal_schema_type_with_fixed() -> TestResult { + let schema = json!( + { + "type": "record", + "name": "recordWithDecimal", + "fields": [ + { + "name": "decimal", + "type": "fixed", + "name": "nestedFixed", + "size": 8, + "logicalType": "decimal", + "precision": 4 + } + ] + }); 
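+ // A subtlety in the literal above: the field object repeats the "name" and
+ // "type" keys ("name": "decimal" followed by "name": "nestedFixed").
+ // serde_json's `json!` macro builds objects with `Map::insert`, so a
+ // duplicated key keeps the last value and the literal stays well-formed;
+ // the assertion below only checks that `Schema::parse` accepts the result.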
+ + let parse_result = Schema::parse(&schema); + assert!( + parse_result.is_ok(), + "parse result must be ok, got: {:?}", + parse_result + ); + + Ok(()) + } + + #[test] + fn test_avro_3772_enum_default_wrong_type() -> TestResult { + let schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "clubs", "hearts"], + "default": 123 + } + } + ] + } + "#; + + match Schema::parse_str(schema) { + Err(err) => { + assert_eq!( + err.to_string(), + "Default value for enum must be a string! Got: 123" + ); + } + _ => panic!("Expected an error"), + } + Ok(()) + } + + #[test] + fn test_avro_3812_handle_null_namespace_properly() -> TestResult { + let schema_str = r#" + { + "namespace": "", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "a", + "type": { + "type": "enum", + "name": "my_enum", + "namespace": "", + "symbols": ["a", "b"] + } + }, { + "name": "b", + "type": { + "type": "fixed", + "name": "my_fixed", + "namespace": "", + "size": 10 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"a","type":{"name":"my_enum","type":"enum","symbols":["a","b"]}},{"name":"b","type":{"name":"my_fixed","type":"fixed","size":10}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + let name = Name::new("my_name")?; + let fullname = name.fullname(Some("".to_string())); + assert_eq!(fullname, "my_name"); + let qname = name.fully_qualified_name(&Some("".to_string())).to_string(); + assert_eq!(qname, "my_name"); + + Ok(()) + } + + #[test] + fn test_avro_3818_inherit_enclosing_namespace() -> TestResult { + // Enclosing namespace is specified but inner namespaces are not. + let schema_str = r#" + { + "namespace": "my_ns", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"my_ns.fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Enclosing namespace and inner namespaces are specified + // but inner namespaces are "" + let schema_str = r#" + { + "namespace": "my_ns", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "namespace": "", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "namespace": "", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Enclosing namespace is "" and inner non-empty namespaces are specified. 
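+ // (In the previous case the explicit "" namespace behaves as the null
+ // namespace instead of inheriting "my_ns", which is why enum1 and fixed1
+ // carry no namespace prefix in the expected canonical form.)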
+ let schema_str = r#" + { + "namespace": "", + "type": "record", + "name": "my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "namespace": "f1.ns", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "f2.ns.fixed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"f1.ns.enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"f2.ns.fixed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Nested complex types with non-empty enclosing namespace. + let schema_str = r#" + { + "type": "record", + "name": "my_ns.my_schema", + "fields": [ + { + "name": "f1", + "type": { + "name": "inner_record1", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + } + ] + } + }, { + "name": "f2", + "type": { + "name": "inner_record2", + "type": "record", + "namespace": "inner_ns", + "fields": [ + { + "name": "f2_1", + "type": { + "name": "enum2", + "type": "enum", + "symbols": ["a"] + } + } + ] + } + } + ] + } + "#; + + let expected = r#"{"name":"my_ns.my_schema","type":"record","fields":[{"name":"f1","type":{"name":"my_ns.inner_record1","type":"record","fields":[{"name":"f1_1","type":{"name":"my_ns.enum1","type":"enum","symbols":["a"]}}]}},{"name":"f2","type":{"name":"inner_ns.inner_record2","type":"record","fields":[{"name":"f2_1","type":{"name":"inner_ns.enum2","type":"enum","symbols":["a"]}}]}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + Ok(()) + } + + #[test] + fn test_avro_3820_deny_invalid_field_names() -> TestResult { + let schema_str = r#" + { + "name": "my_record", + "type": "record", + "fields": [ + { + "name": "f1.x", + "type": { + "name": "my_enum", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "my_fixed", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::FieldName(x)) if x == "f1.x" => Ok(()), + other => Err(format!("Expected Error::FieldName, got {other:?}").into()), + } + } + + #[test] + fn test_avro_3827_disallow_duplicate_field_names() -> TestResult { + let schema_str = r#" + { + "name": "my_schema", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "a", + "type": "record", + "fields": [] + } + }, { + "name": "f1", + "type": { + "name": "b", + "type": "record", + "fields": [] + } + } + ] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::FieldNameDuplicate(_)) => (), + other => { + return Err(format!("Expected Error::FieldNameDuplicate, got {other:?}").into()) + } + }; + + let schema_str = r#" + { + "name": "my_schema", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "a", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "b", + "type": "record", + "fields": [] + } + } + ] + } + } + ] + } + "#; + + let expected = r#"{"name":"my_schema","type":"record","fields":[{"name":"f1","type":{"name":"a","type":"record","fields":[{"name":"f1","type":{"name":"b","type":"record","fields":[]}}]}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + Ok(()) + } + + 
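+ // An illustrative aside rather than part of the patch: the two tests above
+ // draw the line the parser enforces. Sibling fields may not share a name,
+ // but a nested record may reuse an enclosing field's name, because the
+ // inner record is a distinct named type. Assuming the same TestResult
+ // helper and the Error::FieldNameDuplicate variant matched above, the
+ // rejected half of that contrast condenses to:
+ //
+ // #[test]
+ // fn sketch_duplicate_sibling_field_names_are_rejected() -> TestResult {
+ //     let dup = r#"{"name": "r", "type": "record", "fields": [
+ //         {"name": "f", "type": "int"},
+ //         {"name": "f", "type": "string"}
+ //     ]}"#;
+ //     assert!(matches!(
+ //         Schema::parse_str(dup),
+ //         Err(Error::FieldNameDuplicate(_))
+ //     ));
+ //     Ok(())
+ // }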
#[test] + fn test_avro_3830_null_namespace_in_fully_qualified_names() -> TestResult { + // Check whether all the named types don't refer to the namespace field + // if their name starts with a dot. + let schema_str = r#" + { + "name": ".record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": ".enum1", + "namespace": "ns2", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": ".fxed1", + "namespace": "ns3", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Check whether inner types don't inherit ns1. + let schema_str = r#" + { + "name": ".record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a"] + } + }, { + "name": "f2", + "type": { + "name": "fxed1", + "type": "fixed", + "size": 1 + } + } + ] + } + "#; + + let expected = r#"{"name":"record1","type":"record","fields":[{"name":"f1","type":{"name":"enum1","type":"enum","symbols":["a"]}},{"name":"f2","type":{"name":"fxed1","type":"fixed","size":1}}]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + let name = Name::new(".my_name")?; + let fullname = name.fullname(None); + assert_eq!(fullname, "my_name"); + let qname = name.fully_qualified_name(&None).to_string(); + assert_eq!(qname, "my_name"); + + Ok(()) + } + + #[test] + fn test_avro_3814_schema_resolution_failure() -> TestResult { + // Define a reader schema: a nested record with an optional field. + let reader_schema = json!( + { + "type": "record", + "name": "MyOuterRecord", + "fields": [ + { + "name": "inner_record", + "type": [ + "null", + { + "type": "record", + "name": "MyRecord", + "fields": [ + {"name": "a", "type": "string"} + ] + } + ], + "default": null + } + ] + } + ); + + // Define a writer schema: a nested record with an optional field, which + // may optionally contain an enum. + let writer_schema = json!( + { + "type": "record", + "name": "MyOuterRecord", + "fields": [ + { + "name": "inner_record", + "type": [ + "null", + { + "type": "record", + "name": "MyRecord", + "fields": [ + {"name": "a", "type": "string"}, + { + "name": "b", + "type": [ + "null", + { + "type": "enum", + "name": "MyEnum", + "symbols": ["A", "B", "C"], + "default": "C" + } + ], + "default": null + }, + ] + } + ] + } + ], + "default": null + } + ); + + // Use different structs to represent the "Reader" and the "Writer" + // to mimic two different versions of a producer & consumer application. 
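+ // (Per Avro's schema-resolution rules, a field that exists only in the
+ // writer's record, here the optional `b`, is skipped when decoding with
+ // the reader schema, so the reader-side structs never need to model it.)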
+ #[derive(Serialize, Deserialize, Debug)]
+ struct MyInnerRecordReader {
+ a: String,
+ }
+
+ #[derive(Serialize, Deserialize, Debug)]
+ struct MyRecordReader {
+ inner_record: Option<MyInnerRecordReader>,
+ }
+
+ #[derive(Serialize, Deserialize, Debug)]
+ enum MyEnum {
+ A,
+ B,
+ C,
+ }
+
+ #[derive(Serialize, Deserialize, Debug)]
+ struct MyInnerRecordWriter {
+ a: String,
+ b: Option<MyEnum>,
+ }
+
+ #[derive(Serialize, Deserialize, Debug)]
+ struct MyRecordWriter {
+ inner_record: Option<MyInnerRecordWriter>,
+ }
+
+ let s = MyRecordWriter {
+ inner_record: Some(MyInnerRecordWriter {
+ a: "foo".to_string(),
+ b: None,
+ }),
+ };
+
+ // Serialize using the writer schema.
+ let writer_schema = Schema::parse(&writer_schema)?;
+ let avro_value = crate::to_value(s)?;
+ assert!(
+ avro_value.validate(&writer_schema),
+ "value is valid for schema",
+ );
+ let datum = crate::to_avro_datum(&writer_schema, avro_value)?;
+
+ // Now, attempt to deserialize using the reader schema.
+ let reader_schema = Schema::parse(&reader_schema)?;
+ let mut x = &datum[..];
+
+ // Deserialization should succeed and we should be able to resolve the schema.
+ let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?;
+ assert!(deser_value.validate(&reader_schema));
+
+ // Verify that we can read a field from the record.
+ let d: MyRecordReader = crate::from_value(&deser_value)?;
+ assert_eq!(d.inner_record.unwrap().a, "foo".to_string());
+ Ok(())
+ }
+
+ #[test]
+ fn test_avro_3837_disallow_invalid_namespace() -> TestResult {
+ // Valid namespace #1 (Single name portion)
+ let schema_str = r#"
+ {
+ "name": "record1",
+ "namespace": "ns1",
+ "type": "record",
+ "fields": []
+ }
+ "#;
+
+ let expected = r#"{"name":"ns1.record1","type":"record","fields":[]}"#;
+ let schema = Schema::parse_str(schema_str)?;
+ let canonical_form = schema.canonical_form();
+ assert_eq!(canonical_form, expected);
+
+ // Valid namespace #2 (multiple name portions).
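+ // (The grammar these cases enforce is the spec's: a namespace is a
+ // dot-separated sequence of names, each matching [A-Za-z_][A-Za-z0-9_]*,
+ // so leading dots, empty portions and characters like '*' are invalid.)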
+ let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1.foo.bar", + "type": "enum", + "symbols": ["a"] + } + "#; + + let expected = r#"{"name":"ns1.foo.bar.enum1","type":"enum","symbols":["a"]}"#; + let schema = Schema::parse_str(schema_str)?; + let canonical_form = schema.canonical_form(); + assert_eq!(canonical_form, expected); + + // Invalid namespace #1 (a name portion starts with dot) + let schema_str = r#" + { + "name": "fixed1", + "namespace": ".ns1.a.b", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #2 (invalid character in a name portion) + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1.a*b.c", + "type": "record", + "fields": [] + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #3 (a name portion starts with a digit) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1.1a.b", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #4 (a name portion is missing - two dots in a row) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1..a", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + // Invalid namespace #5 (a name portion is missing - ends with a dot) + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1.a.", + "type": "fixed", + "size": 1 + } + "#; + + match Schema::parse_str(schema_str) { + Err(Error::InvalidNamespace(_, _)) => (), + other => return Err(format!("Expected Error::InvalidNamespace, got {other:?}").into()), + }; + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_simple_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#""int""#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_nested_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + }, + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.record2","type":"record","fields":[{"name":"f1_1","type":"int"}]}"# + .to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + 
.unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_enum_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", "c"] + }, + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.enum1","type":"enum","symbols":["a","b","c"]}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_fixed_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + }, + "default": 100 + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"name":"ns.fixed1","type":"fixed","size":3}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_array_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "array", + "items": "int", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"type":"array","items":"int"}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_map_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "map", + "values": "string", + "default": "invalid" + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f1".to_string(), + "ns.record1".to_string(), + r#"{"type":"map","values":"string"}"#.to_string(), + ) + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_ref_record_field() -> TestResult { + let schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": "ns.record2", + "default": { "f1_1": true } + } + ] + } + "#; + let expected = Error::GetDefaultRecordField( + "f2".to_string(), + "ns.record1".to_string(), + r#""ns.record2""#.to_string(), + ) + .to_string(); + let result = 
Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3851_validate_default_value_of_enum() -> TestResult { + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"], + "default": 100 + } + "#; + let expected = Error::EnumDefaultWrongType(100.into()).to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"], + "default": "d" + } + "#; + let expected = Error::GetEnumDefault { + symbol: "d".to_string(), + symbols: vec!["a".to_string(), "b".to_string(), "c".to_string()], + } + .to_string(); + let result = Schema::parse_str(schema_str); + assert!(result.is_err()); + let err = result + .map_err(|e| e.to_string()) + .err() + .unwrap_or_else(|| "unexpected".to_string()); + assert_eq!(expected, err); + + Ok(()) + } + + #[test] + fn test_avro_3862_get_aliases() -> TestResult { + // Test for Record + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1", + "type": "record", + "aliases": ["r1", "ns2.r2"], + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.r1")?, Alias::new("ns2.r2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "record1", + "namespace": "ns1", + "type": "record", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Enum + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1", + "type": "enum", + "aliases": ["en1", "ns2.en2"], + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.en1")?, Alias::new("ns2.en2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "enum1", + "namespace": "ns1", + "type": "enum", + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Fixed + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1", + "type": "fixed", + "aliases": ["fx1", "ns2.fx2"], + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = vec![Alias::new("ns1.fx1")?, Alias::new("ns2.fx2")?]; + match schema.aliases() { + Some(aliases) => assert_eq!(aliases, &expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "fixed1", + "namespace": "ns1", + "type": "fixed", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for non-named type + 
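+ // (Only named types, i.e. records, enums and fixed, can carry aliases;
+ // every other schema kind is expected to report None.)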
let schema = Schema::Int; + match schema.aliases() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + Ok(()) + } + + #[test] + fn test_avro_3862_get_doc() -> TestResult { + // Test for Record + let schema_str = r#" + { + "name": "record1", + "type": "record", + "doc": "Record Document", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Record Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "string" } + ] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Enum + let schema_str = r#" + { + "name": "enum1", + "type": "enum", + "doc": "Enum Document", + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Enum Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", "c"] + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for Fixed + let schema_str = r#" + { + "name": "fixed1", + "type": "fixed", + "doc": "Fixed Document", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + let expected = "Fixed Document"; + match schema.doc() { + Some(doc) => assert_eq!(doc, expected), + None => panic!("Expected Some({:?}), got None", expected), + } + + let schema_str = r#" + { + "name": "fixed1", + "type": "fixed", + "size": 10 + } + "#; + let schema = Schema::parse_str(schema_str)?; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + // Test for non-named type + let schema = Schema::Int; + match schema.doc() { + None => (), + some => panic!("Expected None, got {some:?}"), + } + + Ok(()) } } diff --git a/lang/rust/avro/src/schema_compatibility.rs b/lang/rust/avro/src/schema_compatibility.rs index b691041e543..a15c18407ef 100644 --- a/lang/rust/avro/src/schema_compatibility.rs +++ b/lang/rust/avro/src/schema_compatibility.rs @@ -16,7 +16,7 @@ // under the License. //! Logic for checking schema compatibility -use crate::schema::{Schema, SchemaKind}; +use crate::schema::{EnumSchema, FixedSchema, RecordSchema, Schema, SchemaKind}; use std::{ collections::{hash_map::DefaultHasher, HashSet}, hash::Hasher, @@ -88,13 +88,13 @@ impl Checker { SchemaKind::Union => self.match_union_schemas(writers_schema, readers_schema), SchemaKind::Enum => { // reader's symbols must contain all writer's symbols - if let Schema::Enum { + if let Schema::Enum(EnumSchema { symbols: w_symbols, .. - } = writers_schema + }) = writers_schema { - if let Schema::Enum { + if let Schema::Enum(EnumSchema { symbols: r_symbols, .. - } = readers_schema + }) = readers_schema { return !w_symbols.iter().any(|e| !r_symbols.contains(e)); } @@ -121,15 +121,15 @@ impl Checker { return false; } - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: w_fields, lookup: w_lookup, .. 
- } = writers_schema + }) = writers_schema { - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: r_fields, .. - } = readers_schema + }) = readers_schema { for field in r_fields.iter() { if let Some(pos) = w_lookup.get(&field.name) { @@ -219,8 +219,8 @@ impl SchemaCompatibility { match r_type { SchemaKind::Record => { - if let Schema::Record { name: w_name, .. } = writers_schema { - if let Schema::Record { name: r_name, .. } = readers_schema { + if let Schema::Record(RecordSchema { name: w_name, .. }) = writers_schema { + if let Schema::Record(RecordSchema { name: r_name, .. }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None); } else { unreachable!("readers_schema should have been Schema::Record") @@ -230,21 +230,21 @@ impl SchemaCompatibility { } } SchemaKind::Fixed => { - if let Schema::Fixed { + if let Schema::Fixed(FixedSchema { name: w_name, aliases: _, doc: _w_doc, size: w_size, attributes: _, - } = writers_schema + }) = writers_schema { - if let Schema::Fixed { + if let Schema::Fixed(FixedSchema { name: r_name, aliases: _, doc: _r_doc, size: r_size, attributes: _, - } = readers_schema + }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None) && w_size == r_size; @@ -256,8 +256,8 @@ impl SchemaCompatibility { } } SchemaKind::Enum => { - if let Schema::Enum { name: w_name, .. } = writers_schema { - if let Schema::Enum { name: r_name, .. } = readers_schema { + if let Schema::Enum(EnumSchema { name: w_name, .. }) = writers_schema { + if let Schema::Enum(EnumSchema { name: r_name, .. }) = readers_schema { return w_name.fullname(None) == r_name.fullname(None); } else { unreachable!("readers_schema should have been Schema::Enum") @@ -293,7 +293,7 @@ impl SchemaCompatibility { } if w_type == SchemaKind::Int - && vec![SchemaKind::Long, SchemaKind::Float, SchemaKind::Double] + && [SchemaKind::Long, SchemaKind::Float, SchemaKind::Double] .iter() .any(|&t| t == r_type) { @@ -301,7 +301,7 @@ impl SchemaCompatibility { } if w_type == SchemaKind::Long - && vec![SchemaKind::Float, SchemaKind::Double] + && [SchemaKind::Float, SchemaKind::Double] .iter() .any(|&t| t == r_type) { @@ -327,6 +327,11 @@ impl SchemaCompatibility { #[cfg(test)] mod tests { use super::*; + use crate::{ + types::{Record, Value}, + Codec, Reader, Writer, + }; + use apache_avro_test_helper::TestResult; fn int_array_schema() -> Schema { Schema::parse_str(r#"{"type":"array", "items":"int"}"#).unwrap() @@ -589,15 +594,14 @@ mod tests { } #[test] - fn test_missing_field() { + fn test_missing_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ {"name":"oldfield1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema, @@ -606,18 +610,19 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_missing_second_field() { + fn test_missing_second_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ {"name":"oldfield2", "type":"string"} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -626,10 +631,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_all_fields() { + fn test_all_fields() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -637,8 +644,7 @@ mod tests { {"name":"oldfield2", "type":"string"} ]} "#, - ) - .unwrap(); 
+ )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -647,10 +653,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_new_field_with_default() { + fn test_new_field_with_default() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -658,8 +666,7 @@ mod tests { {"name":"newfield1", "type":"int", "default":42} ]} "#, - ) - .unwrap(); + )?; assert!(SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -668,10 +675,12 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] - fn test_new_field() { + fn test_new_field() -> TestResult { let reader_schema = Schema::parse_str( r#" {"type":"record", "name":"Record", "fields":[ @@ -679,8 +688,7 @@ mod tests { {"name":"newfield1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(!SchemaCompatibility::can_read( &writer_schema(), &reader_schema @@ -689,6 +697,8 @@ mod tests { &reader_schema, &writer_schema() )); + + Ok(()) } #[test] @@ -720,7 +730,7 @@ mod tests { } #[test] - fn test_union_reader_writer_subset_incompatiblity() { + fn test_union_reader_writer_subset_incompatibility() { // reader union schema must contain all writer union branches let union_writer = union_schema(vec![Schema::Int, Schema::String]); let union_reader = union_schema(vec![Schema::String]); @@ -730,15 +740,14 @@ mod tests { } #[test] - fn test_incompatible_record_field() { + fn test_incompatible_record_field() -> TestResult { let string_schema = Schema::parse_str( r#" {"type":"record", "name":"MyRecord", "namespace":"ns", "fields": [ {"name":"field1", "type":"string"} ]} "#, - ) - .unwrap(); + )?; let int_schema = Schema::parse_str( r#" @@ -746,25 +755,26 @@ mod tests { {"name":"field1", "type":"int"} ]} "#, - ) - .unwrap(); + )?; assert!(!SchemaCompatibility::can_read(&string_schema, &int_schema)); + + Ok(()) } #[test] - fn test_enum_symbols() { + fn test_enum_symbols() -> TestResult { let enum_schema1 = Schema::parse_str( r#" {"type":"enum", "name":"MyEnum", "symbols":["A","B"]} "#, - ) - .unwrap(); + )?; let enum_schema2 = - Schema::parse_str(r#"{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type":"enum", "name":"MyEnum", "symbols":["A","B","C"]}"#)?; assert!(!SchemaCompatibility::can_read(&enum_schema2, &enum_schema1)); assert!(SchemaCompatibility::can_read(&enum_schema1, &enum_schema2)); + + Ok(()) } fn point_2d_schema() -> Schema { @@ -900,4 +910,132 @@ mod tests { &read_schema )); } + + #[test] + fn test_avro_3772_enum_default() -> TestResult { + let writer_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "clubs", "hearts"], + "default": "spades" + } + } + ] + } + "#; + + let reader_raw_schema = r#" + { + "type": "record", + "name": "test", + "fields": [ + {"name": "a", "type": "long", "default": 42}, + {"name": "b", "type": "string"}, + { + "name": "c", + "type": { + "type": "enum", + "name": "suit", + "symbols": ["diamonds", "spades", "ninja", "hearts"], + "default": "spades" + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_raw_schema)?; + let reader_schema = Schema::parse_str(reader_raw_schema)?; + let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null); + let mut record = Record::new(writer.schema()).unwrap(); + 
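+ // "clubs" is a valid symbol for the writer but does not exist in the
+ // reader's enum, so on read the value is expected to resolve to the
+ // reader's declared default, "spades".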
record.put("a", 27i64);
+ record.put("b", "foo");
+ record.put("c", "clubs");
+ writer.append(record).unwrap();
+ let input = writer.into_inner()?;
+ let mut reader = Reader::with_schema(&reader_schema, &input[..])?;
+ assert_eq!(
+ reader.next().unwrap().unwrap(),
+ Value::Record(vec![
+ ("a".to_string(), Value::Long(27)),
+ ("b".to_string(), Value::String("foo".to_string())),
+ ("c".to_string(), Value::Enum(1, "spades".to_string())),
+ ])
+ );
+ assert!(reader.next().is_none());
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_avro_3772_enum_default_less_symbols() -> TestResult {
+ let writer_raw_schema = r#"
+ {
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "a", "type": "long", "default": 42},
+ {"name": "b", "type": "string"},
+ {
+ "name": "c",
+ "type": {
+ "type": "enum",
+ "name": "suit",
+ "symbols": ["diamonds", "spades", "clubs", "hearts"],
+ "default": "spades"
+ }
+ }
+ ]
+ }
+ "#;
+
+ let reader_raw_schema = r#"
+ {
+ "type": "record",
+ "name": "test",
+ "fields": [
+ {"name": "a", "type": "long", "default": 42},
+ {"name": "b", "type": "string"},
+ {
+ "name": "c",
+ "type": {
+ "type": "enum",
+ "name": "suit",
+ "symbols": ["hearts", "spades"],
+ "default": "spades"
+ }
+ }
+ ]
+ }
+ "#;
+ let writer_schema = Schema::parse_str(writer_raw_schema)?;
+ let reader_schema = Schema::parse_str(reader_raw_schema)?;
+ let mut writer = Writer::with_codec(&writer_schema, Vec::new(), Codec::Null);
+ let mut record = Record::new(writer.schema()).unwrap();
+ record.put("a", 27i64);
+ record.put("b", "foo");
+ record.put("c", "hearts");
+ writer.append(record).unwrap();
+ let input = writer.into_inner()?;
+ let mut reader = Reader::with_schema(&reader_schema, &input[..])?;
+ assert_eq!(
+ reader.next().unwrap().unwrap(),
+ Value::Record(vec![
+ ("a".to_string(), Value::Long(27)),
+ ("b".to_string(), Value::String("foo".to_string())),
+ ("c".to_string(), Value::Enum(0, "hearts".to_string())),
+ ])
+ );
+ assert!(reader.next().is_none());
+
+ Ok(())
+ }
}
diff --git a/lang/rust/avro/src/ser.rs b/lang/rust/avro/src/ser.rs
index c85c8c4d3ee..ce779b946c2 100644
--- a/lang/rust/avro/src/ser.rs
+++ b/lang/rust/avro/src/ser.rs
@@ -200,10 +200,10 @@ impl<'b> ser::Serializer for &'b mut Serializer {
fn serialize_unit_variant(
self,
_: &'static str,
- index: u32,
+ _variant_index: u32,
variant: &'static str,
) -> Result<Self::Ok, Self::Error> {
- Ok(Value::Enum(index, variant.to_string()))
+ Ok(Value::String(variant.to_string()))
}
fn serialize_newtype_struct(
@@ -283,6 +283,10 @@
) -> Result<Self::SerializeStructVariant, Self::Error> {
Ok(StructVariantSerializer::new(index, variant, len))
}
+
+ fn is_human_readable(&self) -> bool {
+ crate::util::is_human_readable()
+ }
}
impl ser::SerializeSeq for SeqSerializer {
@@ -485,8 +489,11 @@ pub fn to_value<S: Serialize>(value: S) -> Result<Value, Error> {
#[cfg(test)]
mod tests {
use super::*;
+ use apache_avro_test_helper::TestResult;
use pretty_assertions::assert_eq;
use serde::{Deserialize, Serialize};
+ use serial_test::serial;
+ use std::sync::atomic::Ordering;
#[derive(Debug, Deserialize, Serialize, Clone)]
struct Test {
@@ -678,7 +685,7 @@ mod tests {
}
#[test]
- fn test_to_value() {
+ fn test_to_value() -> TestResult {
let test = Test {
a: 27,
b: "foo".to_owned(),
@@ -688,7 +695,7 @@
("b".to_owned(), Value::String("foo".to_owned())),
]);
- assert_eq!(to_value(test.clone()).unwrap(), expected);
+ assert_eq!(to_value(test.clone())?, expected);
let test_inner = TestInner { a: test, b: 35 };
@@ -703,19 +710,21 @@ mod tests {
("b".to_owned(), Value::Int(35)),
]);
-
assert_eq!(to_value(test_inner).unwrap(), expected_inner); + assert_eq!(to_value(test_inner)?, expected_inner); + + Ok(()) } #[test] - fn test_to_value_unit_enum() { + fn test_to_value_unit_enum() -> TestResult { let test = TestUnitExternalEnum { a: UnitExternalEnum::Val1, }; - let expected = Value::Record(vec![("a".to_owned(), Value::Enum(0, "Val1".to_owned()))]); + let expected = Value::Record(vec![("a".to_owned(), Value::String("Val1".to_owned()))]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit external enum" ); @@ -730,7 +739,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit internal enum" ); @@ -745,7 +754,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit adjacent enum" ); @@ -757,14 +766,16 @@ mod tests { let expected = Value::Record(vec![("a".to_owned(), Value::Null)]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing unit untagged enum" ); + + Ok(()) } #[test] - fn test_to_value_single_value_enum() { + fn test_to_value_single_value_enum() -> TestResult { let test = TestSingleValueExternalEnum { a: SingleValueExternalEnum::Double(64.0), }; @@ -781,7 +792,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value external enum" ); @@ -806,7 +817,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value adjacent enum" ); @@ -818,14 +829,16 @@ mod tests { let expected = Value::Record(vec![("a".to_owned(), Value::Double(64.0))]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "Error serializing single value untagged enum" ); + + Ok(()) } #[test] - fn test_to_value_struct_enum() { + fn test_to_value_struct_enum() -> TestResult { let test = TestStructExternalEnum { a: StructExternalEnum::Val1 { x: 1.0, y: 2.0 }, }; @@ -847,7 +860,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct external enum" ); @@ -867,7 +880,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct internal enum" ); @@ -890,7 +903,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct adjacent enum" ); @@ -907,7 +920,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct untagged enum" ); @@ -929,14 +942,16 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing struct untagged enum variant" ); + + Ok(()) } #[test] - fn test_to_value_tuple_enum() { + fn test_to_value_tuple_enum() -> TestResult { let test = TestTupleExternalEnum { a: TupleExternalEnum::Val2(1.0, 2.0, 3.0), }; @@ -957,7 +972,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple external enum" ); @@ -978,7 +993,7 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple adjacent enum" ); @@ -993,9 +1008,35 @@ mod tests { )]); assert_eq!( - to_value(test).unwrap(), + to_value(test)?, expected, "error serializing tuple untagged enum" ); + + Ok(()) + } + + #[test] + #[serial(avro_3747)] + fn avro_3747_human_readable_false() { + use serde::ser::Serializer as SerdeSerializer; + + 
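+ // SERDE_HUMAN_READABLE is process-global atomic state, hence the
+ // #[serial(avro_3747)] attribute on this pair of tests: running them in
+ // parallel would race on the flag.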
crate::util::SERDE_HUMAN_READABLE.store(false, Ordering::Release);
+
+ let ser = &mut Serializer {};
+
+ assert_eq!(ser.is_human_readable(), false);
+ }
+
+ #[test]
+ #[serial(avro_3747)]
+ fn avro_3747_human_readable_true() {
+ use serde::ser::Serializer as SerdeSerializer;
+
+ crate::util::SERDE_HUMAN_READABLE.store(true, Ordering::Release);
+
+ let ser = &mut Serializer {};
+
+ assert!(ser.is_human_readable());
}
}
diff --git a/lang/rust/avro/src/types.rs b/lang/rust/avro/src/types.rs
index ee322b331a6..9bb60770562 100644
--- a/lang/rust/avro/src/types.rs
+++ b/lang/rust/avro/src/types.rs
@@ -20,15 +20,17 @@ use crate::{
decimal::Decimal,
duration::Duration,
schema::{
- Name, NamesRef, Namespace, Precision, RecordField, ResolvedSchema, Scale, Schema,
- SchemaKind, UnionSchema,
+ DecimalSchema, EnumSchema, FixedSchema, Name, Namespace, Precision, RecordField,
+ RecordSchema, ResolvedSchema, Scale, Schema, SchemaKind, UnionSchema,
},
AvroResult, Error,
};
use serde_json::{Number, Value as JsonValue};
use std::{
+ borrow::Borrow,
collections::{BTreeMap, HashMap},
convert::TryFrom,
+ fmt::Debug,
hash::BuildHasher,
str::FromStr,
};
@@ -106,6 +108,10 @@ pub enum Value {
TimestampMillis(i64),
/// Timestamp in microseconds.
TimestampMicros(i64),
+ /// Local timestamp in milliseconds.
+ LocalTimestampMillis(i64),
+ /// Local timestamp in microseconds.
+ LocalTimestampMicros(i64),
/// Avro Duration. An amount of time defined by months, days and milliseconds.
Duration(Duration),
/// Universally unique identifier.
@@ -221,11 +227,11 @@ impl<'a> Record<'a> {
/// If the `Schema` is not a `Schema::Record` variant, `None` will be returned.
pub fn new(schema: &Schema) -> Option<Record> {
match *schema {
- Schema::Record {
+ Schema::Record(RecordSchema {
fields: ref schema_fields,
lookup: ref schema_lookup,
..
- } => {
+ }) => {
let mut fields = Vec::with_capacity(schema_fields.len());
for schema_field in schema_fields.iter() {
fields.push((schema_field.name.clone(), Value::Null));
@@ -282,7 +288,7 @@ impl From for Value {
}
/// Convert Avro values to Json values
-impl std::convert::TryFrom<Value> for JsonValue {
+impl TryFrom<Value> for JsonValue {
type Error = crate::error::Error;
fn try_from(value: Value) -> AvroResult<Self> {
match value {
@@ -325,6 +331,8 @@
Value::TimeMicros(t) => Ok(Self::Number(t.into())),
Value::TimestampMillis(t) => Ok(Self::Number(t.into())),
Value::TimestampMicros(t) => Ok(Self::Number(t.into())),
+ Value::LocalTimestampMillis(t) => Ok(Self::Number(t.into())),
+ Value::LocalTimestampMicros(t) => Ok(Self::Number(t.into())),
Value::Duration(d) => Ok(Self::Array(
<[u8; 12]>::from(d).iter().map(|&v| v.into()).collect(),
)),
@@ -345,15 +353,21 @@ impl Value {
pub fn validate_schemata(&self, schemata: Vec<&Schema>) -> bool {
let rs = ResolvedSchema::try_from(schemata.clone())
.expect("Schemata didn't successfully resolve");
+ let schemata_len = schemata.len();
schemata.iter().any(|schema| {
let enclosing_namespace = schema.namespace();
match self.validate_internal(schema, rs.get_names(), &enclosing_namespace) {
- Some(error_msg) => {
- error!(
+ Some(reason) => {
+ let log_message = format!(
"Invalid value: {:?} for schema: {:?}. Reason: {}",
- self, schema, error_msg
+ self, schema, reason
);
+ if schemata_len == 1 {
+ error!("{}", log_message);
+ } else {
+ debug!("{}", log_message);
+ };
false
}
None => true,
@@ -370,7 +384,8 @@ impl Value {
}
}
- pub(crate) fn validate_internal<S: Borrow<Schema>>(
+ /// Validates the value against the provided schema.
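+ /// Returns `None` when the value is valid, or `Some` with a human-readable
+ /// reason when it is not.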
+ pub(crate) fn validate_internal<S: Borrow<Schema> + Debug>(
&self,
schema: &Schema,
names: &HashMap<Name, S>,
enclosing_namespace: &Namespace,
@@ -400,8 +415,12 @@
(&Value::Long(_), &Schema::TimeMicros) => None,
(&Value::Long(_), &Schema::TimestampMillis) => None,
(&Value::Long(_), &Schema::TimestampMicros) => None,
+ (&Value::Long(_), &Schema::LocalTimestampMillis) => None,
+ (&Value::Long(_), &Schema::LocalTimestampMicros) => None,
(&Value::TimestampMicros(_), &Schema::TimestampMicros) => None,
(&Value::TimestampMillis(_), &Schema::TimestampMillis) => None,
+ (&Value::LocalTimestampMicros(_), &Schema::LocalTimestampMicros) => None,
+ (&Value::LocalTimestampMillis(_), &Schema::LocalTimestampMillis) => None,
(&Value::TimeMicros(_), &Schema::TimeMicros) => None,
(&Value::TimeMillis(_), &Schema::TimeMillis) => None,
(&Value::Date(_), &Schema::Date) => None,
@@ -415,7 +434,7 @@
(&Value::Bytes(_), &Schema::Decimal { .. }) => None,
(&Value::String(_), &Schema::String) => None,
(&Value::String(_), &Schema::Uuid) => None,
- (&Value::Fixed(n, _), &Schema::Fixed { size, .. }) => {
+ (&Value::Fixed(n, _), &Schema::Fixed(FixedSchema { size, .. })) => {
if n != size {
Some(format!(
"The value's size ({n}) is different than the schema's size ({size})"
@@ -424,7 +443,7 @@
None
}
}
- (Value::Bytes(b), &Schema::Fixed { size, .. }) => {
+ (Value::Bytes(b), &Schema::Fixed(FixedSchema { size, .. })) => {
if b.len() != size {
Some(format!(
"The bytes' length ({}) is different than the schema's size ({})",
@@ -446,14 +465,19 @@
}
// TODO: check precision against n
(&Value::Fixed(_n, _), &Schema::Decimal { .. }) => None,
- (Value::String(s), Schema::Enum { symbols, .. }) => {
+ (Value::String(s), Schema::Enum(EnumSchema { symbols, .. })) => {
if !symbols.contains(s) {
Some(format!("'{s}' is not a member of the possible symbols"))
} else {
None
}
}
- (&Value::Enum(i, ref s), Schema::Enum { symbols, .. }) => symbols
+ (
+ &Value::Enum(i, ref s),
+ Schema::Enum(EnumSchema {
+ symbols, default, ..
+ }),
+ ) => symbols
.get(i as usize)
.map(|ref symbol| {
if symbol != &s {
@@ -462,17 +486,22 @@
None
}
})
- .unwrap_or_else(|| Some(format!("No symbol at position '{i}'"))),
+ .unwrap_or_else(|| match default {
+ Some(_) => None,
+ None => Some(format!("No symbol at position '{i}'")),
+ }),
// (&Value::Union(None), &Schema::Union(_)) => None,
(&Value::Union(i, ref value), Schema::Union(inner)) => inner
.variants()
.get(i as usize)
.map(|schema| value.validate_internal(schema, names, enclosing_namespace))
.unwrap_or_else(|| Some(format!("No schema in the union at position '{i}'"))),
- (v, Schema::Union(inner)) => match inner.find_schema(v) {
- Some(_) => None,
- None => Some("Could not find matching type in union".to_string()),
- },
+ (v, Schema::Union(inner)) => {
+ match inner.find_schema_with_known_schemata(v, Some(names), enclosing_namespace) {
+ Some(_) => None,
+ None => Some("Could not find matching type in union".to_string()),
+ }
+ }
(Value::Array(items), Schema::Array(inner)) => items.iter().fold(None, |acc, item| {
Value::accumulate(
acc,
@@ -487,10 +516,19 @@
)
})
}
- (Value::Record(record_fields), Schema::Record { fields, lookup, .. }) => {
+ (
+ Value::Record(record_fields),
+ Schema::Record(RecordSchema {
+ fields,
+ lookup,
+ name,
+ ..
+ }),
+ ) => {
let non_nullable_fields_count =
fields.iter().filter(|&rf| !rf.is_nullable()).count();
+ // If the record contains fewer fields than the schema requires, it is invalid.
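+ // (Nullable fields may legitimately be absent from the value, so only
+ // the schema's non-nullable fields are counted here.)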
if record_fields.len() < non_nullable_fields_count {
return Some(format!(
"The value's records length ({}) doesn't match the schema ({} non-nullable fields)",
@@ -508,6 +546,11 @@
record_fields
.iter()
.fold(None, |acc, (field_name, record_field)| {
+ let record_namespace = if name.namespace.is_none() {
+ enclosing_namespace
+ } else {
+ &name.namespace
+ };
match lookup.get(field_name) {
Some(idx) => {
let field = &fields[*idx];
@@ -516,7 +559,7 @@
record_field.validate_internal(
&field.schema,
names,
- enclosing_namespace,
+ record_namespace,
),
)
}
@@ -527,7 +570,7 @@
}
})
}
- (Value::Map(items), Schema::Record { fields, .. }) => {
+ (Value::Map(items), Schema::Record(RecordSchema { fields, .. })) => {
fields.iter().fold(None, |acc, field| {
if let Some(item) = items.get(&field.name) {
let res = item.validate_internal(&field.schema, names, enclosing_namespace);
@@ -558,14 +601,27 @@
pub fn resolve(self, schema: &Schema) -> AvroResult<Self> {
let enclosing_namespace = schema.namespace();
let rs = ResolvedSchema::try_from(schema)?;
- self.resolve_internal(schema, rs.get_names(), &enclosing_namespace)
+ self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None)
+ }
+
+ /// Attempt to perform schema resolution on the value, with the given
+ /// [Schema](../schema/enum.Schema.html) and set of schemas to use for Refs resolution.
+ ///
+ /// See [Schema Resolution](https://avro.apache.org/docs/current/spec.html#Schema+Resolution)
+ /// in the Avro specification for the full set of rules of schema
+ /// resolution.
+ pub fn resolve_schemata(self, schema: &Schema, schemata: Vec<&Schema>) -> AvroResult<Self> {
+ let enclosing_namespace = schema.namespace();
+ let rs = ResolvedSchema::try_from(schemata)?;
+ self.resolve_internal(schema, rs.get_names(), &enclosing_namespace, &None)
}
- fn resolve_internal(
+ pub(crate) fn resolve_internal<S: Borrow<Schema> + Debug>(
mut self,
schema: &Schema,
- names: &NamesRef,
+ names: &HashMap<Name, S>,
enclosing_namespace: &Namespace,
+ field_default: &Option<JsonValue>,
) -> AvroResult<Self> {
// Check if this schema is a union, and if the reader schema is not.
if SchemaKind::from(&self) == SchemaKind::Union
@@ -578,13 +634,14 @@
};
self = v;
}
+
match *schema {
Schema::Ref { ref name } => {
let name = name.fully_qualified_name(enclosing_namespace);
if let Some(resolved) = names.get(&name) {
debug!("Resolved {:?}", name);
- self.resolve_internal(resolved, names, &name.namespace)
+ self.resolve_internal(resolved.borrow(), names, &name.namespace, field_default)
} else {
error!("Failed to resolve schema {:?}", name);
Err(Error::SchemaResolutionError(name.clone()))
}
@@ -598,24 +655,32 @@
Schema::Double => self.resolve_double(),
Schema::Bytes => self.resolve_bytes(),
Schema::String => self.resolve_string(),
- Schema::Fixed { size, .. } => self.resolve_fixed(size),
- Schema::Union(ref inner) => self.resolve_union(inner, names, enclosing_namespace),
- Schema::Enum { ref symbols, .. } => self.resolve_enum(symbols),
+ Schema::Fixed(FixedSchema { size, .. }) => self.resolve_fixed(size),
+ Schema::Union(ref inner) => {
+ self.resolve_union(inner, names, enclosing_namespace, field_default)
+ }
+ Schema::Enum(EnumSchema {
+ ref symbols,
+ ref default,
+ ..
+ }) => self.resolve_enum(symbols, default, field_default),
Schema::Array(ref inner) => self.resolve_array(inner, names, enclosing_namespace),
Schema::Map(ref inner) => self.resolve_map(inner, names, enclosing_namespace),
- Schema::Record { ref fields, ..
} => { + Schema::Record(RecordSchema { ref fields, .. }) => { self.resolve_record(fields, names, enclosing_namespace) } - Schema::Decimal { + Schema::Decimal(DecimalSchema { scale, precision, ref inner, - } => self.resolve_decimal(precision, scale, inner), + }) => self.resolve_decimal(precision, scale, inner), Schema::Date => self.resolve_date(), Schema::TimeMillis => self.resolve_time_millis(), Schema::TimeMicros => self.resolve_time_micros(), Schema::TimestampMillis => self.resolve_timestamp_millis(), Schema::TimestampMicros => self.resolve_timestamp_micros(), + Schema::LocalTimestampMillis => self.resolve_local_timestamp_millis(), + Schema::LocalTimestampMicros => self.resolve_local_timestamp_micros(), Schema::Duration => self.resolve_duration(), Schema::Uuid => self.resolve_uuid(), } @@ -657,7 +722,7 @@ impl Value { return Err(Error::GetScaleAndPrecision { scale, precision }); } match inner { - &Schema::Fixed { size, .. } => { + &Schema::Fixed(FixedSchema { size, .. }) => { if max_prec_for_len(size)? < precision { return Err(Error::GetScaleWithFixedSize { size, precision }); } @@ -668,7 +733,7 @@ impl Value { match self { Value::Decimal(num) => { let num_bytes = num.len(); - if max_prec_for_len(num_bytes)? > precision { + if max_prec_for_len(num_bytes)? < precision { Err(Error::ComparePrecisionAndSize { precision, num_bytes, @@ -679,7 +744,7 @@ impl Value { // check num.bits() here } Value::Fixed(_, bytes) | Value::Bytes(bytes) => { - if max_prec_for_len(bytes.len())? > precision { + if max_prec_for_len(bytes.len())? < precision { Err(Error::ComparePrecisionAndSize { precision, num_bytes: bytes.len(), @@ -731,6 +796,26 @@ impl Value { } } + fn resolve_local_timestamp_millis(self) -> Result { + match self { + Value::LocalTimestampMillis(ts) | Value::Long(ts) => { + Ok(Value::LocalTimestampMillis(ts)) + } + Value::Int(ts) => Ok(Value::LocalTimestampMillis(i64::from(ts))), + other => Err(Error::GetLocalTimestampMillis(other.into())), + } + } + + fn resolve_local_timestamp_micros(self) -> Result { + match self { + Value::LocalTimestampMicros(ts) | Value::Long(ts) => { + Ok(Value::LocalTimestampMicros(ts)) + } + Value::Int(ts) => Ok(Value::LocalTimestampMicros(i64::from(ts))), + other => Err(Error::GetLocalTimestampMicros(other.into())), + } + } + fn resolve_null(self) -> Result { match self { Value::Null => Ok(Value::Null), @@ -819,41 +904,48 @@ impl Value { } } - fn resolve_enum(self, symbols: &[String]) -> Result { + pub(crate) fn resolve_enum( + self, + symbols: &[String], + enum_default: &Option, + _field_default: &Option, + ) -> Result { let validate_symbol = |symbol: String, symbols: &[String]| { if let Some(index) = symbols.iter().position(|item| item == &symbol) { Ok(Value::Enum(index as u32, symbol)) } else { - Err(Error::GetEnumDefault { - symbol, - symbols: symbols.into(), - }) + match enum_default { + Some(default) => { + if let Some(index) = symbols.iter().position(|item| item == default) { + Ok(Value::Enum(index as u32, default.clone())) + } else { + Err(Error::GetEnumDefault { + symbol, + symbols: symbols.into(), + }) + } + } + _ => Err(Error::GetEnumDefault { + symbol, + symbols: symbols.into(), + }), + } } }; match self { - Value::Enum(raw_index, s) => { - let index = usize::try_from(raw_index) - .map_err(|e| Error::ConvertU32ToUsize(e, raw_index))?; - if (0..=symbols.len()).contains(&index) { - validate_symbol(s, symbols) - } else { - Err(Error::GetEnumValue { - index, - nsymbols: symbols.len(), - }) - } - } + Value::Enum(_raw_index, s) => validate_symbol(s, symbols), 
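The resolve_local_timestamp_millis/micros helpers in the hunk above mirror their existing UTC counterparts: a raw Long (or a widened Int) becomes the corresponding local-timestamp value. A hedged usage sketch, assuming the parser in this patch series maps the "local-timestamp-millis" logical type onto Schema::LocalTimestampMillis; the timestamp values are arbitrary:

    use apache_avro::{types::Value, Schema};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let schema =
            Schema::parse_str(r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#)?;

        // A plain long resolves into the logical-type value...
        assert_eq!(
            Value::Long(1_234).resolve(&schema)?,
            Value::LocalTimestampMillis(1_234)
        );
        // ...and an int is widened first, as resolve_local_timestamp_millis does.
        assert_eq!(Value::Int(42).resolve(&schema)?, Value::LocalTimestampMillis(42));
        Ok(())
    }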
Value::String(s) => validate_symbol(s, symbols), other => Err(Error::GetEnum(other.into())), } } - fn resolve_union( + fn resolve_union + Debug>( self, schema: &UnionSchema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, + field_default: &Option, ) -> Result { let v = match self { // Both are unions case. @@ -861,47 +953,27 @@ impl Value { // Reader is a union, but writer is not. v => v, }; - - // A union might contain references to another schema in the form of a Schema::Ref, - // resolve these prior to finding the schema. - let resolved_schemas: Vec = schema - .schemas - .iter() - .cloned() - .map(|schema| match schema { - Schema::Ref { name } => { - let name = name.fully_qualified_name(enclosing_namespace); - names - .get(&name) - .map(|s| (**s).clone()) - .ok_or_else(|| Error::SchemaResolutionError(name.clone())) - } - schema => Ok(schema), - }) - .collect::, Error>>()?; - - let resolved_union_schema = UnionSchema::new(resolved_schemas).unwrap(); - let (i, inner) = resolved_union_schema - .find_schema(&v) + let (i, inner) = schema + .find_schema_with_known_schemata(&v, Some(names), enclosing_namespace) .ok_or(Error::FindUnionVariant)?; Ok(Value::Union( i as u32, - Box::new(v.resolve_internal(inner, names, enclosing_namespace)?), + Box::new(v.resolve_internal(inner, names, enclosing_namespace, field_default)?), )) } - fn resolve_array( + fn resolve_array + Debug>( self, schema: &Schema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { match self { Value::Array(items) => Ok(Value::Array( items .into_iter() - .map(|item| item.resolve_internal(schema, names, enclosing_namespace)) + .map(|item| item.resolve_internal(schema, names, enclosing_namespace, &None)) .collect::>()?, )), other => Err(Error::GetArray { @@ -911,10 +983,10 @@ impl Value { } } - fn resolve_map( + fn resolve_map + Debug>( self, schema: &Schema, - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { match self { @@ -923,7 +995,7 @@ impl Value { .into_iter() .map(|(key, value)| { value - .resolve_internal(schema, names, enclosing_namespace) + .resolve_internal(schema, names, enclosing_namespace, &None) .map(|value| (key, value)) }) .collect::>()?, @@ -935,10 +1007,10 @@ impl Value { } } - fn resolve_record( + fn resolve_record + Debug>( self, fields: &[RecordField], - names: &NamesRef, + names: &HashMap, enclosing_namespace: &Namespace, ) -> Result { let mut items = match self { @@ -960,9 +1032,15 @@ impl Value { Some(value) => value, None => match field.default { Some(ref value) => match field.schema { - Schema::Enum { ref symbols, .. } => { - Value::from(value.clone()).resolve_enum(symbols)? - } + Schema::Enum(EnumSchema { + ref symbols, + ref default, + .. 
+ }) => Value::from(value.clone()).resolve_enum( + symbols, + default, + &field.default.clone(), + )?, Schema::Union(ref union_schema) => { let first = &union_schema.variants()[0]; // NOTE: this match exists only to optimize null defaults for large @@ -975,6 +1053,7 @@ impl Value { first, names, enclosing_namespace, + &field.default, )?), ), } @@ -987,7 +1066,7 @@ impl Value { }, }; value - .resolve_internal(&field.schema, names, enclosing_namespace) + .resolve_internal(&field.schema, names, enclosing_namespace, &field.default) .map(|value| (field.name.clone(), value)) }) .collect::, _>>()?; @@ -1016,12 +1095,66 @@ mod tests { schema::{Name, RecordField, RecordFieldOrder, Schema, UnionSchema}, types::Value, }; - use apache_avro_test_helper::logger::{assert_logged, assert_not_logged}; + use apache_avro_test_helper::{ + logger::{assert_logged, assert_not_logged}, + TestResult, + }; + use num_bigint::BigInt; use pretty_assertions::assert_eq; use uuid::Uuid; #[test] - fn validate() { + fn avro_3809_validate_nested_records_with_implicit_namespace() -> TestResult { + let schema = Schema::parse_str( + r#"{ + "name": "record_name", + "namespace": "space", + "type": "record", + "fields": [ + { + "name": "outer_field_1", + "type": { + "type": "record", + "name": "middle_record_name", + "namespace": "middle_namespace", + "fields": [ + { + "name": "middle_field_1", + "type": { + "type": "record", + "name": "inner_record_name", + "fields": [ + { "name": "inner_field_1", "type": "double" } + ] + } + }, + { "name": "middle_field_2", "type": "inner_record_name" } + ] + } + } + ] + }"#, + )?; + let value = Value::Record(vec![( + "outer_field_1".into(), + Value::Record(vec![ + ( + "middle_field_1".into(), + Value::Record(vec![("inner_field_1".into(), Value::Double(1.2f64))]), + ), + ( + "middle_field_2".into(), + Value::Record(vec![("inner_field_1".into(), Value::Double(1.6f64))]), + ), + ]), + )]); + + assert!(value.validate(&schema)); + Ok(()) + } + + #[test] + fn validate() -> TestResult { let value_schema_valid = vec![ (Value::Int(42), Schema::Int, true, ""), (Value::Int(43), Schema::Long, true, ""), @@ -1035,19 +1168,19 @@ mod tests { ), ( Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), true, "", ), ( Value::Union(1, Box::new(Value::Int(42))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), true, "", ), ( Value::Union(0, Box::new(Value::Null)), - Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Double, Schema::Int])?), false, "Invalid value: Union(0, Null) for schema: Union(UnionSchema { schemas: [Double, Int], variant_index: {Int: 1, Double: 0} }). 
Reason: Unsupported value-schema combination", ), @@ -1060,7 +1193,7 @@ mod tests { Schema::String, Schema::Int, ]) - .unwrap(), + ?, ), true, "", @@ -1068,14 +1201,14 @@ mod tests { ( Value::Union(1, Box::new(Value::Long(42i64))), Schema::Union( - UnionSchema::new(vec![Schema::Null, Schema::TimestampMillis]).unwrap(), + UnionSchema::new(vec![Schema::Null, Schema::TimestampMillis])?, ), true, "", ), ( Value::Union(2, Box::new(Value::Long(1_i64))), - Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap()), + Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), false, "Invalid value: Union(2, Long(1)) for schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }). Reason: No schema in the union at position '2'", ), @@ -1106,7 +1239,7 @@ mod tests { ), ( Value::Record(vec![("unknown_field_name".to_string(), Value::Null)]), - Schema::Record { + Schema::Record(RecordSchema { name: Name::new("record_name").unwrap(), aliases: None, doc: None, @@ -1122,13 +1255,13 @@ mod tests { }], lookup: Default::default(), attributes: Default::default(), - }, + }), false, - r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Int, order: Ignore, position: 0, custom_attributes: {} }], lookup: {}, attributes: {} }. Reason: There is no schema field for field 'unknown_field_name'"#, + r#"Invalid value: Record([("unknown_field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Int, order: Ignore, position: 0, custom_attributes: {} }], lookup: {}, attributes: {} }). Reason: There is no schema field for field 'unknown_field_name'"#, ), ( Value::Record(vec![("field_name".to_string(), Value::Null)]), - Schema::Record { + Schema::Record(RecordSchema { name: Name::new("record_name").unwrap(), aliases: None, doc: None, @@ -1146,9 +1279,9 @@ mod tests { }], lookup: [("field_name".to_string(), 0)].iter().cloned().collect(), attributes: Default::default(), - }, + }), false, - r#"Invalid value: Record([("field_name", Null)]) for schema: Record { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Ref { name: Name { name: "missing", namespace: None } }, order: Ignore, position: 0, custom_attributes: {} }], lookup: {"field_name": 0}, attributes: {} }. Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. Parsed names: []"#, + r#"Invalid value: Record([("field_name", Null)]) for schema: Record(RecordSchema { name: Name { name: "record_name", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "field_name", doc: None, aliases: None, default: None, schema: Ref { name: Name { name: "missing", namespace: None } }, order: Ignore, position: 0, custom_attributes: {} }], lookup: {"field_name": 0}, attributes: {} }). Reason: Unresolved schema reference: 'Name { name: "missing", namespace: None }'. 
Parsed names: []"#, ), ]; @@ -1166,17 +1299,19 @@ mod tests { assert_eq!(expected_err_message, full_err_message); } } + + Ok(()) } #[test] - fn validate_fixed() { - let schema = Schema::Fixed { + fn validate_fixed() -> TestResult { + let schema = Schema::Fixed(FixedSchema { size: 4, name: Name::new("some_fixed").unwrap(), aliases: None, doc: None, attributes: Default::default(), - }; + }); assert!(Value::Fixed(4, vec![0, 0, 0, 0]).validate(&schema)); let value = Value::Fixed(5, vec![0, 0, 0, 0, 0]); @@ -1199,11 +1334,13 @@ mod tests { ) .as_str(), ); + + Ok(()) } #[test] - fn validate_enum() { - let schema = Schema::Enum { + fn validate_enum() -> TestResult { + let schema = Schema::Enum(EnumSchema { name: Name::new("some_enum").unwrap(), aliases: None, doc: None, @@ -1213,8 +1350,9 @@ mod tests { "diamonds".to_string(), "clubs".to_string(), ], + default: None, attributes: Default::default(), - }; + }); assert!(Value::Enum(0, "spades".to_string()).validate(&schema)); assert!(Value::String("spades".to_string()).validate(&schema)); @@ -1249,7 +1387,7 @@ mod tests { .as_str(), ); - let other_schema = Schema::Enum { + let other_schema = Schema::Enum(EnumSchema { name: Name::new("some_other_enum").unwrap(), aliases: None, doc: None, @@ -1259,8 +1397,9 @@ mod tests { "clubs".to_string(), "spades".to_string(), ], + default: None, attributes: Default::default(), - }; + }); let value = Value::Enum(0, "spades".to_string()); assert!(!value.validate(&other_schema)); @@ -1271,10 +1410,12 @@ mod tests { ) .as_str(), ); + + Ok(()) } #[test] - fn validate_record() { + fn validate_record() -> TestResult { // { // "type": "record", // "fields": [ @@ -1287,7 +1428,7 @@ mod tests { // } // ] // } - let schema = Schema::Record { + let schema = Schema::Record(RecordSchema { name: Name::new("some_record").unwrap(), aliases: None, doc: None, @@ -1317,9 +1458,7 @@ mod tests { doc: None, default: Some(JsonValue::Null), aliases: None, - schema: Schema::Union( - UnionSchema::new(vec![Schema::Null, Schema::Int]).unwrap(), - ), + schema: Schema::Union(UnionSchema::new(vec![Schema::Null, Schema::Int])?), order: RecordFieldOrder::Ascending, position: 2, custom_attributes: Default::default(), @@ -1334,7 +1473,7 @@ mod tests { .cloned() .collect(), attributes: Default::default(), - }; + }); assert!(Value::Record(vec![ ("a".to_string(), Value::Long(42i64)), @@ -1354,7 +1493,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. 
Reason: Unsupported value-schema combination"#, + r#"Invalid value: Record([("a", Boolean(false)), ("b", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Unsupported value-schema combination"#, ); let value = Value::Record(vec![ @@ -1363,7 +1502,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. Reason: Could not find matching type in union"#, + r#"Invalid value: Record([("a", Long(42)), ("c", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Could not find matching type in union"#, ); assert_not_logged( r#"Invalid value: String("foo") for schema: Int. Reason: Unsupported value-schema combination"#, @@ -1375,7 +1514,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. 
Reason: There is no schema field for field 'd'"#, + r#"Invalid value: Record([("a", Long(42)), ("d", String("foo"))]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: There is no schema field for field 'd'"#, ); let value = Value::Record(vec![ @@ -1386,7 +1525,7 @@ mod tests { ]); assert!(!value.validate(&schema)); assert_logged( - r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. Reason: The value's records length (4) is greater than the schema's (3 fields)"#, + r#"Invalid value: Record([("a", Long(42)), ("b", String("foo")), ("c", Null), ("d", Null)]) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: The value's records length (4) is greater than the schema's (3 fields)"#, ); assert!(Value::Map( @@ -1406,11 +1545,11 @@ mod tests { ) .validate(&schema)); assert_logged( - r#"Invalid value: Map({"d": Long(123)}) for schema: Record { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }. 
Reason: Field with name '"a"' is not a member of the map items + r#"Invalid value: Map({"d": Long(123)}) for schema: Record(RecordSchema { name: Name { name: "some_record", namespace: None }, aliases: None, doc: None, fields: [RecordField { name: "a", doc: None, aliases: None, default: None, schema: Long, order: Ascending, position: 0, custom_attributes: {} }, RecordField { name: "b", doc: None, aliases: None, default: None, schema: String, order: Ascending, position: 1, custom_attributes: {} }, RecordField { name: "c", doc: None, aliases: None, default: Some(Null), schema: Union(UnionSchema { schemas: [Null, Int], variant_index: {Null: 0, Int: 1} }), order: Ascending, position: 2, custom_attributes: {} }], lookup: {"a": 0, "b": 1, "c": 2}, attributes: {} }). Reason: Field with name '"a"' is not a member of the map items Field with name '"b"' is not a member of the map items"#, ); - let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema]).unwrap()); + let union_schema = Schema::Union(UnionSchema::new(vec![Schema::Null, schema])?); assert!(Value::Union( 1, @@ -1433,33 +1572,41 @@ Field with name '"b"' is not a member of the map items"#, )) ) .validate(&union_schema)); + + Ok(()) } #[test] - fn resolve_bytes_ok() { + fn resolve_bytes_ok() -> TestResult { let value = Value::Array(vec![Value::Int(0), Value::Int(42)]); assert_eq!( - value.resolve(&Schema::Bytes).unwrap(), + value.resolve(&Schema::Bytes)?, Value::Bytes(vec![0u8, 42u8]) ); + + Ok(()) } #[test] - fn resolve_string_from_bytes() { + fn resolve_string_from_bytes() -> TestResult { let value = Value::Bytes(vec![97, 98, 99]); assert_eq!( - value.resolve(&Schema::String).unwrap(), + value.resolve(&Schema::String)?, Value::String("abc".to_string()) ); + + Ok(()) } #[test] - fn resolve_string_from_fixed() { + fn resolve_string_from_fixed() -> TestResult { let value = Value::Fixed(3, vec![97, 98, 99]); assert_eq!( - value.resolve(&Schema::String).unwrap(), + value.resolve(&Schema::String)?, Value::String("abc".to_string()) ); + + Ok(()) } #[test] @@ -1469,28 +1616,27 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn resolve_decimal_bytes() { - let value = Value::Decimal(Decimal::from(vec![1, 2])); - value - .clone() - .resolve(&Schema::Decimal { - precision: 10, - scale: 4, - inner: Box::new(Schema::Bytes), - }) - .unwrap(); + fn resolve_decimal_bytes() -> TestResult { + let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); + value.clone().resolve(&Schema::Decimal(DecimalSchema { + precision: 10, + scale: 4, + inner: Box::new(Schema::Bytes), + }))?; assert!(value.resolve(&Schema::String).is_err()); + + Ok(()) } #[test] fn resolve_decimal_invalid_scale() { - let value = Value::Decimal(Decimal::from(vec![1])); + let value = Value::Decimal(Decimal::from(vec![1, 2])); assert!(value - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 2, scale: 3, inner: Box::new(Schema::Bytes), - }) + })) .is_err()); } @@ -1498,30 +1644,30 @@ Field with name '"b"' is not a member of the map items"#, fn resolve_decimal_invalid_precision_for_length() { let value = Value::Decimal(Decimal::from((1u8..=8u8).rev().collect::>())); assert!(value - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 1, scale: 0, inner: Box::new(Schema::Bytes), - }) - .is_err()); + })) + .is_ok()); } #[test] fn resolve_decimal_fixed() { - let value = Value::Decimal(Decimal::from(vec![1, 2])); + let value = Value::Decimal(Decimal::from(vec![1, 2, 3, 4, 5])); 
assert!(value .clone() - .resolve(&Schema::Decimal { + .resolve(&Schema::Decimal(DecimalSchema { precision: 10, scale: 1, - inner: Box::new(Schema::Fixed { + inner: Box::new(Schema::Fixed(FixedSchema { name: Name::new("decimal").unwrap(), aliases: None, size: 20, doc: None, attributes: Default::default(), - }) - }) + })) + })) .is_ok()); assert!(value.resolve(&Schema::String).is_err()); } @@ -1567,6 +1713,26 @@ Field with name '"b"' is not a member of the map items"#, assert!(value.resolve(&Schema::TimestampMicros).is_err()); } + #[test] + fn test_avro_3853_resolve_timestamp_millis() { + let value = Value::LocalTimestampMillis(10); + assert!(value.clone().resolve(&Schema::LocalTimestampMillis).is_ok()); + assert!(value.resolve(&Schema::Float).is_err()); + + let value = Value::Float(10.0f32); + assert!(value.resolve(&Schema::LocalTimestampMillis).is_err()); + } + + #[test] + fn test_avro_3853_resolve_timestamp_micros() { + let value = Value::LocalTimestampMicros(10); + assert!(value.clone().resolve(&Schema::LocalTimestampMicros).is_ok()); + assert!(value.resolve(&Schema::Int).is_err()); + + let value = Value::Double(10.0); + assert!(value.resolve(&Schema::LocalTimestampMicros).is_err()); + } + #[test] fn resolve_duration() { let value = Value::Duration(Duration::new( @@ -1580,10 +1746,12 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn resolve_uuid() { - let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537").unwrap()); + fn resolve_uuid() -> TestResult { + let value = Value::Uuid(Uuid::parse_str("1481531d-ccc9-46d9-a56f-5b67459c0537")?); assert!(value.clone().resolve(&Schema::Uuid).is_ok()); assert!(value.resolve(&Schema::TimestampMicros).is_err()); + + Ok(()) } #[test] @@ -1593,7 +1761,7 @@ Field with name '"b"' is not a member of the map items"#, } #[test] - fn test_avro_3621_resolve_to_nullable_union() { + fn test_avro_3621_resolve_to_nullable_union() -> TestResult { let schema = Schema::parse_str( r#"{ "type": "record", @@ -1626,8 +1794,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let value = Value::Record(vec![( "event".to_string(), @@ -1640,33 +1807,35 @@ Field with name '"b"' is not a member of the map items"#, Value::Record(vec![("size".to_string(), Value::Int(1))]), )]); assert!(value.resolve(&schema).is_err()); + + Ok(()) } #[test] - fn json_from_avro() { - assert_eq!(JsonValue::try_from(Value::Null).unwrap(), JsonValue::Null); + fn json_from_avro() -> TestResult { + assert_eq!(JsonValue::try_from(Value::Null)?, JsonValue::Null); assert_eq!( - JsonValue::try_from(Value::Boolean(true)).unwrap(), + JsonValue::try_from(Value::Boolean(true))?, JsonValue::Bool(true) ); assert_eq!( - JsonValue::try_from(Value::Int(1)).unwrap(), + JsonValue::try_from(Value::Int(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Long(1)).unwrap(), + JsonValue::try_from(Value::Long(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Float(1.0)).unwrap(), + JsonValue::try_from(Value::Float(1.0))?, JsonValue::Number(Number::from_f64(1.0).unwrap()) ); assert_eq!( - JsonValue::try_from(Value::Double(1.0)).unwrap(), + JsonValue::try_from(Value::Double(1.0))?, JsonValue::Number(Number::from_f64(1.0).unwrap()) ); assert_eq!( - JsonValue::try_from(Value::Bytes(vec![1, 2, 3])).unwrap(), + JsonValue::try_from(Value::Bytes(vec![1, 2, 3]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1674,11 +1843,11 @@ Field with name '"b"' 
is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::String("test".into())).unwrap(), + JsonValue::try_from(Value::String("test".into()))?, JsonValue::String("test".into()) ); assert_eq!( - JsonValue::try_from(Value::Fixed(3, vec![1, 2, 3])).unwrap(), + JsonValue::try_from(Value::Fixed(3, vec![1, 2, 3]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1686,12 +1855,11 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::Enum(1, "test_enum".into())).unwrap(), + JsonValue::try_from(Value::Enum(1, "test_enum".into()))?, JsonValue::String("test_enum".into()) ); assert_eq!( - JsonValue::try_from(Value::Union(1, Box::new(Value::String("test_enum".into())))) - .unwrap(), + JsonValue::try_from(Value::Union(1, Box::new(Value::String("test_enum".into()))))?, JsonValue::String("test_enum".into()) ); assert_eq!( @@ -1699,8 +1867,7 @@ Field with name '"b"' is not a member of the map items"#, Value::Int(1), Value::Int(2), Value::Int(3) - ])) - .unwrap(), + ]))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1716,8 +1883,7 @@ Field with name '"b"' is not a member of the map items"#, ] .into_iter() .collect() - )) - .unwrap(), + ))?, JsonValue::Object( vec![ ("v1".to_string(), JsonValue::Number(1.into())), @@ -1733,8 +1899,7 @@ Field with name '"b"' is not a member of the map items"#, ("v1".to_string(), Value::Int(1)), ("v2".to_string(), Value::Int(2)), ("v3".to_string(), Value::Int(3)) - ])) - .unwrap(), + ]))?, JsonValue::Object( vec![ ("v1".to_string(), JsonValue::Number(1.into())), @@ -1746,11 +1911,11 @@ Field with name '"b"' is not a member of the map items"#, ) ); assert_eq!( - JsonValue::try_from(Value::Date(1)).unwrap(), + JsonValue::try_from(Value::Date(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::Decimal(vec![1, 2, 3].into())).unwrap(), + JsonValue::try_from(Value::Decimal(vec![1, 2, 3].into()))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1758,26 +1923,33 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::TimeMillis(1)).unwrap(), + JsonValue::try_from(Value::TimeMillis(1))?, + JsonValue::Number(1.into()) + ); + assert_eq!( + JsonValue::try_from(Value::TimeMicros(1))?, + JsonValue::Number(1.into()) + ); + assert_eq!( + JsonValue::try_from(Value::TimestampMillis(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimeMicros(1)).unwrap(), + JsonValue::try_from(Value::TimestampMicros(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimestampMillis(1)).unwrap(), + JsonValue::try_from(Value::LocalTimestampMillis(1))?, JsonValue::Number(1.into()) ); assert_eq!( - JsonValue::try_from(Value::TimestampMicros(1)).unwrap(), + JsonValue::try_from(Value::LocalTimestampMicros(1))?, JsonValue::Number(1.into()) ); assert_eq!( JsonValue::try_from(Value::Duration( [1u8, 2u8, 3u8, 4u8, 5u8, 6u8, 7u8, 8u8, 9u8, 10u8, 11u8, 12u8].into() - )) - .unwrap(), + ))?, JsonValue::Array(vec![ JsonValue::Number(1.into()), JsonValue::Number(2.into()), @@ -1794,16 +1966,17 @@ Field with name '"b"' is not a member of the map items"#, ]) ); assert_eq!( - JsonValue::try_from(Value::Uuid( - Uuid::parse_str("936DA01F-9ABD-4D9D-80C7-02AF85C822A8").unwrap() - )) - .unwrap(), + JsonValue::try_from(Value::Uuid(Uuid::parse_str( + "936DA01F-9ABD-4D9D-80C7-02AF85C822A8" + )?))?, 
JsonValue::String("936da01f-9abd-4d9d-80c7-02af85c822a8".into()) ); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_record() { + fn test_avro_3433_recursive_resolves_record() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1827,8 +2000,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1836,10 +2008,12 @@ Field with name '"b"' is not a member of the map items"#, outer .resolve(&schema) .expect("Record definition defined in one field must be available in other field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_array() { + fn test_avro_3433_recursive_resolves_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1869,8 +2043,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1884,10 +2057,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in array definition must be resolvable from map"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_map() { + fn test_avro_3433_recursive_resolves_map() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1914,8 +2089,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -1929,10 +2103,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in record field must be resolvable from map field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_record_wrapper() { + fn test_avro_3433_recursive_resolves_record_wrapper() -> TestResult { let schema = Schema::parse_str( r#" { @@ -1963,8 +2139,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![( @@ -1974,10 +2149,12 @@ Field with name '"b"' is not a member of the map items"#, let outer_value = Value::Record(vec![("a".into(), inner_value1), ("b".into(), inner_value2)]); outer_value.resolve(&schema).expect("Record schema defined in field must be resolvable in Record schema defined in other field"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_map_and_array() { + fn test_avro_3433_recursive_resolves_map_and_array() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2007,8 +2184,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -2022,10 +2198,12 @@ Field with name '"b"' is not a member of the map items"#, outer_value .resolve(&schema) .expect("Record defined in map definition must be resolvable from array"); + + Ok(()) } #[test] - fn test_avro_3433_recursive_resolves_union() { + fn test_avro_3433_recursive_resolves_union() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2049,8 +2227,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value1 = 
Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value2 = Value::Record(vec![("z".into(), Value::Int(6))]); @@ -2065,10 +2242,12 @@ Field with name '"b"' is not a member of the map items"#, outer2 .resolve(&schema) .expect("Record definition defined in union must be resolved in other field"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_outer_namespace() { + fn test_avro_3461_test_multi_level_resolve_outer_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2110,7 +2289,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -2151,10 +2330,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_middle_namespace() { + fn test_avro_3461_test_multi_level_resolve_middle_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2197,7 +2378,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( "middle_field_1".into(), @@ -2238,10 +2419,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3461_test_multi_level_resolve_inner_namespace() { + fn test_avro_3461_test_multi_level_resolve_inner_namespace() -> TestResult { let schema = r#" { "name": "record_name", @@ -2285,7 +2468,7 @@ Field with name '"b"' is not a member of the map items"#, ] } "#; - let schema = Schema::parse_str(schema).unwrap(); + let schema = Schema::parse_str(schema)?; let inner_record = Value::Record(vec![("inner_field_1".into(), Value::Double(5.4))]); let middle_record_variation_1 = Value::Record(vec![( @@ -2327,10 +2510,12 @@ Field with name '"b"' is not a member of the map items"#, outer_record_variation_3 .resolve(&schema) .expect("Should be able to resolve value to the schema that is it's definition"); + + Ok(()) } #[test] - fn test_avro_3460_validation_with_refs() { + fn test_avro_3460_validation_with_refs() -> TestResult { let schema = Schema::parse_str( r#" { @@ -2354,8 +2539,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let inner_value_right = Value::Record(vec![("z".into(), Value::Int(3))]); let inner_value_wrong1 = Value::Record(vec![("z".into(), Value::Null)]); @@ -2378,10 +2562,12 @@ Field with name '"b"' is not a member of the map items"#, !outer2.validate(&schema), "field b record is invalid against the schema" ); // this should pass, but doesn't + + Ok(()) } #[test] - fn test_avro_3460_validation_with_refs_real_struct() { + fn test_avro_3460_validation_with_refs_real_struct() -> TestResult { use crate::ser::Serializer; use serde::Serialize; @@ -2431,8 +2617,7 @@ Field with name '"b"' is not a member of the map items"#, } ] }"#, - ) - .unwrap(); + )?; let test_inner = TestInner { z: 3 }; let test_outer1 = 
TestRefSchemaStruct1 { @@ -2449,11 +2634,11 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_outer1: Value = test_outer1.serialize(&mut ser).unwrap(); + let test_outer1: Value = test_outer1.serialize(&mut ser)?; let mut ser = Serializer::default(); - let test_outer2: Value = test_outer2.serialize(&mut ser).unwrap(); + let test_outer2: Value = test_outer2.serialize(&mut ser)?; let mut ser = Serializer::default(); - let test_outer3: Value = test_outer3.serialize(&mut ser).unwrap(); + let test_outer3: Value = test_outer3.serialize(&mut ser)?; assert!( !test_outer1.validate(&schema), @@ -2467,9 +2652,11 @@ Field with name '"b"' is not a member of the map items"#, !test_outer3.validate(&schema), "field b record is invalid against the schema" ); + + Ok(()) } - fn avro_3674_with_or_without_namespace(with_namespace: bool) { + fn avro_3674_with_or_without_namespace(with_namespace: bool) -> TestResult { use crate::ser::Serializer; use serde::Serialize; @@ -2511,7 +2698,7 @@ Field with name '"b"' is not a member of the map items"#, }, ); - let schema = Schema::parse_str(&schema_str).unwrap(); + let schema = Schema::parse_str(&schema_str)?; #[derive(Serialize)] enum EnumType { @@ -2540,25 +2727,27 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser).unwrap(); + let test_value: Value = msg.serialize(&mut ser)?; assert!(test_value.validate(&schema), "test_value should validate"); assert!( test_value.resolve(&schema).is_ok(), "test_value should resolve" ); + + Ok(()) } #[test] - fn test_avro_3674_validate_no_namespace_resolution() { - avro_3674_with_or_without_namespace(false); + fn test_avro_3674_validate_no_namespace_resolution() -> TestResult { + avro_3674_with_or_without_namespace(false) } #[test] - fn test_avro_3674_validate_with_namespace_resolution() { - avro_3674_with_or_without_namespace(true); + fn test_avro_3674_validate_with_namespace_resolution() -> TestResult { + avro_3674_with_or_without_namespace(true) } - fn avro_3688_schema_resolution_panic(set_field_b: bool) { + fn avro_3688_schema_resolution_panic(set_field_b: bool) -> TestResult { use crate::ser::Serializer; use serde::{Deserialize, Serialize}; @@ -2605,7 +2794,7 @@ Field with name '"b"' is not a member of the map items"#, field_b: Option, } - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; let msg = Message { field_a: Some(Inner { @@ -2621,21 +2810,119 @@ Field with name '"b"' is not a member of the map items"#, }; let mut ser = Serializer::default(); - let test_value: Value = msg.serialize(&mut ser).unwrap(); + let test_value: Value = msg.serialize(&mut ser)?; assert!(test_value.validate(&schema), "test_value should validate"); assert!( test_value.resolve(&schema).is_ok(), "test_value should resolve" ); + + Ok(()) + } + + #[test] + fn test_avro_3688_field_b_not_set() -> TestResult { + avro_3688_schema_resolution_panic(false) + } + + #[test] + fn test_avro_3688_field_b_set() -> TestResult { + avro_3688_schema_resolution_panic(true) + } + + #[test] + fn test_avro_3764_use_resolve_schemata() -> TestResult { + let referenced_schema = + r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; + let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": [{"name": "reference", "type": "enumForReference"}]}"#; + + let value: serde_json::Value = serde_json::from_str( + r#" + { + "reference": 
"A" + } + "#, + )?; + + let avro_value = Value::from(value); + + let schemas = Schema::parse_list(&[main_schema, referenced_schema])?; + + let main_schema = schemas.get(0).unwrap(); + let schemata: Vec<_> = schemas.iter().skip(1).collect(); + + let resolve_result = avro_value.clone().resolve_schemata(main_schema, schemata); + + assert!( + resolve_result.is_ok(), + "result of resolving with schemata should be ok, got: {:?}", + resolve_result + ); + + let resolve_result = avro_value.resolve(main_schema); + assert!( + resolve_result.is_err(), + "result of resolving without schemata should be err, got: {:?}", + resolve_result + ); + + Ok(()) } #[test] - fn test_avro_3688_field_b_not_set() { - avro_3688_schema_resolution_panic(false); + fn test_avro_3767_union_resolve_complex_refs() -> TestResult { + let referenced_enum = + r#"{"name": "enumForReference", "type": "enum", "symbols": ["A", "B"]}"#; + let referenced_record = r#"{"name": "recordForReference", "type": "record", "fields": [{"name": "refInRecord", "type": "enumForReference"}]}"#; + let main_schema = r#"{"name": "recordWithReference", "type": "record", "fields": [{"name": "reference", "type": ["null", "recordForReference"]}]}"#; + + let value: serde_json::Value = serde_json::from_str( + r#" + { + "reference": { + "refInRecord": "A" + } + } + "#, + )?; + + let avro_value = Value::from(value); + + let schemata = Schema::parse_list(&[referenced_enum, referenced_record, main_schema])?; + + let main_schema = schemata.last().unwrap(); + let other_schemata: Vec<&Schema> = schemata.iter().take(2).collect(); + + let resolve_result = avro_value.resolve_schemata(main_schema, other_schemata); + + assert!( + resolve_result.is_ok(), + "result of resolving with schemata should be ok, got: {:?}", + resolve_result + ); + + assert!( + resolve_result?.validate_schemata(schemata.iter().collect()), + "result of validation with schemata should be true" + ); + + Ok(()) } #[test] - fn test_avro_3688_field_b_set() { - avro_3688_schema_resolution_panic(true); + fn test_avro_3782_incorrect_decimal_resolving() -> TestResult { + let schema = r#"{"name": "decimalSchema", "logicalType": "decimal", "type": "fixed", "precision": 8, "scale": 0, "size": 8}"#; + + let avro_value = Value::Decimal(Decimal::from( + BigInt::from(12345678u32).to_signed_bytes_be(), + )); + let schema = Schema::parse_str(schema)?; + let resolve_result = avro_value.resolve(&schema); + assert!( + resolve_result.is_ok(), + "resolve result must be ok, got: {resolve_result:?}" + ); + + Ok(()) } } diff --git a/lang/rust/avro/src/util.rs b/lang/rust/avro/src/util.rs index e18b5641fce..2ea134c77a6 100644 --- a/lang/rust/avro/src/util.rs +++ b/lang/rust/avro/src/util.rs @@ -17,16 +17,31 @@ use crate::{schema::Documentation, AvroResult, Error}; use serde_json::{Map, Value}; -use std::{convert::TryFrom, i64, io::Read, sync::Once}; +use std::{ + convert::TryFrom, + i64, + io::Read, + sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Once, + }, +}; /// Maximum number of bytes that can be allocated when decoding /// Avro-encoded values. This is a protection against ill-formed /// data, whose length field might be interpreted as enormous. /// See max_allocation_bytes to change this limit. 
pub const DEFAULT_MAX_ALLOCATION_BYTES: usize = 512 * 1024 * 1024; -static mut MAX_ALLOCATION_BYTES: usize = DEFAULT_MAX_ALLOCATION_BYTES; +static MAX_ALLOCATION_BYTES: AtomicUsize = AtomicUsize::new(DEFAULT_MAX_ALLOCATION_BYTES); static MAX_ALLOCATION_BYTES_ONCE: Once = Once::new(); +/// Whether to set serialization & deserialization traits +/// as `human_readable` or not. +/// See [set_serde_human_readable] to change this value. +// crate-visible for testing +pub(crate) static SERDE_HUMAN_READABLE: AtomicBool = AtomicBool::new(true); +static SERDE_HUMAN_READABLE_ONCE: Once = Once::new(); + pub trait MapHelper { fn string(&self, key: &str) -> Option<String>; @@ -132,12 +147,10 @@ fn decode_variable<R: Read>(reader: &mut R) -> AvroResult<i64> { /// to set the limit either when calling this method, or when decoding for /// the first time. pub fn max_allocation_bytes(num_bytes: usize) -> usize { - unsafe { - MAX_ALLOCATION_BYTES_ONCE.call_once(|| { - MAX_ALLOCATION_BYTES = num_bytes; - }); - MAX_ALLOCATION_BYTES - } + MAX_ALLOCATION_BYTES_ONCE.call_once(|| { + MAX_ALLOCATION_BYTES.store(num_bytes, Ordering::Release); + }); + MAX_ALLOCATION_BYTES.load(Ordering::Acquire) } pub fn safe_len(len: usize) -> AvroResult<usize> { @@ -153,9 +166,28 @@ } } +/// Set whether serializing/deserializing is marked as human readable in serde traits. +/// This will adjust the return value of `is_human_readable()` for both. +/// Once called, the value cannot be changed. +/// +/// **NOTE** This function must be called before serializing/deserializing **any** data. The +/// library leverages [`std::sync::Once`](https://doc.rust-lang.org/std/sync/struct.Once.html) +/// to set the value exactly once, on the first call to this method.
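Both configuration knobs in this hunk are intentionally write-once: max_allocation_bytes above and the set_serde_human_readable function defined next latch their value behind a std::sync::Once, so only the first call has any effect. A usage sketch, assuming both functions are re-exported at the crate root as in released versions of the crate; the 64 MiB limit is an arbitrary example:

    use apache_avro::{max_allocation_bytes, set_serde_human_readable};

    fn main() {
        // Must run before the first decode; later calls are silently ignored
        // because the Once has already fired.
        assert_eq!(max_allocation_bytes(64 * 1024 * 1024), 64 * 1024 * 1024);
        assert_eq!(max_allocation_bytes(1), 64 * 1024 * 1024); // no effect

        // Same one-shot semantics for serde's is_human_readable() flag.
        set_serde_human_readable(false);
    }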
+pub fn set_serde_human_readable(human_readable: bool) { + SERDE_HUMAN_READABLE_ONCE.call_once(|| { + SERDE_HUMAN_READABLE.store(human_readable, Ordering::Release); + }); +} + +pub(crate) fn is_human_readable() -> bool { + SERDE_HUMAN_READABLE.load(Ordering::Acquire) +} + #[cfg(test)] mod tests { use super::*; + use apache_avro_test_helper::TestResult; use pretty_assertions::assert_eq; #[test] @@ -249,8 +281,10 @@ mod tests { } #[test] - fn test_safe_len() { - assert_eq!(42usize, safe_len(42usize).unwrap()); + fn test_safe_len() -> TestResult { + assert_eq!(42usize, safe_len(42usize)?); assert!(safe_len(1024 * 1024 * 1024).is_err()); + + Ok(()) } } diff --git a/lang/rust/avro/src/writer.rs b/lang/rust/avro/src/writer.rs index 33653e29b1d..83b7d8b00a0 100644 --- a/lang/rust/avro/src/writer.rs +++ b/lang/rust/avro/src/writer.rs @@ -632,13 +632,15 @@ mod tests { use crate::{ decimal::Decimal, duration::{Days, Duration, Millis, Months}, - schema::Name, + schema::{DecimalSchema, FixedSchema, Name}, types::Record, util::zig_i64, }; use pretty_assertions::assert_eq; use serde::{Deserialize, Serialize}; + use apache_avro_test_helper::TestResult; + const AVRO_OBJECT_HEADER_LEN: usize = AVRO_OBJECT_HEADER.len(); const SCHEMA: &str = r#" @@ -661,8 +663,8 @@ mod tests { const UNION_SCHEMA: &str = r#"["null", "long"]"#; #[test] - fn test_to_avro_datum() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_to_avro_datum() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); @@ -670,35 +672,39 @@ mod tests { let mut expected = Vec::new(); zig_i64(27, &mut expected); zig_i64(3, &mut expected); - expected.extend(vec![b'f', b'o', b'o'].into_iter()); + expected.extend([b'f', b'o', b'o']); + + assert_eq!(to_avro_datum(&schema, record)?, expected); - assert_eq!(to_avro_datum(&schema, record).unwrap(), expected); + Ok(()) } #[test] - fn test_union_not_null() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_union_not_null() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let union = Value::Union(1, Box::new(Value::Long(3))); let mut expected = Vec::new(); zig_i64(1, &mut expected); zig_i64(3, &mut expected); - assert_eq!(to_avro_datum(&schema, union).unwrap(), expected); + assert_eq!(to_avro_datum(&schema, union)?, expected); + + Ok(()) } #[test] - fn test_union_null() { - let schema = Schema::parse_str(UNION_SCHEMA).unwrap(); + fn test_union_null() -> TestResult { + let schema = Schema::parse_str(UNION_SCHEMA)?; let union = Value::Union(0, Box::new(Value::Null)); let mut expected = Vec::new(); zig_i64(0, &mut expected); - assert_eq!(to_avro_datum(&schema, union).unwrap(), expected); - } + assert_eq!(to_avro_datum(&schema, union)?, expected); - type TestResult = Result>; + Ok(()) + } fn logical_type_test + Clone>( schema_str: &'static str, @@ -708,7 +714,7 @@ mod tests { raw_schema: &Schema, raw_value: T, - ) -> TestResult<()> { + ) -> TestResult { let schema = Schema::parse_str(schema_str)?; assert_eq!(&schema, expected_schema); // The serialized format should be the same as the schema. @@ -718,13 +724,13 @@ mod tests { // Should deserialize from the schema into the logical type. 
let mut r = ser.as_slice(); - let de = crate::from_avro_datum(&schema, &mut r, None).unwrap(); + let de = crate::from_avro_datum(&schema, &mut r, None)?; assert_eq!(de, value); Ok(()) } #[test] - fn date() -> TestResult<()> { + fn date() -> TestResult { logical_type_test( r#"{"type": "int", "logicalType": "date"}"#, &Schema::Date, @@ -735,7 +741,7 @@ mod tests { } #[test] - fn time_millis() -> TestResult<()> { + fn time_millis() -> TestResult { logical_type_test( r#"{"type": "int", "logicalType": "time-millis"}"#, &Schema::TimeMillis, @@ -746,7 +752,7 @@ mod tests { } #[test] - fn time_micros() -> TestResult<()> { + fn time_micros() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "time-micros"}"#, &Schema::TimeMicros, @@ -757,7 +763,7 @@ mod tests { } #[test] - fn timestamp_millis() -> TestResult<()> { + fn timestamp_millis() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "timestamp-millis"}"#, &Schema::TimestampMillis, @@ -768,7 +774,7 @@ mod tests { } #[test] - fn timestamp_micros() -> TestResult<()> { + fn timestamp_micros() -> TestResult { logical_type_test( r#"{"type": "long", "logicalType": "timestamp-micros"}"#, &Schema::TimestampMicros, @@ -779,23 +785,23 @@ mod tests { } #[test] - fn decimal_fixed() -> TestResult<()> { + fn decimal_fixed() -> TestResult { let size = 30; - let inner = Schema::Fixed { - name: Name::new("decimal").unwrap(), + let inner = Schema::Fixed(FixedSchema { + name: Name::new("decimal")?, aliases: None, doc: None, size, attributes: Default::default(), - }; + }); let value = vec![0u8; size]; logical_type_test( r#"{"type": {"type": "fixed", "size": 30, "name": "decimal"}, "logicalType": "decimal", "precision": 20, "scale": 5}"#, - &Schema::Decimal { + &Schema::Decimal(DecimalSchema { precision: 20, scale: 5, inner: Box::new(inner.clone()), - }, + }), Value::Decimal(Decimal::from(value.clone())), &inner, Value::Fixed(size, value), @@ -803,16 +809,16 @@ mod tests { } #[test] - fn decimal_bytes() -> TestResult<()> { + fn decimal_bytes() -> TestResult { let inner = Schema::Bytes; let value = vec![0u8; 10]; logical_type_test( r#"{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 3}"#, - &Schema::Decimal { + &Schema::Decimal(DecimalSchema { precision: 4, scale: 3, inner: Box::new(inner.clone()), - }, + }), Value::Decimal(Decimal::from(value.clone())), &inner, value, @@ -820,14 +826,14 @@ mod tests { } #[test] - fn duration() -> TestResult<()> { - let inner = Schema::Fixed { - name: Name::new("duration").unwrap(), + fn duration() -> TestResult { + let inner = Schema::Fixed(FixedSchema { + name: Name::new("duration")?, aliases: None, doc: None, size: 12, attributes: Default::default(), - }; + }); let value = Value::Duration(Duration::new( Months::new(256), Days::new(512), @@ -843,18 +849,18 @@ mod tests { } #[test] - fn test_writer_append() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_append() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - let n1 = writer.append(record.clone()).unwrap(); - let n2 = writer.append(record.clone()).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record.clone())?; + let n2 = writer.append(record.clone())?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -872,11 +878,13 @@ 
mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_extend() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_extend() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); @@ -885,9 +893,9 @@ mod tests { let record_copy = record.clone(); let records = vec![record, record_copy]; - let n1 = writer.extend(records.into_iter()).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.extend(records.into_iter())?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -905,6 +913,8 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[derive(Debug, Clone, Deserialize, Serialize)] @@ -914,8 +924,8 @@ mod tests { } #[test] - fn test_writer_append_ser() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_append_ser() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let record = TestSerdeSerialize { @@ -923,9 +933,9 @@ mod tests { b: "foo".to_owned(), }; - let n1 = writer.append_ser(record).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append_ser(record)?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -942,11 +952,13 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_extend_ser() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_extend_ser() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let record = TestSerdeSerialize { @@ -956,9 +968,9 @@ mod tests { let record_copy = record.clone(); let records = vec![record, record_copy]; - let n1 = writer.extend_ser(records.into_iter()).unwrap(); - let n2 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.extend_ser(records.into_iter())?; + let n2 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2, result.len()); @@ -976,6 +988,8 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } fn make_writer_with_codec(schema: &Schema) -> Writer<'_, Vec> { @@ -991,15 +1005,15 @@ mod tests { .build() } - fn check_writer(mut writer: Writer<'_, Vec>, schema: &Schema) { + fn check_writer(mut writer: Writer<'_, Vec>, schema: &Schema) -> TestResult { let mut record = Record::new(schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - let n1 = writer.append(record.clone()).unwrap(); - let n2 = writer.append(record.clone()).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record.clone())?; + let n2 = writer.append(record.clone())?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -1008,7 +1022,7 @@ mod tests { zig_i64(3, &mut data); data.extend(b"foo"); data.extend(data.clone()); - Codec::Deflate.compress(&mut data).unwrap(); + Codec::Deflate.compress(&mut data)?; // starts with magic assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); @@ -1018,24 +1032,26 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], 
data.as_slice() ); + + Ok(()) } #[test] - fn test_writer_with_codec() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_with_codec() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let writer = make_writer_with_codec(&schema); - check_writer(writer, &schema); + check_writer(writer, &schema) } #[test] - fn test_writer_with_builder() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_writer_with_builder() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let writer = make_writer_with_builder(&schema); - check_writer(writer, &schema); + check_writer(writer, &schema) } #[test] - fn test_logical_writer() { + fn test_logical_writer() -> TestResult { const LOGICAL_TYPE_SCHEMA: &str = r#" { "type": "record", @@ -1055,7 +1071,7 @@ mod tests { } "#; let codec = Codec::Deflate; - let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA).unwrap(); + let schema = Schema::parse_str(LOGICAL_TYPE_SCHEMA)?; let mut writer = Writer::builder() .schema(&schema) .codec(codec) @@ -1071,10 +1087,10 @@ mod tests { let mut record2 = Record::new(&schema).unwrap(); record2.put("a", Value::Union(0, Box::new(Value::Null))); - let n1 = writer.append(record1).unwrap(); - let n2 = writer.append(record2).unwrap(); - let n3 = writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + let n1 = writer.append(record1)?; + let n2 = writer.append(record2)?; + let n3 = writer.flush()?; + let result = writer.into_inner()?; assert_eq!(n1 + n2 + n3, result.len()); @@ -1085,7 +1101,7 @@ mod tests { // byte indicating null zig_i64(0, &mut data); - codec.compress(&mut data).unwrap(); + codec.compress(&mut data)?; // starts with magic assert_eq!(&result[..AVRO_OBJECT_HEADER_LEN], AVRO_OBJECT_HEADER); @@ -1095,47 +1111,43 @@ mod tests { &result[last_data_byte - data.len()..last_data_byte], data.as_slice() ); + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_success() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_success() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); - writer - .add_user_metadata("stringKey".to_string(), String::from("stringValue")) - .unwrap(); - writer - .add_user_metadata("strKey".to_string(), "strValue") - .unwrap(); - writer - .add_user_metadata("bytesKey".to_string(), b"bytesValue") - .unwrap(); - writer - .add_user_metadata("vecKey".to_string(), vec![1, 2, 3]) - .unwrap(); + writer.add_user_metadata("stringKey".to_string(), String::from("stringValue"))?; + writer.add_user_metadata("strKey".to_string(), "strValue")?; + writer.add_user_metadata("bytesKey".to_string(), b"bytesValue")?; + writer.add_user_metadata("vecKey".to_string(), vec![1, 2, 3])?; let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - writer.append(record.clone()).unwrap(); - writer.append(record.clone()).unwrap(); - writer.flush().unwrap(); - let result = writer.into_inner().unwrap(); + writer.append(record.clone())?; + writer.append(record.clone())?; + writer.flush()?; + let result = writer.into_inner()?; assert_eq!(result.len(), 260); + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_failure() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_failure() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let mut record = Record::new(&schema).unwrap(); record.put("a", 27i64); record.put("b", "foo"); - 
writer.append(record.clone()).unwrap(); + writer.append(record.clone())?; match writer.add_user_metadata("stringKey".to_string(), String::from("value2")) { Err(e @ Error::FileHeaderAlreadyWritten) => { @@ -1144,11 +1156,13 @@ mod tests { Err(e) => panic!("Unexpected error occurred while writing user metadata: {e:?}"), Ok(_) => panic!("Expected an error that metadata cannot be added after adding data"), } + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_reserved_prefix_failure() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_reserved_prefix_failure() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut writer = Writer::new(&schema, Vec::new()); let key = "avro.stringKey".to_string(); @@ -1161,11 +1175,13 @@ mod tests { ), Ok(_) => panic!("Expected an error that the metadata key cannot be prefixed with 'avro.'"), } + + Ok(()) } #[test] - fn test_avro_3405_writer_add_metadata_with_builder_api_success() { - let schema = Schema::parse_str(SCHEMA).unwrap(); + fn test_avro_3405_writer_add_metadata_with_builder_api_success() -> TestResult { + let schema = Schema::parse_str(SCHEMA)?; let mut user_meta_data: HashMap<String, Value> = HashMap::new(); user_meta_data.insert( @@ -1182,6 +1198,8 @@ mod tests { .build(); assert_eq!(writer.user_metadata, user_meta_data); + + Ok(()) } #[derive(Serialize, Clone)] @@ -1234,7 +1252,7 @@ mod tests { } #[test] - fn test_single_object_writer() { + fn test_single_object_writer() -> TestResult { let mut buf: Vec<u8> = Vec::new(); let obj = TestSingleObjectWriter { a: 300, @@ -1268,11 +1286,13 @@ mod tests { &mut msg_binary, ) .expect("encode should have failed by here as a dependency of any writing"); - assert_eq!(&buf[10..], &msg_binary[..]) + assert_eq!(&buf[10..], &msg_binary[..]); + + Ok(()) } #[test] - fn test_writer_parity() { + fn test_writer_parity() -> TestResult { let obj1 = TestSingleObjectWriter { a: 300, b: 34.555, @@ -1302,5 +1322,7 @@ mod tests { .expect("Serialization expected"); assert_eq!(buf1, buf2); assert_eq!(buf1, buf3); + + Ok(()) } } diff --git a/lang/rust/avro/tests/append_to_existing.rs b/lang/rust/avro/tests/append_to_existing.rs index 7b3874746bd..2ea59d95c56 100644 --- a/lang/rust/avro/tests/append_to_existing.rs +++ b/lang/rust/avro/tests/append_to_existing.rs @@ -20,9 +20,10 @@ use apache_avro::{ types::{Record, Value}, AvroResult, Reader, Schema, Writer, }; +use apache_avro_test_helper::TestResult; #[test] -fn avro_3630_append_to_an_existing_file() { +fn avro_3630_append_to_an_existing_file() -> TestResult { let schema_str = r#" { "type": "record", @@ -53,6 +54,8 @@ fn avro_3630_append_to_an_existing_file() { check(value, i); i += 1 } + + Ok(()) } /// Simulates reading from a pre-existing .avro file and returns its bytes diff --git a/lang/rust/avro/tests/avro-3786.rs b/lang/rust/avro/tests/avro-3786.rs new file mode 100644 index 00000000000..d27e0c4e53f --- /dev/null +++ b/lang/rust/avro/tests/avro-3786.rs @@ -0,0 +1,886 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; +use apache_avro_test_helper::TestResult; + +#[test] +fn avro_3786_deserialize_union_with_different_enum_order() -> TestResult { + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option<BarUseParent>, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar1" + ], + "default": "bar1" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar1, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar1".to_string())); + assert_eq!(fields[1].0, "barUseParent"); + assert_eq!( + fields[1].1, + types::Value::Union( + 1, + Box::new(types::Value::Record(vec![( + "barUse".to_string(), + types::Value::Enum(0, "bar1".to_string()) + )])) + ) + ); + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn avro_3786_deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub
bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option<BarParent>, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar0 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v1() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option<BarParent>, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema,
avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn test_avro_3786_deserialize_union_with_different_enum_order_defined_in_record_v2() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option<BarParent>, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar2" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar1", + "bar2" + ], + "default": "bar2" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn deserialize_union_with_different_enum_order_defined_in_record() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarParent { + pub bar: Bar, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option<BarParent>, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name":
"Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "com.rallyhealth.devices.canonical.avro.model.v6_0", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "name": "bar", + "type": { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar2 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + assert_eq!(fields[0].0, "barParent"); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} + +#[test] +fn deserialize_union_with_record_with_enum_defined_inline_reader_has_different_indices( +) -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum DefinedInRecord { + #[serde(rename = "val0")] + Val0, + #[serde(rename = "val1")] + Val1, + #[serde(rename = "val2")] + Val2, + #[serde(rename = "UNKNOWN")] + Unknown, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Parent { + pub date: i64, + #[serde(rename = "barUse")] + pub bar_use: Bar, + #[serde(rename = "bazUse")] + pub baz_use: Option>, + #[serde(rename = "definedInRecord")] + pub defined_in_record: DefinedInRecord, + #[serde(rename = "optionalString")] + pub optional_string: Option, + } + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Baz { + #[serde(rename = "baz0")] + Baz0, + #[serde(rename = "baz1")] + Baz1, + #[serde(rename = "baz2")] + Baz2, + } + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + pub baz: Baz, + pub parent: Option, + } + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "fake", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "baz", + "type": + { + "type": "enum", + "name": "Baz", + "symbols": + [ + "baz0", + "baz1", + "baz2" + ], + "default": "baz0" + } + }, + { + "name": "parent", + "type": [ + "null", + { + "type": "record", + "name": "Parent", + "fields": [ + { + "name": "date", + "type": { + "type": "long", + "avro.java.long": "Long" + } + }, + { + "name": "barUse", + "type": "Bar" + }, + { + "name": "bazUse", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "Baz" + } + } + ] + }, + { + "name": "definedInRecord", + "type": { + "name": "DefinedInRecord", + "type": 
"enum", + "symbols": [ + "val0", + "val1", + "val2", + "UNKNOWN" + ], + "default": "UNKNOWN" + } + }, + { + "name": "optionalString", + "type": [ + "null", + "string" + ] + } + ] + } + ] + } + ] + }"#; + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "fake", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "baz", + "type": + { + "type": "enum", + "name": "Baz", + "symbols": + [ + "baz0", + "baz2" + ], + "default": "baz0" + } + }, + { + "name": "parent", + "type": [ + "null", + { + "type": "record", + "name": "Parent", + "fields": [ + { + "name": "date", + "type": { + "type": "long", + "avro.java.long": "Long" + } + }, + { + "name": "barUse", + "type": "Bar" + }, + { + "name": "bazUse", + "type": [ + "null", + { + "type": "array", + "items": { + "type": "Baz" + } + } + ] + }, + { + "name": "definedInRecord", + "type": { + "name": "DefinedInRecord", + "type": "enum", + "symbols": [ + "val1", + "val2", + "UNKNOWN" + ], + "default": "UNKNOWN" + } + }, + { + "name": "optionalString", + "type": [ + "null", + "string" + ] + } + ] + } + ] + } + ] + }"#; + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar0, + baz: Baz::Baz0, + parent: Some(Parent { + bar_use: Bar::Bar0, + baz_use: Some(vec![Baz::Baz0]), + optional_string: Some("test".to_string()), + date: 1689197893, + defined_in_record: DefinedInRecord::Val1, + }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = crate::to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = crate::from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 3); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + // TODO: better validation + } + _ => panic!("Expected Value::Record"), + } + Ok(()) +} diff --git a/lang/rust/avro/tests/avro-3787.rs b/lang/rust/avro/tests/avro-3787.rs new file mode 100644 index 00000000000..c08c3c6cce8 --- /dev/null +++ b/lang/rust/avro/tests/avro-3787.rs @@ -0,0 +1,279 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use apache_avro::{from_avro_datum, to_avro_datum, to_value, types, Schema}; +use apache_avro_test_helper::TestResult; + +#[test] +fn avro_3787_deserialize_union_with_unknown_symbol() -> TestResult { + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option<BarUseParent>, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ], + "default": "bar0" + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar1, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar2 }), + }; + let avro_value = to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 2); + assert_eq!(fields[0].0, "barInit"); + assert_eq!(fields[0].1, types::Value::Enum(1, "bar1".to_string())); + assert_eq!(fields[1].0, "barUseParent"); + // TODO: test value + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} + +#[test] +fn avro_3787_deserialize_union_with_unknown_symbol_no_ref() -> TestResult { + #[derive( + Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, serde::Deserialize, serde::Serialize, + )] + pub enum Bar { + #[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + #[serde(default)] + pub struct BarParent { + #[serde(rename = "Bar")] + pub bar: Bar, + } + + #[inline(always)] + fn default_barparent_bar() -> Bar { + Bar::Bar0 + } + impl Default for BarParent { + fn default() -> BarParent { + BarParent { + bar: default_barparent_bar(), + } + } + } + + #[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] + pub struct Foo { + #[serde(rename = "barParent")] + pub bar_parent: Option<BarParent>, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { +
"type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ], + "default": "bar0" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "fields": + [ + { + "name": "barParent", + "type": [ + "null", + { + "type": "record", + "name": "BarParent", + "fields": [ + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ], + "default": "bar0" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo2 = Foo { + bar_parent: Some(BarParent { bar: Bar::Bar2 }), + }; + let avro_value = to_value(foo2)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + types::Value::Record(fields) => { + assert_eq!(fields.len(), 1); + // assert_eq!(fields[0].0, "barInit"); + // assert_eq!(fields[0].1, types::Value::Enum(0, "bar0".to_string())); + assert_eq!(fields[0].0, "barParent"); + // assert_eq!(fields[1].1, types::Value::Enum(1, "bar1".to_string())); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} diff --git a/lang/rust/avro/tests/io.rs b/lang/rust/avro/tests/io.rs index 9d3dc7b79f3..ab3712893bc 100644 --- a/lang/rust/avro/tests/io.rs +++ b/lang/rust/avro/tests/io.rs @@ -17,6 +17,7 @@ //! Port of https://github.com/apache/avro/blob/release-1.9.1/lang/py/test/test_io.py use apache_avro::{from_avro_datum, to_avro_datum, types::Value, Error, Schema}; +use apache_avro_test_helper::TestResult; use lazy_static::lazy_static; use pretty_assertions::assert_eq; use std::io::Cursor; @@ -99,47 +100,55 @@ lazy_static! 
{ } #[test] -fn test_validate() { +fn test_validate() -> TestResult { for (raw_schema, value) in SCHEMAS_TO_VALIDATE.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; assert!( value.validate(&schema), "value {value:?} does not validate schema: {raw_schema}" ); } + + Ok(()) } #[test] -fn test_round_trip() { +fn test_round_trip() -> TestResult { for (raw_schema, value) in SCHEMAS_TO_VALIDATE.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; let encoded = to_avro_datum(&schema, value.clone()).unwrap(); let decoded = from_avro_datum(&schema, &mut Cursor::new(encoded), None).unwrap(); assert_eq!(value, &decoded); } + + Ok(()) } #[test] -fn test_binary_int_encoding() { +fn test_binary_int_encoding() -> TestResult { for (number, hex_encoding) in BINARY_ENCODINGS.iter() { - let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32)).unwrap(); + let encoded = to_avro_datum(&Schema::Int, Value::Int(*number as i32))?; assert_eq!(&encoded, hex_encoding); } + + Ok(()) } #[test] -fn test_binary_long_encoding() { +fn test_binary_long_encoding() -> TestResult { for (number, hex_encoding) in BINARY_ENCODINGS.iter() { - let encoded = to_avro_datum(&Schema::Long, Value::Long(*number)).unwrap(); + let encoded = to_avro_datum(&Schema::Long, Value::Long(*number))?; assert_eq!(&encoded, hex_encoding); } + + Ok(()) } #[test] -fn test_schema_promotion() { +fn test_schema_promotion() -> TestResult { // Each schema is present in order of promotion (int -> long, long -> float, float -> double) // Each value represents the expected decoded value when promoting a value previously encoded with a promotable schema - let promotable_schemas = vec![r#""int""#, r#""long""#, r#""float""#, r#""double""#]; + let promotable_schemas = [r#""int""#, r#""long""#, r#""float""#, r#""double""#]; let promotable_values = vec![ Value::Int(219), Value::Long(219), @@ -147,11 +156,11 @@ fn test_schema_promotion() { Value::Double(219.0), ]; for (i, writer_raw_schema) in promotable_schemas.iter().enumerate() { - let writer_schema = Schema::parse_str(writer_raw_schema).unwrap(); + let writer_schema = Schema::parse_str(writer_raw_schema)?; let original_value = &promotable_values[i]; for (j, reader_raw_schema) in promotable_schemas.iter().enumerate().skip(i + 1) { - let reader_schema = Schema::parse_str(reader_raw_schema).unwrap(); - let encoded = to_avro_datum(&writer_schema, original_value.clone()).unwrap(); + let reader_schema = Schema::parse_str(reader_raw_schema)?; + let encoded = to_avro_datum(&writer_schema, original_value.clone())?; let decoded = from_avro_datum( &writer_schema, &mut Cursor::new(encoded), @@ -163,28 +172,30 @@ fn test_schema_promotion() { assert_eq!(decoded, promotable_values[j]); } } + + Ok(()) } #[test] -fn test_unknown_symbol() { +fn test_unknown_symbol() -> TestResult { let writer_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["FOO", "BAR"]}"#)?; let reader_schema = - Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#) - .unwrap(); + Schema::parse_str(r#"{"type": "enum", "name": "Test", "symbols": ["BAR", "BAZ"]}"#)?; let original_value = Value::Enum(0, "FOO".to_string()); - let encoded = to_avro_datum(&writer_schema, original_value).unwrap(); + let encoded = to_avro_datum(&writer_schema, original_value)?; let decoded = 
from_avro_datum( &writer_schema, &mut Cursor::new(encoded), Some(&reader_schema), ); assert!(decoded.is_err()); + + Ok(()) } #[test] -fn test_default_value() { +fn test_default_value() -> TestResult { for (field_type, default_json, default_datum) in DEFAULT_VALUE_EXAMPLES.iter() { let reader_schema = Schema::parse_str(&format!( r#"{{ @@ -194,26 +205,26 @@ fn test_default_value() { {{"name": "H", "type": {field_type}, "default": {default_json}}} ] }}"# - )) - .unwrap(); + ))?; let datum_to_read = Value::Record(vec![("H".to_string(), default_datum.clone())]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!( datum_read, datum_to_read, "{} -> {}", *field_type, *default_json ); } + + Ok(()) } #[test] -fn test_no_default_value() { +fn test_no_default_value() -> TestResult { let reader_schema = Schema::parse_str( r#"{ "type": "record", @@ -222,19 +233,20 @@ fn test_no_default_value() { {"name": "H", "type": "int"} ] }"#, - ) - .unwrap(); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + )?; + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let result = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), ); assert!(result.is_err()); + + Ok(()) } #[test] -fn test_projection() { +fn test_projection() -> TestResult { let reader_schema = Schema::parse_str( r#" { @@ -246,24 +258,24 @@ fn test_projection() { ] } "#, - ) - .unwrap(); + )?; let datum_to_read = Value::Record(vec![ ("E".to_string(), Value::Int(5)), ("F".to_string(), Value::Int(6)), ]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!(datum_to_read, datum_read); + + Ok(()) } #[test] -fn test_field_order() { +fn test_field_order() -> TestResult { let reader_schema = Schema::parse_str( r#" { @@ -275,20 +287,20 @@ fn test_field_order() { ] } "#, - ) - .unwrap(); + )?; let datum_to_read = Value::Record(vec![ ("F".to_string(), Value::Int(6)), ("E".to_string(), Value::Int(5)), ]); - let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone()).unwrap(); + let encoded = to_avro_datum(&LONG_RECORD_SCHEMA, LONG_RECORD_DATUM.clone())?; let datum_read = from_avro_datum( &LONG_RECORD_SCHEMA, &mut Cursor::new(encoded), Some(&reader_schema), - ) - .unwrap(); + )?; assert_eq!(datum_to_read, datum_read); + + Ok(()) } #[test] diff --git a/lang/rust/avro/tests/schema.rs b/lang/rust/avro/tests/schema.rs index 95ba18c2f4b..63b73056084 100644 --- a/lang/rust/avro/tests/schema.rs +++ b/lang/rust/avro/tests/schema.rs @@ -15,13 +15,19 @@ // specific language governing permissions and limitations // under the License. 
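+// The tests in this file return apache_avro_test_helper::TestResult so that +// `?` can be used in place of `.unwrap()`: a test that returns Err fails and +// reports the underlying error, just as the previous panics did.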
+use std::{ + collections::HashMap, + io::{Cursor, Read}, +}; + use apache_avro::{ - schema::{Name, RecordField}, + from_avro_datum, from_value, + schema::{EnumSchema, FixedSchema, Name, RecordField, RecordSchema}, to_avro_datum, to_value, types::{Record, Value}, Codec, Error, Reader, Schema, Writer, }; -use apache_avro_test_helper::init; +use apache_avro_test_helper::{init, TestResult}; use lazy_static::lazy_static; const PRIMITIVE_EXAMPLES: &[(&str, bool)] = &[ @@ -586,6 +592,42 @@ const TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ ), ]; +const LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-millis"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-milis" (missing the second "l"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-milis"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-millis"}"#, + true, + ), +]; + +const LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE: &[(&str, bool)] = &[ + ( + r#"{"type": "long", "logicalType": "local-timestamp-micros"}"#, + true, + ), + // this is valid even though its logical type is "local-timestamp-micro" (missing the last "s"), because + // unknown logical types are ignored + ( + r#"{"type": "long", "logicalType": "local-timestamp-micro"}"#, + true, + ), + ( + // this is still valid because unknown logicalType should be ignored + r#"{"type": "int", "logicalType": "local-timestamp-micros"}"#, + true, + ), +]; + lazy_static! { static ref EXAMPLES: Vec<(&'static str, bool)> = Vec::new() .iter() @@ -606,13 +648,15 @@ lazy_static! { .chain(TIMEMICROS_LOGICAL_TYPE.iter().copied()) .chain(TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) .chain(TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) + .chain(LOCAL_TIMESTAMPMILLIS_LOGICAL_TYPE.iter().copied()) + .chain(LOCAL_TIMESTAMPMICROS_LOGICAL_TYPE.iter().copied()) .collect(); static ref VALID_EXAMPLES: Vec<(&'static str, bool)> = EXAMPLES.iter().copied().filter(|s| s.1).collect(); } #[test] -fn test_correct_recursive_extraction() { +fn test_correct_recursive_extraction() -> TestResult { init(); let raw_outer_schema = r#"{ "type": "record", @@ -633,22 +677,22 @@ fn test_correct_recursive_extraction() { } ] }"#; - let outer_schema = Schema::parse_str(raw_outer_schema).unwrap(); - if let Schema::Record { + let outer_schema = Schema::parse_str(raw_outer_schema)?; + if let Schema::Record(RecordSchema { fields: outer_fields, .. - } = outer_schema + }) = outer_schema { let inner_schema = &outer_fields[0].schema; - if let Schema::Record { + if let Schema::Record(RecordSchema { fields: inner_fields, .. - } = inner_schema + }) = inner_schema { - if let Schema::Record { + if let Schema::Record(RecordSchema { name: recursive_type, .. 
- } = &inner_fields[0].schema + }) = &inner_fields[0].schema { assert_eq!("X", recursive_type.name.as_str()); } @@ -658,10 +702,12 @@ fn test_correct_recursive_extraction() { } else { panic!("outer schema {outer_schema:?} should have been a record") } + + Ok(()) } #[test] -fn test_parse() { +fn test_parse() -> TestResult { init(); for (raw_schema, valid) in EXAMPLES.iter() { let schema = Schema::parse_str(raw_schema); @@ -677,36 +723,93 @@ fn test_parse() { ) } } + Ok(()) +} + +#[test] +fn test_3799_parse_reader() -> TestResult { + init(); + for (raw_schema, valid) in EXAMPLES.iter() { + let schema = Schema::parse_reader(&mut Cursor::new(raw_schema)); + if *valid { + assert!( + schema.is_ok(), + "schema {raw_schema} was supposed to be valid; error: {schema:?}", + ) + } else { + assert!( + schema.is_err(), + "schema {raw_schema} was supposed to be invalid" + ) + } + } + + // Ensure it works for trait objects too. + for (raw_schema, valid) in EXAMPLES.iter() { + let reader: &mut dyn Read = &mut Cursor::new(raw_schema); + let schema = Schema::parse_reader(reader); + if *valid { + assert!( + schema.is_ok(), + "schema {raw_schema} was supposed to be valid; error: {schema:?}", + ) + } else { + assert!( + schema.is_err(), + "schema {raw_schema} was supposed to be invalid" + ) + } + } + Ok(()) +} + +#[test] +fn test_3799_raise_io_error_from_parse_read() -> Result<(), String> { + // 0xDF is invalid for UTF-8. + let mut invalid_data = Cursor::new([0xDF]); + + let error = Schema::parse_reader(&mut invalid_data).unwrap_err(); + + if let Error::ReadSchemaFromReader(e) = error { + assert!( + e.to_string().contains("stream did not contain valid UTF-8"), + "{e}" + ); + Ok(()) + } else { + Err(format!("Expected std::io::Error, got {error:?}")) + } } #[test] /// Test that the string generated by an Avro Schema object is, in fact, a valid Avro schema. -fn test_valid_cast_to_string_after_parse() { +fn test_valid_cast_to_string_after_parse() -> TestResult { init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); - Schema::parse_str(schema.canonical_form().as_str()).unwrap(); + let schema = Schema::parse_str(raw_schema)?; + Schema::parse_str(schema.canonical_form().as_str())?; } + Ok(()) } #[test] /// 1. Given a string, parse it to get Avro schema "original". /// 2. Serialize "original" to a string and parse that string to generate Avro schema "round trip". /// 3. Ensure "original" and "round trip" schemas are equivalent. 
-fn test_equivalence_after_round_trip() { +fn test_equivalence_after_round_trip() -> TestResult { init(); for (raw_schema, _) in VALID_EXAMPLES.iter() { - let original_schema = Schema::parse_str(raw_schema).unwrap(); - let round_trip_schema = - Schema::parse_str(original_schema.canonical_form().as_str()).unwrap(); + let original_schema = Schema::parse_str(raw_schema)?; + let round_trip_schema = Schema::parse_str(original_schema.canonical_form().as_str())?; assert_eq!(original_schema, round_trip_schema); } + Ok(()) } #[test] /// Test that a list of schemas whose definitions do not depend on each other produces the same /// result as parsing each element of the list individually -fn test_parse_list_without_cross_deps() { +fn test_parse_list_without_cross_deps() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -721,12 +824,13 @@ fn test_parse_list_without_cross_deps() { "size": 16 }"#; let schema_strs = [schema_str_1, schema_str_2]; - let schemas = Schema::parse_list(&schema_strs).expect("Test failed"); + let schemas = Schema::parse_list(&schema_strs)?; for schema_str in &schema_strs { - let parsed = Schema::parse_str(schema_str).expect("Test failed"); + let parsed = Schema::parse_str(schema_str)?; assert!(schemas.contains(&parsed)); } + Ok(()) } #[test] @@ -734,7 +838,7 @@ fn test_parse_list_without_cross_deps() { /// perform the necessary schema composition. This should work regardless of the order in which /// the schemas are input. /// However, the output order is guaranteed to be the same as the input order. -fn test_parse_list_with_cross_deps_basic() { +fn test_parse_list_with_cross_deps_basic() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -753,15 +857,16 @@ fn test_parse_list_with_cross_deps_basic() { let schema_strs_first = [schema_a_str, schema_b_str]; let schema_strs_second = [schema_b_str, schema_a_str]; - let schemas_first = Schema::parse_list(&schema_strs_first).expect("Test failed"); - let schemas_second = Schema::parse_list(&schema_strs_second).expect("Test failed"); + let schemas_first = Schema::parse_list(&schema_strs_first)?; + let schemas_second = Schema::parse_list(&schema_strs_second)?; assert_eq!(schemas_first[0], schemas_second[1]); assert_eq!(schemas_first[1], schemas_second[0]); + Ok(()) } #[test] -fn test_parse_list_recursive_type() { +fn test_parse_list_recursive_type() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -781,13 +886,14 @@ fn test_parse_list_recursive_type() { }"#; let schema_strs_first = [schema_str_1, schema_str_2]; let schema_strs_second = [schema_str_2, schema_str_1]; - let _ = Schema::parse_list(&schema_strs_first).expect("Test failed"); - let _ = Schema::parse_list(&schema_strs_second).expect("Test failed"); + let _ = Schema::parse_list(&schema_strs_first)?; + let _ = Schema::parse_list(&schema_strs_second)?; + Ok(()) } #[test] /// Test that schema composition resolves namespaces. 
-fn test_parse_list_with_cross_deps_and_namespaces() { +fn test_parse_list_with_cross_deps_and_namespaces() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -805,16 +911,18 @@ fn test_parse_list_with_cross_deps_and_namespaces() { ] }"#; - let schemas_first = Schema::parse_list(&[schema_a_str, schema_b_str]).expect("Test failed"); - let schemas_second = Schema::parse_list(&[schema_b_str, schema_a_str]).expect("Test failed"); + let schemas_first = Schema::parse_list(&[schema_a_str, schema_b_str])?; + let schemas_second = Schema::parse_list(&[schema_b_str, schema_a_str])?; assert_eq!(schemas_first[0], schemas_second[1]); assert_eq!(schemas_first[1], schemas_second[0]); + + Ok(()) } #[test] /// Test that schema composition fails on namespace errors. -fn test_parse_list_with_cross_deps_and_namespaces_error() { +fn test_parse_list_with_cross_deps_and_namespaces_error() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -836,12 +944,14 @@ fn test_parse_list_with_cross_deps_and_namespaces_error() { let schema_strs_second = [schema_str_2, schema_str_1]; let _ = Schema::parse_list(&schema_strs_first).expect_err("Test failed"); let _ = Schema::parse_list(&schema_strs_second).expect_err("Test failed"); + + Ok(()) } #[test] // // test that field's RecordSchema could be referenced by a following field by full name -fn test_parse_reused_record_schema_by_fullname() { +fn test_parse_reused_record_schema_by_fullname() -> TestResult { init(); let schema_str = r#" { @@ -879,15 +989,15 @@ fn test_parse_reused_record_schema_by_fullname() { let schema = Schema::parse_str(schema_str); assert!(schema.is_ok()); - match schema.unwrap() { - Schema::Record { + match schema? { + Schema::Record(RecordSchema { ref name, aliases: _, doc: _, ref fields, lookup: _, attributes: _, - } => { + }) => { assert_eq!(name.fullname(None), "test.Weather", "Name does not match!"); assert_eq!(fields.len(), 3, "The number of the fields is not correct!"); @@ -914,6 +1024,8 @@ fn test_parse_reused_record_schema_by_fullname() { } unexpected => unreachable!("Unexpected schema type: {:?}", unexpected), } + + Ok(()) } /// Return all permutations of an input slice @@ -956,7 +1068,7 @@ fn permutation_indices(indices: Vec<usize>) -> Vec<Vec<usize>> { #[test] /// Test that a type that depends on more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list. -fn test_parse_list_multiple_dependencies() { +fn test_parse_list_multiple_dependencies() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -978,23 +1090,23 @@ fn test_parse_list_multiple_dependencies() { ] }"#; - let parsed = - Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str]).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; for schema_str_perm in permutations(&schema_strs) { let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm).expect("Test failed"); + let schemas = Schema::parse_list(&schema_str_perm)?; assert_eq!(schemas.len(), 3); for parsed_schema in &parsed { assert!(schemas.contains(parsed_schema)); } } + Ok(()) } #[test] /// Test that a type that is depended on by more than one other type is parsed correctly when all /// definitions are passed in as a list. This should work regardless of the ordering of the list.
-fn test_parse_list_shared_dependency() { +fn test_parse_list_shared_dependency() -> TestResult { init(); let schema_a_str = r#"{ "name": "A", @@ -1018,22 +1130,22 @@ fn test_parse_list_shared_dependency() { ] }"#; - let parsed = - Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str]).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_a_str, schema_b_str, schema_c_str])?; let schema_strs = vec![schema_a_str, schema_b_str, schema_c_str]; for schema_str_perm in permutations(&schema_strs) { let schema_str_perm: Vec<&str> = schema_str_perm.iter().map(|s| **s).collect(); - let schemas = Schema::parse_list(&schema_str_perm).expect("Test failed"); + let schemas = Schema::parse_list(&schema_str_perm)?; assert_eq!(schemas.len(), 3); for parsed_schema in &parsed { assert!(schemas.contains(parsed_schema)); } } + Ok(()) } #[test] /// Test that trying to parse two schemas with the same fullname returns an Error -fn test_name_collision_error() { +fn test_name_collision_error() -> TestResult { init(); let schema_str_1 = r#"{ "name": "foo.A", @@ -1052,11 +1164,12 @@ fn test_name_collision_error() { }"#; let _ = Schema::parse_list(&[schema_str_1, schema_str_2]).expect_err("Test failed"); + Ok(()) } #[test] /// Test that having the same name but different fullnames does not return an error -fn test_namespace_prevents_collisions() { +fn test_namespace_prevents_collisions() -> TestResult { init(); let schema_str_1 = r#"{ "name": "A", @@ -1074,10 +1187,11 @@ fn test_namespace_prevents_collisions() { ] }"#; - let parsed = Schema::parse_list(&[schema_str_1, schema_str_2]).expect("Test failed"); - let parsed_1 = Schema::parse_str(schema_str_1).expect("Test failed"); - let parsed_2 = Schema::parse_str(schema_str_2).expect("Test failed"); + let parsed = Schema::parse_list(&[schema_str_1, schema_str_2])?; + let parsed_1 = Schema::parse_str(schema_str_1)?; + let parsed_2 = Schema::parse_str(schema_str_2)?; assert_eq!(parsed, vec!(parsed_1, parsed_2)); + Ok(()) } // The fullname is determined in one of the following ways: @@ -1106,116 +1220,125 @@ fn test_namespace_prevents_collisions() { // equivalent. 
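+// A worked example of the rules above, matching the tests that follow: +// {"name": "a.b.c.d", "namespace": "o.a.h"} parses to name "d" with namespace +// "a.b.c", and fullname(None) returns "a.b.c.d" (the namespace attribute is +// ignored when the name itself is dotted), while {"name": "a", "namespace": +// "o.a.h"} yields the fullname "o.a.h.a".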
#[test] -fn test_fullname_name_and_namespace_specified() { +fn test_fullname_name_and_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#).unwrap(); + serde_json::from_str(r#"{"name": "a", "namespace": "o.a.h", "aliases": null}"#)?; let fullname = name.fullname(None); assert_eq!("o.a.h.a", fullname); + Ok(()) } #[test] -fn test_fullname_fullname_and_namespace_specified() { +fn test_fullname_fullname_and_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.h"}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_fullname_name_and_default_namespace_specified() { +fn test_fullname_name_and_default_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a", "namespace": null}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, None); let fullname = name.fullname(Some("b.c.d".into())); assert_eq!("b.c.d.a", fullname); + Ok(()) } #[test] -fn test_fullname_fullname_and_default_namespace_specified() { +fn test_fullname_fullname_and_default_namespace_specified() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": null}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(Some("o.a.h".into())); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_avro_3452_parsing_name_without_namespace() { +fn test_avro_3452_parsing_name_without_namespace() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": "a.b.c.d"}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let fullname = name.fullname(None); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() { +fn test_avro_3452_parsing_name_with_leading_dot_without_namespace() -> TestResult { init(); - let name: Name = serde_json::from_str(r#"{"name": ".a"}"#).unwrap(); + let name: Name = serde_json::from_str(r#"{"name": ".a"}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, None); assert_eq!("a", name.fullname(None)); + Ok(()) } #[test] -fn test_avro_3452_parse_json_without_name_field() { +fn test_avro_3452_parse_json_without_name_field() -> TestResult { init(); let result: serde_json::error::Result<Name> = serde_json::from_str(r#"{"unknown": "a"}"#); assert!(&result.is_err()); assert_eq!(result.unwrap_err().to_string(), "No `name` field"); + Ok(()) } #[test] -fn test_fullname_fullname_namespace_and_default_namespace_specified() { +fn test_fullname_fullname_namespace_and_default_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#) - .unwrap(); + serde_json::from_str(r#"{"name": "a.b.c.d", "namespace": "o.a.a", "aliases": null}"#)?; assert_eq!(&name.name, "d"); assert_eq!(name.namespace, Some("a.b.c".to_owned())); let
fullname = name.fullname(Some("o.a.h".into())); assert_eq!("a.b.c.d", fullname); + Ok(()) } #[test] -fn test_fullname_name_namespace_and_default_namespace_specified() { +fn test_fullname_name_namespace_and_default_namespace_specified() -> TestResult { init(); let name: Name = - serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#).unwrap(); + serde_json::from_str(r#"{"name": "a", "namespace": "o.a.a", "aliases": null}"#)?; assert_eq!(&name.name, "a"); assert_eq!(name.namespace, Some("o.a.a".to_owned())); let fullname = name.fullname(Some("o.a.h".into())); assert_eq!("o.a.a.a", fullname); + Ok(()) } #[test] -fn test_doc_attributes() { +fn test_doc_attributes() -> TestResult { init(); fn assert_doc(schema: &Schema) { match schema { - Schema::Enum { doc, .. } => assert!(doc.is_some()), - Schema::Record { doc, .. } => assert!(doc.is_some()), - Schema::Fixed { doc, .. } => assert!(doc.is_some()), + Schema::Enum(EnumSchema { doc, .. }) => assert!(doc.is_some()), + Schema::Record(RecordSchema { doc, .. }) => assert!(doc.is_some()), + Schema::Fixed(FixedSchema { doc, .. }) => assert!(doc.is_some()), Schema::String => (), _ => unreachable!("Unexpected schema type: {:?}", schema), } } for (raw_schema, _) in DOC_EXAMPLES.iter() { - let original_schema = Schema::parse_str(raw_schema).unwrap(); + let original_schema = Schema::parse_str(raw_schema)?; assert_doc(&original_schema); - if let Schema::Record { fields, .. } = original_schema { + if let Schema::Record(RecordSchema { fields, .. }) = original_schema { for f in fields { assert_doc(&f.schema) } } } + Ok(()) } /* @@ -1233,17 +1356,17 @@ fn test_other_attributes() { } for (raw_schema, _) in OTHER_ATTRIBUTES_EXAMPLES.iter() { - let schema = Schema::parse_str(raw_schema).unwrap(); + let schema = Schema::parse_str(raw_schema)?; // all inputs have at least some user-defined attributes assert!(schema.other_attributes.is_some()); - for prop in schema.other_attributes.unwrap().iter() { + for prop in schema.other_attributes?.iter() { assert_attribute_type(prop); } if let Schema::Record { fields, .. 
} = schema { for f in fields { // all fields in the record have at least some user-defined attributes assert!(f.schema.other_attributes.is_some()); - for prop in f.schema.other_attributes.unwrap().iter() { + for prop in f.schema.other_attributes?.iter() { assert_attribute_type(prop); } } @@ -1272,7 +1395,7 @@ fn test_root_error_is_not_swallowed_on_parse_error() -> Result<(), String> { // AVRO-3302 #[test] -fn test_record_schema_with_cyclic_references() { +fn test_record_schema_with_cyclic_references() -> TestResult { init(); let schema = Schema::parse_str( r#" { @@ -1292,8 +1415,7 @@ fn test_record_schema_with_cyclic_references() { }] } "#, - ) - .unwrap(); + )?; let mut datum = Record::new(&schema).unwrap(); datum.put( @@ -1326,16 +1448,17 @@ fn test_record_schema_with_cyclic_references() { if let Err(err) = writer.append(datum) { panic!("An error occurred while writing datum: {err:?}") } - let bytes = writer.into_inner().unwrap(); + let bytes = writer.into_inner()?; assert_eq!(316, bytes.len()); match Reader::new(&mut bytes.as_slice()) { Ok(mut reader) => match reader.next() { - Some(value) => log::debug!("{:?}", value.unwrap()), + Some(value) => log::debug!("{:?}", value?), None => panic!("No value was read!"), }, Err(err) => panic!("An error occurred while reading datum: {err:?}"), } + Ok(()) } /* @@ -1343,12 +1466,12 @@ fn test_record_schema_with_cyclic_references() { #[test] fn test_decimal_valid_type_attributes() { init(); - let fixed_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[0]).unwrap(); + let fixed_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[0])?; assert_eq!(4, fixed_decimal.get_attribute("precision")); assert_eq!(2, fixed_decimal.get_attribute("scale")); assert_eq!(2, fixed_decimal.get_attribute("size")); - let bytes_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[1]).unwrap(); + let bytes_decimal = Schema::parse_str(DECIMAL_LOGICAL_TYPE_ATTRIBUTES[1])?; assert_eq!(4, bytes_decimal.get_attribute("precision")); assert_eq!(0, bytes_decimal.get_attribute("scale")); } @@ -1356,7 +1479,7 @@ fn test_decimal_valid_type_attributes() { // https://github.com/flavray/avro-rs/issues/47 #[test] -fn avro_old_issue_47() { +fn avro_old_issue_47() -> TestResult { init(); let schema_str = r#" { @@ -1367,11 +1490,11 @@ fn avro_old_issue_47() { {"name": "b", "type": "string"} ] }"#; - let schema = Schema::parse_str(schema_str).unwrap(); + let schema = Schema::parse_str(schema_str)?; use serde::{Deserialize, Serialize}; - #[derive(Deserialize, Serialize)] + #[derive(Deserialize, Serialize, Debug, Clone, PartialEq, Eq)] pub struct MyRecord { b: String, a: i64, @@ -1382,5 +1505,1152 @@ fn avro_old_issue_47() { a: 1, }; - let _ = to_avro_datum(&schema, to_value(record).unwrap()).unwrap(); + let ser_value = to_value(record.clone())?; + let serialized_bytes = to_avro_datum(&schema, ser_value)?; + + let de_value = &from_avro_datum(&schema, &mut &*serialized_bytes, None)?; + let deserialized_record = from_value::<MyRecord>(de_value)?; + + assert_eq!(record, deserialized_record); + Ok(()) +} + +#[test] +fn test_avro_3785_deserialize_namespace_with_nullable_type_containing_reference_type() -> TestResult +{ + use apache_avro::{from_avro_datum, to_avro_datum, types::Value}; + use serde::{Deserialize, Serialize}; + + #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] + pub struct BarUseParent { + #[serde(rename = "barUse")] + pub bar_use: Bar, + } + + #[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Clone, Deserialize, Serialize)] + pub enum Bar { +
#[serde(rename = "bar0")] + Bar0, + #[serde(rename = "bar1")] + Bar1, + #[serde(rename = "bar2")] + Bar2, + } + + #[derive(Debug, PartialEq, Eq, Clone, Deserialize, Serialize)] + pub struct Foo { + #[serde(rename = "barInit")] + pub bar_init: Bar, + #[serde(rename = "barUseParent")] + pub bar_use_parent: Option, + } + + let writer_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1", + "bar2" + ] + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let reader_schema = r#"{ + "type": "record", + "name": "Foo", + "namespace": "name.space", + "fields": + [ + { + "name": "barInit", + "type": + { + "type": "enum", + "name": "Bar", + "symbols": + [ + "bar0", + "bar1" + ] + } + }, + { + "name": "barUseParent", + "type": [ + "null", + { + "type": "record", + "name": "BarUseParent", + "fields": [ + { + "name": "barUse", + "type": "Bar" + } + ] + } + ] + } + ] + }"#; + + let writer_schema = Schema::parse_str(writer_schema)?; + let foo1 = Foo { + bar_init: Bar::Bar0, + bar_use_parent: Some(BarUseParent { bar_use: Bar::Bar1 }), + }; + let avro_value = crate::to_value(foo1)?; + assert!( + avro_value.validate(&writer_schema), + "value is valid for schema", + ); + let datum = to_avro_datum(&writer_schema, avro_value)?; + let mut x = &datum[..]; + let reader_schema = Schema::parse_str(reader_schema)?; + let deser_value = from_avro_datum(&writer_schema, &mut x, Some(&reader_schema))?; + match deser_value { + Value::Record(fields) => { + assert_eq!(fields.len(), 2); + } + _ => panic!("Expected Value::Record"), + } + + Ok(()) +} + +#[test] +fn test_avro_3847_union_field_with_default_value_of_ref() -> TestResult { + // Test for reference to Record + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": ["record2", "int"], + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(), + Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Union( + 0, + Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])), + ), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Enum + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + 
"type": "enum", + "symbols": ["a", "b"] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Enum(1, "b".to_string())); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b"] + } + }, { + "name": "f2", + "type": ["enum1", "int"], + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Enum(1, "b".to_string())), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))), + ), + ]); + + assert_eq!(expected, result[0]); + + // Test for reference to Fixed + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Fixed(3, vec![0, 1, 2])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + } + }, { + "name": "f2", + "type": ["fixed1", "int"], + "default": "abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3847_union_field_with_default_value_of_ref_with_namespace() -> TestResult { + // Test for reference to Record + let writer_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": ["ns.record2", "int"], + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = 
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
+        (
+            "f1".to_string(),
+            Value::Record(vec![("f1_1".to_string(), 10.into())]),
+        ),
+        (
+            "f2".to_string(),
+            Value::Union(
+                0,
+                Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])),
+            ),
+        ),
+    ]);
+
+    assert_eq!(expected, result[0]);
+
+    // Test for reference to Enum
+    let writer_schema_str = r#"
+    {
+        "name": "record1",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "enum1",
+                    "namespace": "ns",
+                    "type": "enum",
+                    "symbols": ["a", "b"]
+                }
+            }
+        ]
+    }
+    "#;
+    let writer_schema = Schema::parse_str(writer_schema_str)?;
+    let mut writer = Writer::new(&writer_schema, Vec::new());
+    let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?;
+    record.put("f1", Value::Enum(1, "b".to_string()));
+    writer.append(record)?;
+
+    let reader_schema_str = r#"
+    {
+        "name": "record1",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "enum1",
+                    "namespace": "ns",
+                    "type": "enum",
+                    "symbols": ["a", "b"]
+                }
+            }, {
+                "name": "f2",
+                "type": ["ns.enum1", "int"],
+                "default": "a"
+            }
+        ]
+    }
+    "#;
+    let reader_schema = Schema::parse_str(reader_schema_str)?;
+    let input = writer.into_inner()?;
+    let reader = Reader::with_schema(&reader_schema, &input[..])?;
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
+        ("f1".to_string(), Value::Enum(1, "b".to_string())),
+        (
+            "f2".to_string(),
+            Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))),
+        ),
+    ]);
+
+    assert_eq!(expected, result[0]);
+
+    // Test for reference to Fixed
+    let writer_schema_str = r#"
+    {
+        "name": "record1",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "fixed1",
+                    "namespace": "ns",
+                    "type": "fixed",
+                    "size": 3
+                }
+            }
+        ]
+    }
+    "#;
+    let writer_schema = Schema::parse_str(writer_schema_str)?;
+    let mut writer = Writer::new(&writer_schema, Vec::new());
+    let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?;
+    record.put("f1", Value::Fixed(3, vec![0, 1, 2]));
+    writer.append(record)?;
+
+    let reader_schema_str = r#"
+    {
+        "name": "record1",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "fixed1",
+                    "namespace": "ns",
+                    "type": "fixed",
+                    "size": 3
+                }
+            }, {
+                "name": "f2",
+                "type": ["ns.fixed1", "int"],
+                "default": "abc"
+            }
+        ]
+    }
+    "#;
+    let reader_schema = Schema::parse_str(reader_schema_str)?;
+    let input = writer.into_inner()?;
+    let reader = Reader::with_schema(&reader_schema, &input[..])?;
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
+        ("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])),
+        (
+            "f2".to_string(),
+            Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))),
+        ),
+    ]);
+
+    assert_eq!(expected, result[0]);
+
+    Ok(())
+}
+
+#[test]
+fn test_avro_3847_union_field_with_default_value_of_ref_with_enclosing_namespace() -> TestResult {
+    // Test for reference to Record
+    let writer_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "record2",
+                    "type": "record",
+                    "fields": [
+                        {
+                            "name": "f1_1",
+                            "type": "int"
+                        }
+                    ]
+                }
+            }
+        ]
+    }
+    "#;
+    let writer_schema = Schema::parse_str(writer_schema_str)?;
+    let mut writer = Writer::new(&writer_schema, Vec::new());
+    let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?;
+    record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())]));
+    writer.append(record)?;
+
+    let reader_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "record2",
+                    "type": "record",
+                    "fields": [
+                        {
+                            "name": "f1_1",
+                            "type": "int"
+                        }
+                    ]
+                }
+            }, {
+                "name": "f2",
+                "type": ["ns.record2", "int"],
+                "default": {
+                    "f1_1": 100
+                }
+            }
+        ]
+    }
+    "#;
+    let reader_schema = Schema::parse_str(reader_schema_str)?;
+    let input = writer.into_inner()?;
+    let reader = Reader::with_schema(&reader_schema, &input[..])?;
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
+        (
+            "f1".to_string(),
+            Value::Record(vec![("f1_1".to_string(), 10.into())]),
+        ),
+        (
+            "f2".to_string(),
+            Value::Union(
+                0,
+                Box::new(Value::Record(vec![("f1_1".to_string(), 100.into())])),
+            ),
+        ),
+    ]);
+
+    assert_eq!(expected, result[0]);
+
+    // Test for reference to Enum
+    let writer_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "enum1",
+                    "type": "enum",
+                    "symbols": ["a", "b"]
+                }
+            }
+        ]
+    }
+    "#;
+    let writer_schema = Schema::parse_str(writer_schema_str)?;
+    let mut writer = Writer::new(&writer_schema, Vec::new());
+    let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?;
+    record.put("f1", Value::Enum(1, "b".to_string()));
+    writer.append(record)?;
+
+    let reader_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "enum1",
+                    "type": "enum",
+                    "symbols": ["a", "b"]
+                }
+            }, {
+                "name": "f2",
+                "type": ["ns.enum1", "int"],
+                "default": "a"
+            }
+        ]
+    }
+    "#;
+    let reader_schema = Schema::parse_str(reader_schema_str)?;
+    let input = writer.into_inner()?;
+    let reader = Reader::with_schema(&reader_schema, &input[..])?;
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
+        ("f1".to_string(), Value::Enum(1, "b".to_string())),
+        (
+            "f2".to_string(),
+            Value::Union(0, Box::new(Value::Enum(0, "a".to_string()))),
+        ),
+    ]);
+
+    assert_eq!(expected, result[0]);
+
+    // Test for reference to Fixed
+    let writer_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "fixed1",
+                    "type": "fixed",
+                    "size": 3
+                }
+            }
+        ]
+    }
+    "#;
+    let writer_schema = Schema::parse_str(writer_schema_str)?;
+    let mut writer = Writer::new(&writer_schema, Vec::new());
+    let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?;
+    record.put("f1", Value::Fixed(3, vec![0, 1, 2]));
+    writer.append(record)?;
+
+    let reader_schema_str = r#"
+    {
+        "name": "record1",
+        "namespace": "ns",
+        "type": "record",
+        "fields": [
+            {
+                "name": "f1",
+                "type": {
+                    "name": "fixed1",
+                    "type": "fixed",
+                    "size": 3
+                }
+            }, {
+                "name": "f2",
+                "type": ["ns.fixed1", "int"],
+                "default": "abc"
+            }
+        ]
+    }
+    "#;
+    let reader_schema = Schema::parse_str(reader_schema_str)?;
+    let input = writer.into_inner()?;
+    let reader = Reader::with_schema(&reader_schema, &input[..])?;
+    let result = reader.collect::<Result<Vec<Value>, _>>()?;
+
+    assert_eq!(1, result.len());
+
+    let expected = Value::Record(vec![
("f1".to_string(), Value::Fixed(3, vec![0, 1, 2])), + ( + "f2".to_string(), + Value::Union(0, Box::new(Value::Fixed(3, vec![b'a', b'b', b'c']))), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +fn write_schema_for_default_value_test() -> apache_avro::AvroResult> { + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()) + .ok_or("Expected Some(Record), but got None") + .unwrap(); + record.put("f1", 10); + writer.append(record)?; + + writer.into_inner() +} + +#[test] +fn test_avro_3851_read_default_value_for_simple_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "int", + "default": 20 + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Int(20)), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_nested_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + }, + "default": { + "f1_1": 100 + } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ( + "f2".to_string(), + Value::Record(vec![("f1_1".to_string(), 100.into())]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_enum_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "enum1", + "type": "enum", + "symbols": ["a", "b", "c"] + }, + "default": "a" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Enum(0, "a".to_string())), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_fixed_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": { + "name": "fixed1", + "type": "fixed", + "size": 3 + }, + "default": 
"abc" + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Fixed(3, vec![b'a', b'b', b'c'])), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_array_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "array", + "items": "int", + "default": [1, 2, 3] + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ( + "f2".to_string(), + Value::Array(vec![1.into(), 2.into(), 3.into()]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_map_record_field() -> TestResult { + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": "int" + }, { + "name": "f2", + "type": "map", + "values": "string", + "default": { "a": "A", "b": "B", "c": "C" } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = write_schema_for_default_value_test()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let map = HashMap::from_iter([ + ("a".to_string(), "A".into()), + ("b".to_string(), "B".into()), + ("c".to_string(), "C".into()), + ]); + let expected = Value::Record(vec![ + ("f1".to_string(), Value::Int(10)), + ("f2".to_string(), Value::Map(map)), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_ref_record_field() -> TestResult { + let writer_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + } + ] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + let mut record = Record::new(writer.schema()).ok_or("Expected Some(Record), but got None")?; + record.put("f1", Value::Record(vec![("f1_1".to_string(), 10.into())])); + writer.append(record)?; + + let reader_schema_str = r#" + { + "name": "record1", + "namespace": "ns", + "type": "record", + "fields": [ + { + "name": "f1", + "type": { + "name": "record2", + "type": "record", + "fields": [ + { + "name": "f1_1", + "type": "int" + } + ] + } + }, { + "name": "f2", + "type": "ns.record2", + "default": { "f1_1": 100 } + } + ] + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Record(vec![ + ( + "f1".to_string(), + 
Value::Record(vec![("f1_1".to_string(), 10.into())]), + ), + ( + "f2".to_string(), + Value::Record(vec![("f1_1".to_string(), 100.into())]), + ), + ]); + + assert_eq!(expected, result[0]); + + Ok(()) +} + +#[test] +fn test_avro_3851_read_default_value_for_enum() -> TestResult { + let writer_schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b", "c"] + } + "#; + let writer_schema = Schema::parse_str(writer_schema_str)?; + let mut writer = Writer::new(&writer_schema, Vec::new()); + writer.append("c")?; + + let reader_schema_str = r#" + { + "name": "enum1", + "namespace": "ns", + "type": "enum", + "symbols": ["a", "b"], + "default": "a" + } + "#; + let reader_schema = Schema::parse_str(reader_schema_str)?; + let input = writer.into_inner()?; + let reader = Reader::with_schema(&reader_schema, &input[..])?; + let result = reader.collect::, _>>()?; + + assert_eq!(1, result.len()); + + let expected = Value::Enum(0, "a".to_string()); + assert_eq!(expected, result[0]); + + Ok(()) } diff --git a/lang/rust/avro/tests/shared.rs b/lang/rust/avro/tests/shared.rs index f5686129cbc..9790ddfe424 100644 --- a/lang/rust/avro/tests/shared.rs +++ b/lang/rust/avro/tests/shared.rs @@ -16,6 +16,7 @@ // under the License. use apache_avro::{types::Value, Codec, Reader, Schema, Writer}; +use apache_avro_test_helper::TestResult; use std::{ fmt, fs::{DirEntry, File, ReadDir}, @@ -27,8 +28,14 @@ use std::{ const ROOT_DIRECTORY: &str = "../../../share/test/data/schemas"; #[test] -fn test_schema() { - let directory: ReadDir = scan_shared_folder(); +fn test_schema() -> TestResult { + let directory: ReadDir = match std::fs::read_dir(ROOT_DIRECTORY) { + Ok(root_folder) => root_folder, + Err(err) => { + log::warn!("Can't read the root folder: {err}"); + return Ok(()); + } + }; let mut result: Result<(), ErrorsDesc> = Ok(()); for f in directory { let entry: DirEntry = match f { @@ -54,6 +61,7 @@ fn test_schema() { if let Err(e) = result { core::panic!("{}", e) } + Ok(()) } #[derive(Debug)] @@ -141,7 +149,3 @@ fn test_folder(folder: &str) -> Result<(), ErrorsDesc> { } result } - -fn scan_shared_folder() -> ReadDir { - std::fs::read_dir(ROOT_DIRECTORY).expect("Can't read root folder") -} diff --git a/lang/rust/avro/tests/to_from_avro_datum_schemata.rs b/lang/rust/avro/tests/to_from_avro_datum_schemata.rs index d05025ab75b..e27f1e625ac 100644 --- a/lang/rust/avro/tests/to_from_avro_datum_schemata.rs +++ b/lang/rust/avro/tests/to_from_avro_datum_schemata.rs @@ -18,7 +18,7 @@ use apache_avro::{ from_avro_datum_schemata, to_avro_datum_schemata, types::Value, Codec, Reader, Schema, Writer, }; -use apache_avro_test_helper::init; +use apache_avro_test_helper::{init, TestResult}; static SCHEMA_A_STR: &str = r#"{ "name": "A", @@ -37,7 +37,7 @@ static SCHEMA_B_STR: &str = r#"{ }"#; #[test] -fn test_avro_3683_multiple_schemata_to_from_avro_datum() { +fn test_avro_3683_multiple_schemata_to_from_avro_datum() -> TestResult { init(); let record: Value = Value::Record(vec![( @@ -45,21 +45,23 @@ fn test_avro_3683_multiple_schemata_to_from_avro_datum() { Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]), )]); - let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR]).unwrap(); + let schemata: Vec = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?; let schemata: Vec<&Schema> = schemata.iter().collect(); // this is the Schema we want to use for write/read let schema_b = schemata[1]; let expected: Vec = vec![0, 0, 128, 63]; - let actual = to_avro_datum_schemata(schema_b, 
+    let actual = to_avro_datum_schemata(schema_b, schemata.clone(), record.clone())?;
     assert_eq!(actual, expected);
 
-    let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None).unwrap();
+    let value = from_avro_datum_schemata(schema_b, schemata, &mut actual.as_slice(), None)?;
     assert_eq!(value, record);
+
+    Ok(())
 }
 
 #[test]
-fn test_avro_3683_multiple_schemata_writer_reader() {
+fn test_avro_3683_multiple_schemata_writer_reader() -> TestResult {
     init();
 
     let record: Value = Value::Record(vec![(
@@ -67,7 +69,7 @@ fn test_avro_3683_multiple_schemata_writer_reader() {
         Value::Record(vec![(String::from("field_a"), Value::Float(1.0))]),
     )]);
 
-    let schemata: Vec<Schema> = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR]).unwrap();
+    let schemata: Vec<Schema> = Schema::parse_list(&[SCHEMA_A_STR, SCHEMA_B_STR])?;
     let schemata: Vec<&Schema> = schemata.iter().collect();
 
     // this is the Schema we want to use for write/read
@@ -75,10 +77,12 @@ fn test_avro_3683_multiple_schemata_writer_reader() {
     let mut output: Vec<u8> = Vec::new();
     let mut writer = Writer::with_schemata(schema_b, schemata.clone(), &mut output, Codec::Null);
-    writer.append(record.clone()).unwrap();
-    writer.flush().unwrap();
+    writer.append(record.clone())?;
+    writer.flush()?;
 
-    let reader = Reader::with_schemata(schema_b, schemata, output.as_slice()).unwrap();
+    let reader = Reader::with_schemata(schema_b, schemata, output.as_slice())?;
     let value = reader.into_iter().next().unwrap().unwrap();
     assert_eq!(value, record);
+
+    Ok(())
 }
diff --git a/lang/rust/avro_derive/Cargo.toml b/lang/rust/avro_derive/Cargo.toml
index 20cbfeea4bf..30cd748bccd 100644
--- a/lang/rust/avro_derive/Cargo.toml
+++ b/lang/rust/avro_derive/Cargo.toml
@@ -17,29 +17,29 @@
 
 [package]
 name = "apache-avro-derive"
-version = "0.15.0"
-authors = ["Apache Avro team <dev@avro.apache.org>"]
+version.workspace = true
+authors.workspace = true
 description = "A library for deriving Avro schemata from Rust structs and enums"
-license = "Apache-2.0"
-readme = "README.md"
-repository = "https://github.com/apache/avro"
-edition = "2021"
-rust-version = "1.60.0"
+license.workspace = true
+readme.workspace = true
+repository.workspace = true
+edition.workspace = true
+rust-version.workspace = true
 keywords = ["avro", "data", "serialization", "derive"]
-categories = ["encoding"]
+categories.workspace = true
 documentation = "https://docs.rs/apache-avro-derive"
 
 [lib]
 proc-macro = true
 
 [dependencies]
-darling = { default-features = false, version = "0.14.4" }
-proc-macro2 = { default-features = false, version = "1.0.51" }
-quote = { default-features = false, version = "1.0.23" }
-serde_json = { default-features = false, version = "1.0.94", features = ["std"] }
-syn = { default-features = false, version = "1.0.109", features = ["full", "fold"] }
+darling = { default-features = false, version = "0.20.3" }
+proc-macro2 = { default-features = false, version = "1.0.67" }
+quote = { default-features = false, version = "1.0.33" }
+serde_json = { default-features = false, version = "1.0.107", features = ["std"] }
+syn = { default-features = false, version = "2.0.37", features = ["full", "fold"] }
 
 [dev-dependencies]
 apache-avro = { default-features = false, path = "../avro", features = ["derive"] }
-proptest = { default-features = false, version = "1.1.0", features = ["std"] }
-serde = { default-features = false, version = "1.0.154", features = ["derive"] }
+proptest = { default-features = false, version = "1.2.0", features = ["std"] }
+serde = { default-features = false, version = "1.0.188", features = ["derive"] }
= false, version = "1.0.188", features = ["derive"] } diff --git a/lang/rust/avro_derive/src/lib.rs b/lang/rust/avro_derive/src/lib.rs index 369bcfdb64b..5b36839be4e 100644 --- a/lang/rust/avro_derive/src/lib.rs +++ b/lang/rust/avro_derive/src/lib.rs @@ -190,14 +190,14 @@ fn get_data_struct_schema_def( .iter() .map(|field| (field.name.to_owned(), field.position)) .collect(); - apache_avro::schema::Schema::Record { + apache_avro::schema::Schema::Record(apache_avro::schema::RecordSchema { name, aliases: #record_aliases, doc: #record_doc, fields: schema_fields, lookup, attributes: Default::default(), - } + }) }) } @@ -217,13 +217,14 @@ fn get_data_enum_schema_def( .map(|variant| variant.ident.to_string()) .collect(); Ok(quote! { - apache_avro::schema::Schema::Enum { + apache_avro::schema::Schema::Enum(apache_avro::schema::EnumSchema { name: apache_avro::schema::Name::new(#full_schema_name).expect(&format!("Unable to parse enum name for schema {}", #full_schema_name)[..]), aliases: #enum_aliases, doc: #doc, symbols: vec![#(#symbols.to_owned()),*], + default: None, attributes: Default::default(), - } + }) }) } else { Err(vec![syn::Error::new( @@ -293,17 +294,19 @@ fn to_compile_errors(errors: Vec) -> proc_macro2::TokenStream { fn extract_outer_doc(attributes: &[Attribute]) -> Option { let doc = attributes .iter() - .filter(|attr| attr.style == AttrStyle::Outer && attr.path.is_ident("doc")) - .map(|attr| { - let mut tokens = attr.tokens.clone().into_iter(); - tokens.next(); // skip the Punct - let to_trim: &[char] = &['"', ' ']; - tokens - .next() // use the Literal - .unwrap() - .to_string() - .trim_matches(to_trim) - .to_string() + .filter(|attr| attr.style == AttrStyle::Outer && attr.path().is_ident("doc")) + .filter_map(|attr| { + let name_value = attr.meta.require_name_value(); + match name_value { + Ok(name_value) => match &name_value.value { + syn::Expr::Lit(expr_lit) => match expr_lit.lit { + syn::Lit::Str(ref lit_str) => Some(lit_str.value().trim().to_string()), + _ => None, + }, + _ => None, + }, + Err(_) => None, + } }) .collect::>() .join("\n"); diff --git a/lang/rust/avro_derive/tests/derive.rs b/lang/rust/avro_derive/tests/derive.rs index a22c6525fa2..0bfc9a95cf1 100644 --- a/lang/rust/avro_derive/tests/derive.rs +++ b/lang/rust/avro_derive/tests/derive.rs @@ -30,11 +30,8 @@ extern crate serde; #[cfg(test)] mod test_derive { - use apache_avro::schema::Alias; - use std::{ - borrow::{Borrow, Cow}, - sync::Mutex, - }; + use apache_avro::schema::{Alias, EnumSchema, RecordSchema}; + use std::{borrow::Cow, sync::Mutex}; use super::*; @@ -144,7 +141,7 @@ mod test_derive { "#; let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestBasicNamespace::get_schema()); - if let Schema::Record { name, .. } = TestBasicNamespace::get_schema() { + if let Schema::Record(RecordSchema { name, .. }) = TestBasicNamespace::get_schema() { assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap()) } else { panic!("TestBasicNamespace schema must be a record schema") @@ -191,7 +188,9 @@ mod test_derive { "#; let schema = Schema::parse_str(schema).unwrap(); assert_eq!(schema, TestComplexNamespace::get_schema()); - if let Schema::Record { name, fields, .. } = TestComplexNamespace::get_schema() { + if let Schema::Record(RecordSchema { name, fields, .. 
+            TestComplexNamespace::get_schema()
+        {
             assert_eq!(
                 "com.testing.complex.namespace".to_owned(),
                 name.namespace.unwrap()
@@ -201,7 +200,7 @@ mod test_derive {
                 .filter(|field| field.name == "a")
                 .map(|field| &field.schema)
                 .next();
-            if let Some(Schema::Record { name, .. }) = inner_schema {
+            if let Some(Schema::Record(RecordSchema { name, .. })) = inner_schema {
                 assert_eq!(
                     "com.testing.namespace".to_owned(),
                     name.namespace.clone().unwrap()
@@ -864,7 +863,7 @@ mod test_derive {
         // test serde with manual equality for mutex
         let test = serde(test);
         assert_eq!("hey", test.a);
-        assert_eq!(vec![42], *test.b.borrow().lock().unwrap());
+        assert_eq!(vec![42], *test.b.lock().unwrap());
         assert_eq!(Cow::Owned::<i32>(32), test.c);
     }
 
@@ -944,7 +943,9 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, doc, .. } = TestBasicWithAttributes::get_schema() {
+        if let Schema::Record(RecordSchema { name, doc, .. }) =
+            TestBasicWithAttributes::get_schema()
+        {
             assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap());
             assert_eq!("A Documented Record", doc.unwrap())
         } else {
@@ -985,13 +986,14 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, doc, .. } = TestBasicWithOuterDocAttributes::get_schema() {
+        let derived_schema = TestBasicWithOuterDocAttributes::get_schema();
+        assert_eq!(&schema, &derived_schema);
+        if let Schema::Record(RecordSchema { name, doc, .. }) = derived_schema {
             assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap());
             assert_eq!("A Documented Record", doc.unwrap())
         } else {
             panic!("TestBasicWithOuterDocAttributes schema must be a record schema")
         }
-        assert_eq!(schema, TestBasicWithOuterDocAttributes::get_schema());
     }
 
     #[derive(Debug, Serialize, Deserialize, AvroSchema, Clone, PartialEq, Eq)]
@@ -1028,7 +1030,8 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, doc, .. } = TestBasicWithLargeDoc::get_schema() {
+        if let Schema::Record(RecordSchema { name, doc, .. }) = TestBasicWithLargeDoc::get_schema()
+        {
             assert_eq!("com.testing.namespace".to_owned(), name.namespace.unwrap());
             assert_eq!(
                 "A Documented Record\nthat spans\nmultiple lines",
@@ -1068,7 +1071,7 @@ mod test_derive {
         let schema = Schema::parse_str(schema).unwrap();
         let derived_schema = TestBasicWithBool::get_schema();
 
-        if let Schema::Record { name, .. } = derived_schema {
+        if let Schema::Record(RecordSchema { name, .. }) = derived_schema {
             assert_eq!("TestBasicWithBool", name.fullname(None))
         } else {
             panic!("TestBasicWithBool schema must be a record schema")
@@ -1099,7 +1102,7 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, .. } = TestBasicWithU32::get_schema() {
+        if let Schema::Record(RecordSchema { name, .. }) = TestBasicWithU32::get_schema() {
             assert_eq!("TestBasicWithU32", name.fullname(None))
         } else {
             panic!("TestBasicWithU32 schema must be a record schema")
@@ -1131,7 +1134,9 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, aliases, .. } = TestBasicStructWithAliases::get_schema() {
+        if let Schema::Record(RecordSchema { name, aliases, .. }) =
+            TestBasicStructWithAliases::get_schema()
+        {
             assert_eq!("TestBasicStructWithAliases", name.fullname(None));
             assert_eq!(
                 Some(vec![
@@ -1173,7 +1178,9 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, aliases, .. } = TestBasicStructWithAliases2::get_schema() {
+        if let Schema::Record(RecordSchema { name, aliases, .. }) =
+            TestBasicStructWithAliases2::get_schema()
+        {
             assert_eq!("TestBasicStructWithAliases2", name.fullname(None));
             assert_eq!(
                 Some(vec![
@@ -1212,7 +1219,9 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Enum { name, aliases, .. } = TestBasicEnumWithAliases::get_schema() {
+        if let Schema::Enum(EnumSchema { name, aliases, .. }) =
+            TestBasicEnumWithAliases::get_schema()
+        {
             assert_eq!("TestBasicEnumWithAliases", name.fullname(None));
             assert_eq!(
                 Some(vec![
@@ -1253,7 +1262,9 @@ mod test_derive {
         }
         "#;
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Enum { name, aliases, .. } = TestBasicEnumWithAliases2::get_schema() {
+        if let Schema::Enum(EnumSchema { name, aliases, .. }) =
+            TestBasicEnumWithAliases2::get_schema()
+        {
             assert_eq!("TestBasicEnumWithAliases2", name.fullname(None));
             assert_eq!(
                 Some(vec![
@@ -1357,7 +1368,8 @@ mod test_derive {
         "#;
 
         let schema = Schema::parse_str(schema).unwrap();
-        if let Schema::Record { name, fields, .. } = TestBasicStructWithDefaultValues::get_schema()
+        if let Schema::Record(RecordSchema { name, fields, .. }) =
+            TestBasicStructWithDefaultValues::get_schema()
         {
             assert_eq!("TestBasicStructWithDefaultValues", name.fullname(None));
             use serde_json::json;
@@ -1454,7 +1466,7 @@ mod test_derive {
         let schema = Schema::parse_str(schema).unwrap();
 
         let derived_schema = TestBasicStructWithSkipAttribute::get_schema();
-        if let Schema::Record { name, fields, .. } = &derived_schema {
+        if let Schema::Record(RecordSchema { name, fields, .. }) = &derived_schema {
             assert_eq!("TestBasicStructWithSkipAttribute", name.fullname(None));
             for field in fields {
                 match field.name.as_str() {
@@ -1521,7 +1533,7 @@ mod test_derive {
         let schema = Schema::parse_str(schema).unwrap();
 
         let derived_schema = TestBasicStructWithRenameAttribute::get_schema();
-        if let Schema::Record { name, fields, .. } = &derived_schema {
+        if let Schema::Record(RecordSchema { name, fields, .. }) = &derived_schema {
             assert_eq!("TestBasicStructWithRenameAttribute", name.fullname(None));
             for field in fields {
                 match field.name.as_str() {
@@ -1552,7 +1564,7 @@ mod test_derive {
         }
 
         let derived_schema = TestRawIdent::get_schema();
-        if let Schema::Record { fields, .. } = derived_schema {
+        if let Schema::Record(RecordSchema { fields, .. }) = derived_schema {
             let field = fields.get(0).expect("TestRawIdent must contain a field");
             assert_eq!(field.name, "type");
         } else {
diff --git a/lang/rust/avro_test_helper/Cargo.toml b/lang/rust/avro_test_helper/Cargo.toml
index fb23999217a..3330dce24b1 100644
--- a/lang/rust/avro_test_helper/Cargo.toml
+++ b/lang/rust/avro_test_helper/Cargo.toml
@@ -17,23 +17,24 @@
 
 [package]
 name = "apache-avro-test-helper"
-version = "0.15.0"
-edition = "2021"
-rust-version = "1.60.0"
+version.workspace = true
+edition.workspace = true
+rust-version.workspace = true
 description = "Apache Avro tests helper."
-authors = ["Apache Avro team "] -license = "Apache-2.0" -readme = "README.md" -repository = "https://github.com/apache/avro" +authors.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true keywords = ["avro", "data", "serialization", "test"] -categories = ["encoding"] +categories.workspace = true documentation = "https://docs.rs/apache-avro-test-helper" [dependencies] +anyhow = { default-features = false, version = "1.0.75", features = ["std"] } color-backtrace = { default-features = false, version = "0.5.1" } -ctor = { default-features = false, version = "0.1.26" } +ctor = { default-features = false, version = "0.2.4" } env_logger = { default-features = false, version = "0.10.0" } lazy_static = { default-features = false, version = "1.4.0" } -log = { default-features = false, version = "0.4.17" } +log = { default-features = false, version = "0.4.20" } ref_thread_local = { default-features = false, version = "0.1.1" } diff --git a/lang/rust/avro_test_helper/src/lib.rs b/lang/rust/avro_test_helper/src/lib.rs index 235b4c5184d..e2ab29eff15 100644 --- a/lang/rust/avro_test_helper/src/lib.rs +++ b/lang/rust/avro_test_helper/src/lib.rs @@ -45,6 +45,22 @@ fn after_all() { logger::clear_log_messages(); } +/// A custom error type for tests. +#[derive(Debug)] +pub enum TestError {} + +/// A converter of any error into [TestError]. +/// It is used to print better error messages in the tests. +/// Borrowed from +impl From for TestError { + #[track_caller] + fn from(err: Err) -> Self { + panic!("{}: {}", std::any::type_name::(), err); + } +} + +pub type TestResult = anyhow::Result<(), TestError>; + /// Does nothing. Just loads the crate. /// Should be used in the integration tests, because they do not use [dev-dependencies] /// and do not auto-load this crate. diff --git a/lang/rust/fuzz/Cargo.toml b/lang/rust/fuzz/Cargo.toml index c811e481de4..a80f3b9fa1d 100644 --- a/lang/rust/fuzz/Cargo.toml +++ b/lang/rust/fuzz/Cargo.toml @@ -20,7 +20,7 @@ name = "apache-avro-fuzz" version = "0.0.0" publish = false edition = "2021" -rust-version = "1.60.0" +rust-version = "1.65.0" [package.metadata] cargo-fuzz = true diff --git a/lang/rust/wasm-demo/Cargo.toml b/lang/rust/wasm-demo/Cargo.toml index bdf53911c02..d6a883b6466 100644 --- a/lang/rust/wasm-demo/Cargo.toml +++ b/lang/rust/wasm-demo/Cargo.toml @@ -18,16 +18,16 @@ [package] name = "hello-wasm" version = "0.1.0" -authors = ["Apache Avro team "] +authors.workspace = true description = "A demo project for testing apache_avro in WebAssembly" -license = "Apache-2.0" -readme = "README.md" -repository = "https://github.com/apache/avro" -edition = "2021" -rust-version = "1.60.0" +license.workspace = true +readme.workspace = true +repository.workspace = true +edition.workspace = true +rust-version.workspace = true keywords = ["avro", "data", "serialization", "wasm", "web assembly"] -categories = ["encoding"] -documentation = "https://docs.rs/apache-avro" +categories.workspace = true +documentation.workspace = true publish = false @@ -36,13 +36,9 @@ crate-type = ["cdylib", "rlib"] [dependencies] apache-avro = { path = "../avro" } -serde = { default-features = false, version = "1.0.154", features = ["derive"] } -wasm-bindgen = "0.2.84" +serde = { default-features = false, version = "1.0.188", features = ["derive"] } +wasm-bindgen = "0.2.87" [dev-dependencies] console_error_panic_hook = { version = "0.1.6" } -wasm-bindgen-test = "0.3.34" - -[profile.release] -# Tell `rustc` to optimize for small code size. 
-opt-level = "s" +wasm-bindgen-test = "0.3.37" diff --git a/pom.xml b/pom.xml index 8faa19f1545..6868dacf780 100644 --- a/pom.xml +++ b/pom.xml @@ -48,20 +48,20 @@ 0.15 - 3.2.1 + 3.2.2 9.3 - 3.2.1 - 1.6.1 + 3.3.0 + 1.7.0 3.1.0 - 3.0.1 + 3.1.0 3.5.0 - 3.8.1 + 3.8.2 3.0.0 3.4.1 3.2.1 3.5.2 2.27.2 - 3.0.0-M9 + 3.1.0 10 @@ -319,6 +319,12 @@ sign + + + --pinentry-mode + loopback + + diff --git a/share/docker/Dockerfile b/share/docker/Dockerfile index 691c1e40acf..51e91eab8dd 100644 --- a/share/docker/Dockerfile +++ b/share/docker/Dockerfile @@ -25,6 +25,9 @@ ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=isolemnlysweariamuptonogood \ # Install dependencies from vanilla system packages RUN apt-get -qqy update \ + && apt-get -qqy install software-properties-common \ + && add-apt-repository ppa:deadsnakes/ppa \ + && apt-get -qqy update \ && apt-get -qqy install --no-install-recommends ant \ apt-transport-https \ apt-utils \ @@ -52,8 +55,8 @@ RUN apt-get -qqy update \ libssl-dev \ make \ mypy \ - openjdk-17-jdk \ openjdk-11-jdk \ + openjdk-17-jdk \ openjdk-8-jdk \ perl \ python3 \ @@ -62,16 +65,18 @@ RUN apt-get -qqy update \ python3-snappy \ python3-venv \ python3-wheel \ + python3.10 \ + python3.11 \ + python3.11-dev \ + python3.7 \ + python3.7-distutils \ + python3.8 \ + python3.9 \ source-highlight \ subversion \ valgrind \ vim \ wget \ - python3.6 \ - python3.7 \ - python3.8 \ - python3.9 \ - python3.10 \ && apt-get -qqy clean # Install PHP @@ -114,8 +119,11 @@ RUN set -eux; \ ENV PATH="/opt/maven/bin:${PATH}" # Install nodejs +# The node deprecation warnings cause a 20 second sleep. +# But mom, I'm not even tired! RUN curl -sSL https://deb.nodesource.com/setup_14.x \ - | bash - \ + | sed 's/sleep 20/echo "But mom!"/' \ + | bash \ && apt-get -qqy install nodejs \ && apt-get -qqy clean \ && npm install -g grunt-cli \ @@ -168,10 +176,26 @@ RUN curl -sSL https://cpanmin.us \ Module::Install::Repository \ && rm -rf .cpanm -# Install Python packages -ENV PIP_NO_CACHE_DIR=off +# Install Python3 +ENV PATH="${PATH}:/opt/pypy3.8/bin:/opt/pypy3.9/bin:/opt/pypy3.10/bin" \ + PIP_NO_CACHE_DIR=off + +# https://docs.docker.com/engine/reference/builder/#automatic-platform-args-in-the-global-scope +ARG BUILDARCH +RUN case "${BUILDARCH:?}" in \ + arm64) pypyarch=aarch64;; \ + *) pypyarch=linux64;; \ + esac \ + && cd /opt \ + && for url in https://downloads.python.org/pypy/pypy3.8-v7.3.11-"$pypyarch".tar.bz2 \ + https://downloads.python.org/pypy/pypy3.9-v7.3.12-"$pypyarch".tar.bz2 \ + https://downloads.python.org/pypy/pypy3.10-v7.3.12-"$pypyarch".tar.bz2; \ + do curl -fsSL "$url" | tar -xvjpf -; \ + done \ + && ln -s pypy3.8* pypy3.8 \ + && ln -s pypy3.9* pypy3.9 \ + && ln -s pypy3.10* pypy3.10 -# Install Python3 packages RUN python3 -m pip install --upgrade pip setuptools wheel \ && python3 -m pip install tox zstandard @@ -179,9 +203,15 @@ RUN python3 -m pip install --upgrade pip setuptools wheel \ # Install Ruby RUN apt-get -qqy install ruby-full \ && apt-get -qqy clean +RUN mkdir -p /tmp/lang/ruby/lib/avro && mkdir -p /tmp/share +COPY lang/ruby/* /tmp/lang/ruby/ +COPY share/VERSION.txt /tmp/share/ +RUN gem install bundler --no-document && \ + apt-get install -qqy libyaml-dev && \ + cd /tmp/lang/ruby && bundle install # Install Rust -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.60.0 +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.65.0 # Note: This "ubertool" container has two JDK versions: # - OpenJDK 8 @@ -195,8 
 RUN cd /opt ; \
     wget https://dot.net/v1/dotnet-install.sh ; \
     bash ./dotnet-install.sh --channel "3.1" --install-dir "/opt/dotnet" ; \
-    bash ./dotnet-install.sh --channel "5.0" --install-dir "/opt/dotnet" ;
-    bash ./dotnet-install.sh --channel "6.0" --install-dir "/opt/dotnet" ;
+    bash ./dotnet-install.sh --channel "5.0" --install-dir "/opt/dotnet" ; \
+    bash ./dotnet-install.sh --channel "6.0" --install-dir "/opt/dotnet" ; \
+    bash ./dotnet-install.sh --channel "7.0" --install-dir "/opt/dotnet" ;
 
 ENV PATH $PATH:/opt/dotnet
diff --git a/share/idl_grammar/org/apache/avro/idl/Idl.g4 b/share/idl_grammar/org/apache/avro/idl/Idl.g4
new file mode 100644
index 00000000000..7572e9cc33b
--- /dev/null
+++ b/share/idl_grammar/org/apache/avro/idl/Idl.g4
@@ -0,0 +1,257 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      https://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+grammar Idl;
+
+/*
+** This file is a combined grammar to recognize the Avro IDL "language". By
+** design, there are no actions in this grammar: this ensures the grammar is
+** usable for any language supported by ANTLR.
+**
+** Some names, like BTrue & BFalse for booleans, may look a bit strange, but
+** that's because they can otherwise conflict with identifiers in target
+** languages like Java, Python, etc.
+**
+** Implementers can implement a listener or visitor to turn a parse result into
+** a protocol and/or schema.
+*/
+
+
+/**************************************
+**                                   **
+**              Parser               **
+**                                   **
+**************************************/
+
+// \u001a is the ascii 'sub'(stitute) character, used as end-of-file marker in older systems. It was also used as "end of character stream".
+// Thus, accept it at end of the input and ignore anything that comes after it. (See: https://en.wikipedia.org/wiki/Substitute_character)
+idlFile: (
+    protocol=protocolDeclaration |
+    namespace=namespaceDeclaration? mainSchema=mainSchemaDeclaration? (imports+=importStatement|namedSchemas+=namedSchemaDeclaration)*
+) ('\u001a' .*?)? EOF;
+
+protocolDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* Protocol name=identifier body=protocolDeclarationBody;
+
+protocolDeclarationBody : LBrace (imports+=importStatement|namedSchemas+=namedSchemaDeclaration|messages+=messageDeclaration)* RBrace ;
+
+namespaceDeclaration: Namespace namespace=identifier Semicolon;
+
+mainSchemaDeclaration: Schema mainSchema=fullType Semicolon;
+
+/**
+ * The parser accepts anything that's not a symbol as an identifier. That is, it accepts both an IdentifierToken and all keywords. Which
+ * identifiers are actually allowed is context dependent and decided when transforming the parse tree.
+ */
+identifier: word=(IdentifierToken
+    | Protocol
+    | Namespace
+    | Import
+    | IDL
+    | Schema
+    | Enum
+    | Fixed
+    | Error
+    | Record
+    | Array
+    | Map
+    | Union
+    | Boolean
+    | Int
+    | Long
+    | Float
+    | Double
+    | String
+    | Bytes
+    | Null
+    | BTrue
+    | BFalse
+    | Decimal
+    | Date
+    | Time
+    | Timestamp
+    | LocalTimestamp
+    | UUID
+    | Void
+    | Oneway
+    | Throws
+);
+
+schemaProperty: At name=identifier LParen value=jsonValue RParen;
+
+importStatement: Import importType=(Schema|Protocol|IDL) location=StringLiteral Semicolon;
+
+namedSchemaDeclaration: fixedDeclaration | enumDeclaration | recordDeclaration;
+
+fixedDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* Fixed name=identifier LParen size=IntegerLiteral RParen Semicolon;
+
+enumDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* Enum name=identifier
+    LBrace (enumSymbols+=enumSymbol (Comma enumSymbols+=enumSymbol)*)? RBrace defaultSymbol=enumDefault?;
+
+enumSymbol: (doc=DocComment)? schemaProperties+=schemaProperty* name=identifier;
+
+enumDefault : Equals defaultSymbolName=identifier Semicolon;
+
+recordDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* recordType=(Record|Error) name=identifier body=recordBody;
+
+recordBody : LBrace fields+=fieldDeclaration* RBrace;
+
+fieldDeclaration: (doc=DocComment)? fieldType=fullType variableDeclarations+=variableDeclaration (Comma variableDeclarations+=variableDeclaration)* Semicolon;
+
+variableDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* fieldName=identifier (Equals defaultValue=jsonValue)?;
+
+messageDeclaration: (doc=DocComment)? schemaProperties+=schemaProperty* returnType=resultType name=identifier
+    LParen (formalParameters+=formalParameter (Comma formalParameters+=formalParameter)*)? RParen
+    (oneway=Oneway | Throws errors+=identifier (Comma errors+=identifier)*)? Semicolon;
+
+formalParameter: (doc=DocComment)? parameterType=fullType parameter=variableDeclaration;
+
+resultType: Void | plainType;
+
+fullType: schemaProperties+=schemaProperty* plainType;
+
+plainType: arrayType | mapType | unionType | nullableType;
+
+// identifier MUST be last, as it captures any other type as well.
+nullableType: (primitiveType | referenceName=identifier) optional=QuestionMark?;
+
+primitiveType: typeName=(Boolean | Int | Long | Float | Double | Bytes | String | Null | Date | Time | Timestamp | LocalTimestamp | UUID)
+    | typeName=Decimal LParen precision=IntegerLiteral ( Comma scale=IntegerLiteral )? RParen;
+
+arrayType: Array LT elementType=fullType GT;
+
+mapType: Map LT valueType=fullType GT;
+
+unionType: Union LBrace types+=fullType (Comma types+=fullType)* RBrace;
+
+jsonValue: jsonObject | jsonArray | jsonLiteral;
+jsonLiteral: literal=(StringLiteral | IntegerLiteral | FloatingPointLiteral | BTrue | BFalse | Null);
+jsonObject: LBrace jsonPairs+=jsonPair (Comma jsonPairs+=jsonPair)* RBrace;
+jsonPair: name=StringLiteral Colon value=jsonValue;
+jsonArray: LBracket (jsonValues+=jsonValue (Comma jsonValues+=jsonValue)*)? RBracket;
+
+
+/**************************************
+**                                   **
+**              Lexer                **
+**                                   **
+**************************************/
+
+/*
+** Comments
+*/
+
+// Note 1: this might be more efficient using lexer modes, but these cannot be used in a mixed file like this.
+// Note 1: To do so, split this file into 'idl_lexer.g4' and 'idl_parser.g4', and import the tokens with 'options { tokenVocab=idl_lexer; }'
+
+// Note 2: DOC_COMMENT is now a regular token.
+DocComment: '/**' .*? '*/' -> channel(HIDDEN);
+EmptyComment: '/**/' -> skip;
+MultiLineComment: '/*' ~[*] .*? '*/' -> skip;
+SingleLineComment: '//' .*? ('\n' | '\r' '\n'?) -> skip;
+
+/*
+** Whitespace
+*/
+// Should be after the rule(s) for single-line comments, especially if rewritten to use multiple lexer modes
+WS: [ \t\n\r\f] -> skip;
+
+/*
+** Simple tokens
+*/
+Protocol: 'protocol';
+Namespace: 'namespace';
+Import: 'import';
+IDL: 'idl';
+Schema: 'schema';
+Enum: 'enum';
+Fixed: 'fixed';
+Error: 'error';
+Record: 'record';
+Array: 'array';
+Map: 'map';
+Union: 'union';
+Boolean: 'boolean';
+Int: 'int';
+Long: 'long';
+Float: 'float';
+Double: 'double';
+String: 'string';
+Bytes: 'bytes';
+Null: 'null';
+// The boolean values are not named True/False to avoid name conflicts with e.g. Python
+BTrue: 'true';
+BFalse: 'false';
+Decimal: 'decimal';
+Date: 'date';
+Time: 'time_ms';
+Timestamp: 'timestamp_ms';
+LocalTimestamp: 'local_timestamp_ms';
+UUID: 'uuid';
+Void: 'void';
+Oneway: 'oneway';
+Throws: 'throws';
+LParen: '(';
+RParen: ')';
+LBrace: '{';
+RBrace: '}';
+LBracket: '[';
+RBracket: ']';
+Colon: ':';
+Semicolon: ';';
+Comma: ',';
+At: '@';
+Equals: '=';
+Dot: '.';
+Dash: '-';
+QuestionMark: '?';
+LT: '<';
+GT: '>';
+
+/*
+** Complex tokens
+*/
+
+// TODO: restrict to JSON string & number literals?
+
+// We use a reluctant qualifier, so we don't need to forbid the closing quote
+StringLiteral: '"' (~[\\\n\r\b\f\t] | '\\' ([nrbft\\'"] | OctDigit OctDigit? | [0-3] OctDigit OctDigit | 'u' HexDigit HexDigit HexDigit HexDigit))*? '"';
+//StringLiteral: '"' (~[\\\u0000-\u001F] | '\\' ["\\/nrbft])*? '"';
+
+IntegerLiteral: '-'? ( DecimalLiteral | HexLiteral | OctalLiteral ) [lL]?;
+fragment DecimalLiteral: Digit9 Digit*;
+fragment HexLiteral: '0' [xX] HexDigit+;
+fragment OctalLiteral: '0' OctDigit*;
+
+fragment Digit9: [1-9];
+fragment Digit: '0' | Digit9;
+fragment HexDigit: [0-9a-fA-F];
+fragment OctDigit: [0-7];
+
+FloatingPointLiteral: [+-]? ('NaN' | 'Infinity' | DecimalFloatingPointLiteral | HexadecimalFloatingPointLiteral );
+fragment DecimalFloatingPointLiteral: (Digit+ '.' Digit* | '.' Digit+) DecimalExponent? [fFdD]? | Digit+ (DecimalExponent [fFdD]? | [fFdD]);
+fragment DecimalExponent: [eE] [+\-]? Digit+;
+fragment HexadecimalFloatingPointLiteral: '0' [xX] ( HexDigit+ ('.')? | HexDigit* '.' HexDigit+ ) HexadecimalExponent [fFdD]?;
+fragment HexadecimalExponent: [pP] [+\-]? Digit+;
+
+/**
+ * An identifier token accepts any sequence of unicode identifiers, optionally surrounded by backticks, separated by dots and/or dashes.
+ * Note that any sequence of identifier parts is an identifier token, even if an identifier part (also) matches an existing keyword.
+ * Also note that this token should *only* be used in the identifier grammar rule above.
+ */
+IdentifierToken: ( '`' IdentifierPart '`' | IdentifierPart)([.-] ( '`' IdentifierPart '`' | IdentifierPart) )*;
+fragment IdentifierPart: [\p{XID_Start}] [\p{XID_Continue}]*;
+// See discussion in AVRO-1022, AVRO-2659, AVRO-3115
+// fragment IdentifierPart: [A-Za-z] [A-Za-z0-9_]*
diff --git a/share/test/schemas/contexts.avdl b/share/test/schemas/contexts.avdl
index 245705f76df..17836c14dab 100644
--- a/share/test/schemas/contexts.avdl
+++ b/share/test/schemas/contexts.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/echo.avdl b/share/test/schemas/echo.avdl
index 87058e42935..14558862add 100644
--- a/share/test/schemas/echo.avdl
+++ b/share/test/schemas/echo.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/http.avdl b/share/test/schemas/http.avdl
index 67b6a72f4f7..13305444b43 100644
--- a/share/test/schemas/http.avdl
+++ b/share/test/schemas/http.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/nestedNullable.avdl b/share/test/schemas/nestedNullable.avdl
index ab2641cdf38..f31ae87571e 100644
--- a/share/test/schemas/nestedNullable.avdl
+++ b/share/test/schemas/nestedNullable.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/schemaevolution.avdl b/share/test/schemas/schemaevolution.avdl
index 1d8974d3428..7c77582484d 100644
--- a/share/test/schemas/schemaevolution.avdl
+++ b/share/test/schemas/schemaevolution.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/social.avdl b/share/test/schemas/social.avdl
index 329c9a6c930..fce2788dec3 100644
--- a/share/test/schemas/social.avdl
+++ b/share/test/schemas/social.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/specialtypes.avdl b/share/test/schemas/specialtypes.avdl
index a1ab7958260..3a4724b673b 100644
--- a/share/test/schemas/specialtypes.avdl
+++ b/share/test/schemas/specialtypes.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
diff --git a/share/test/schemas/stringables.avdl b/share/test/schemas/stringables.avdl
index 1214f04c49f..2fb799fdfea 100644
--- a/share/test/schemas/stringables.avdl
+++ b/share/test/schemas/stringables.avdl
@@ -1,4 +1,4 @@
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
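
The recurring pattern in the Rust hunks above is the migration from `.unwrap()` to the `?` operator, made possible by returning the new `TestResult` type from `apache_avro_test_helper`. A minimal sketch of what a migrated test looks like after this patch, combining `TestResult` with the AVRO-3851 behavior of filling in a reader-schema field default; the schema, the test name, and the field values here are illustrative, not taken from the patch:

    use apache_avro::{types::Value, Reader, Schema, Writer};
    use apache_avro_test_helper::TestResult;

    #[test]
    fn sketch_reader_fills_in_field_default() -> TestResult {
        // Writer schema: a record with a single int field.
        let writer_schema = Schema::parse_str(
            r#"{"name": "r", "type": "record", "fields": [{"name": "f1", "type": "int"}]}"#,
        )?;
        let mut writer = Writer::new(&writer_schema, Vec::new());
        writer.append(Value::Record(vec![("f1".to_string(), Value::Int(1))]))?;
        let encoded = writer.into_inner()?;

        // Reader schema adds `f2` with a default; the reader materializes
        // the default instead of failing (AVRO-3851).
        let reader_schema = Schema::parse_str(
            r#"{"name": "r", "type": "record", "fields": [
                {"name": "f1", "type": "int"},
                {"name": "f2", "type": "int", "default": 20}]}"#,
        )?;
        let reader = Reader::with_schema(&reader_schema, &encoded[..])?;
        let values = reader.collect::<Result<Vec<Value>, _>>()?;

        assert_eq!(
            values[0],
            Value::Record(vec![
                ("f1".to_string(), Value::Int(1)),
                ("f2".to_string(), Value::Int(20)),
            ])
        );
        Ok(()) // any error above is converted via `From<Err> for TestError`
    }

Because `TestError`'s `From` impl is `#[track_caller]` and panics with the error's type name and message, a failing `?` reports the exact failure site and cause, rather than the opaque backtrace a bare `unwrap()` produces.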