diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index 8203c15afc6c..da56c23b5cd9 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -39,6 +39,7 @@ on: - arrow-integration-test/** - arrow-ipc/** - arrow-json/** + - arrow-avro/** - arrow-ord/** - arrow-row/** - arrow-schema/** @@ -55,7 +56,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -78,6 +79,8 @@ jobs: run: cargo test -p arrow-csv --all-features - name: Test arrow-json with all features run: cargo test -p arrow-json --all-features + - name: Test arrow-avro with all features + run: cargo test -p arrow-avro --all-features - name: Test arrow-string with all features run: cargo test -p arrow-string --all-features - name: Test arrow-ord with all features @@ -91,7 +94,7 @@ jobs: - name: Test arrow with default features run: cargo test -p arrow - name: Test arrow with all features apart from simd - run: cargo test -p arrow --features=force_validate,prettyprint,ipc_compression,ffi,dyn_cmp_dict,chrono-tz + run: cargo test -p arrow --features=force_validate,prettyprint,ipc_compression,ffi,chrono-tz - name: Run examples run: | # Test arrow examples @@ -109,7 +112,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -136,7 +139,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -160,7 +163,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -179,7 +182,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup Clippy @@ -202,6 +205,8 @@ jobs: run: cargo clippy -p arrow-csv --all-targets --all-features -- -D warnings - name: Clippy arrow-json with all features run: cargo clippy -p arrow-json --all-targets --all-features -- -D warnings + - name: Clippy arrow-avro with all features + run: cargo clippy -p arrow-avro --all-targets --all-features -- -D warnings - name: Clippy arrow-string with all features run: cargo clippy -p arrow-string --all-targets --all-features -- -D warnings - name: Clippy arrow-ord with all features @@ -211,7 +216,7 @@ jobs: - name: Clippy arrow-row with all features run: cargo clippy -p arrow-row --all-targets --all-features -- -D warnings - name: Clippy arrow with all features except SIMD - run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,chrono-tz --all-targets -- -D warnings + run: cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,chrono-tz --all-targets -- -D warnings - name: Clippy arrow-integration-test with all features run: cargo clippy -p arrow-integration-test --all-targets --all-features -- -D warnings - name: Clippy arrow-integration-testing with all features diff --git a/.github/workflows/arrow_flight.yml b/.github/workflows/arrow_flight.yml index 5301a3f8563f..242e0f2a3b0d 100644 --- a/.github/workflows/arrow_flight.yml +++ b/.github/workflows/arrow_flight.yml @@ -47,7 +47,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup 
Rust toolchain @@ -68,7 +68,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Run gen @@ -82,7 +82,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup Clippy diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3fa254142dbe..64b2ca437067 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -36,7 +36,7 @@ jobs: # otherwise we get this error: # Failed to run tests: ASLR disable failed: EPERM: Operation not permitted steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 0eb2d024f352..1447d72a53b1 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -38,7 +38,7 @@ jobs: name: Release Audit Tool (RAT) runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v4 with: @@ -50,8 +50,8 @@ jobs: name: Markdown format runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 with: node-version: "14" - name: Prettier check diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml index bb88e9dcd3f5..5f3d9e54c8db 100644 --- a/.github/workflows/dev_pr.yml +++ b/.github/workflows/dev_pr.yml @@ -37,7 +37,7 @@ jobs: contents: read pull-requests: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Assign GitHub labels if: | diff --git a/.github/workflows/dev_pr/labeler.yml b/.github/workflows/dev_pr/labeler.yml index e5b86e8bcdf0..ea5873081f18 100644 --- a/.github/workflows/dev_pr/labeler.yml +++ b/.github/workflows/dev_pr/labeler.yml @@ -27,6 +27,7 @@ arrow: - arrow-integration-testing/**/* - arrow-ipc/**/* - arrow-json/**/* + - arrow-avro/**/* - arrow-ord/**/* - arrow-row/**/* - arrow-schema/**/* diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index d3f8e9046510..721260892402 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -43,7 +43,7 @@ jobs: env: RUSTDOCFLAGS: "-Dwarnings --enable-index-page -Zunstable-options" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Install python dev @@ -77,7 +77,7 @@ jobs: contents: write runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Download crate docs uses: actions/download-artifact@v3 with: diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9b2e7797d5ff..c9cb4e31ced9 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -38,6 +38,7 @@ on: - arrow-integration-testing/** - arrow-ipc/** - arrow-json/** + - arrow-avro/** - arrow-ord/** - arrow-pyarrow-integration-testing/** - arrow-schema/** @@ -56,7 +57,15 @@ jobs: env: ARROW_USE_CCACHE: OFF ARROW_CPP_EXE_PATH: /build/cpp/debug + ARROW_RUST_EXE_PATH: /build/rust/debug BUILD_DOCS_CPP: OFF + ARROW_INTEGRATION_CPP: ON + ARROW_INTEGRATION_CSHARP: ON + ARROW_INTEGRATION_GO: ON + ARROW_INTEGRATION_JAVA: ON + ARROW_INTEGRATION_JS: ON + # https://github.com/apache/arrow/pull/38403/files#r1371281630 + 
ARCHERY_INTEGRATION_WITH_RUST: ON # These are necessary because the github runner overrides $HOME # https://github.com/actions/runner/issues/863 RUSTUP_HOME: /root/.rustup @@ -76,48 +85,20 @@ jobs: - name: Check cmake run: which cmake - name: Checkout Arrow - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: apache/arrow submodules: true fetch-depth: 0 - name: Checkout Arrow Rust - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: path: rust fetch-depth: 0 - - name: Make build directory - run: mkdir /build - - name: Build Rust - run: conda run --no-capture-output ci/scripts/rust_build.sh $PWD /build - - name: Build C++ - run: conda run --no-capture-output ci/scripts/cpp_build.sh $PWD /build - - name: Build C# - run: conda run --no-capture-output ci/scripts/csharp_build.sh $PWD /build - - name: Build Go - run: conda run --no-capture-output ci/scripts/go_build.sh $PWD - - name: Build Java - run: conda run --no-capture-output ci/scripts/java_build.sh $PWD /build - - name: Build JS - run: conda run --no-capture-output ci/scripts/js_build.sh $PWD /build - - name: Install archery - run: conda run --no-capture-output pip install -e dev/archery - - name: Run integration tests - run: | - conda run --no-capture-output archery integration \ - --run-flight \ - --with-cpp=1 \ - --with-csharp=1 \ - --with-java=1 \ - --with-js=1 \ - --with-go=1 \ - --with-rust=1 \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/0.14.1 \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/0.17.1 \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/1.0.0-bigendian \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/1.0.0-littleendian \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/2.0.0-compression \ - --gold-dirs=testing/data/arrow-ipc-stream/integration/4.0.0-shareddict + - name: Build + run: conda run --no-capture-output ci/scripts/integration_arrow_build.sh $PWD /build + - name: Run + run: conda run --no-capture-output ci/scripts/integration_arrow.sh $PWD /build # test FFI against the C-Data interface exposed by pyarrow pyarrow-integration-test: @@ -126,8 +107,10 @@ jobs: strategy: matrix: rust: [ stable ] + # PyArrow 13 was the last version prior to the introduction of Arrow PyCapsules + pyarrow: [ "13", "14" ] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -148,14 +131,14 @@ jobs: key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}- - uses: actions/setup-python@v4 with: - python-version: '3.7' + python-version: '3.8' - name: Upgrade pip and setuptools run: pip install --upgrade pip setuptools wheel virtualenv - name: Create virtualenv and install dependencies run: | virtualenv venv source venv/bin/activate - pip install maturin toml pytest pytz pyarrow>=5.0 + pip install maturin toml pytest pytz pyarrow==${{ matrix.pyarrow }} - name: Run Rust tests run: | source venv/bin/activate diff --git a/.github/workflows/miri.yaml b/.github/workflows/miri.yaml index 0c1f8069cd40..19b432121b6f 100644 --- a/.github/workflows/miri.yaml +++ b/.github/workflows/miri.yaml @@ -36,6 +36,7 @@ on: - arrow-data/** - arrow-ipc/** - arrow-json/** + - arrow-avro/** - arrow-schema/** - arrow-select/** - arrow-string/** @@ -46,7 +47,7 @@ jobs: name: MIRI runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain diff --git a/.github/workflows/object_store.yml b/.github/workflows/object_store.yml 
index 01e14022e122..1b991e33c097 100644 --- a/.github/workflows/object_store.yml +++ b/.github/workflows/object_store.yml @@ -43,7 +43,7 @@ jobs: run: working-directory: object_store steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup Clippy @@ -60,6 +60,8 @@ jobs: run: cargo clippy --features gcp -- -D warnings - name: Run clippy with azure feature run: cargo clippy --features azure -- -D warnings + - name: Run clippy with http feature + run: cargo clippy --features http -- -D warnings - name: Run clippy with all features run: cargo clippy --all-features -- -D warnings - name: Run clippy with all features and all targets @@ -79,7 +81,7 @@ jobs: env: RUSTDOCFLAGS: "-Dwarnings" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Run cargo doc run: cargo doc --document-private-items --no-deps --workspace --all-features @@ -115,7 +117,7 @@ jobs: GOOGLE_SERVICE_ACCOUNT: "/tmp/gcs.json" steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Configure Fake GCS Server (GCP emulation) # Custom image - see fsouza/fake-gcs-server#1164 @@ -124,7 +126,7 @@ jobs: # Give the container a moment to start up prior to configuring it sleep 1 curl -v -X POST --data-binary '{"name":"test-bucket"}' -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b" - echo '{"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": ""}' > "$GOOGLE_SERVICE_ACCOUNT" + echo '{"gcs_base_url": "http://localhost:4443", "disable_oauth": true, "client_email": "", "private_key": "", "private_key_id": ""}' > "$GOOGLE_SERVICE_ACCOUNT" - name: Setup WebDav run: docker run -d -p 8080:80 rclone/rclone serve webdav /data --addr :80 @@ -160,7 +162,7 @@ jobs: run: working-directory: object_store steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain diff --git a/.github/workflows/parquet.yml b/.github/workflows/parquet.yml index 55599b776c32..d664a0dc0730 100644 --- a/.github/workflows/parquet.yml +++ b/.github/workflows/parquet.yml @@ -40,6 +40,7 @@ on: - arrow-ipc/** - arrow-csv/** - arrow-json/** + - arrow-avro/** - parquet/** - .github/** @@ -51,7 +52,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -74,7 +75,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -116,17 +117,19 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: target: wasm32-unknown-unknown,wasm32-wasi + - name: Install clang # Needed for zlib compilation + run: apt-get update && apt-get install -y clang gcc-multilib - name: Build wasm32-unknown-unknown - run: cargo build -p parquet --no-default-features --features cli,snap,flate2,brotli --target wasm32-unknown-unknown + run: cargo build -p parquet --target wasm32-unknown-unknown - name: Build wasm32-wasi - run: cargo build -p parquet --no-default-features --features cli,snap,flate2,brotli --target wasm32-wasi + run: cargo build -p parquet --target wasm32-wasi pyspark-integration-test: name: PySpark Integration Test @@ -135,7 +138,7 @@ jobs: matrix: rust: [ stable ] steps: - - uses: 
actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Python uses: actions/setup-python@v4 with: @@ -168,7 +171,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup Clippy diff --git a/.github/workflows/parquet_derive.yml b/.github/workflows/parquet_derive.yml index 72b90ecfd81a..d8b02f73a8aa 100644 --- a/.github/workflows/parquet_derive.yml +++ b/.github/workflows/parquet_derive.yml @@ -43,7 +43,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain @@ -57,7 +57,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup Clippy diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f198f48dfec5..9c4b28b691b7 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -37,7 +37,7 @@ jobs: name: Test on Mac runs-on: macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Install protoc with brew @@ -60,7 +60,7 @@ jobs: name: Test on Windows runs-on: windows-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true - name: Install protobuf compiler in /d/protoc @@ -93,7 +93,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Setup rustfmt @@ -110,7 +110,7 @@ jobs: container: image: amd64/rust steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md index c404133f564e..336adff990bd 100644 --- a/CHANGELOG-old.md +++ b/CHANGELOG-old.md @@ -19,6 +19,265 @@ # Historical Changelog +## [48.0.0](https://github.com/apache/arrow-rs/tree/48.0.0) (2023-10-18) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/47.0.0...48.0.0) + +**Breaking changes:** + +- Evaluate null\_regex for string type in csv \(now such values will be parsed as `Null` rather than `""`\) [\#4942](https://github.com/apache/arrow-rs/pull/4942) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([haohuaijin](https://github.com/haohuaijin)) +- fix\(csv\)!: infer null for empty column. 
[\#4910](https://github.com/apache/arrow-rs/pull/4910) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kskalski](https://github.com/kskalski)) +- feat: log headers/trailers in flight CLI \(+ minor fixes\) [\#4898](https://github.com/apache/arrow-rs/pull/4898) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- fix\(arrow-json\)!: include null fields in schema inference with a type of Null [\#4894](https://github.com/apache/arrow-rs/pull/4894) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kskalski](https://github.com/kskalski)) +- Mark OnCloseRowGroup Send [\#4893](https://github.com/apache/arrow-rs/pull/4893) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([devinjdangelo](https://github.com/devinjdangelo)) +- Specialize Thrift Decoding \(~40% Faster\) \(\#4891\) [\#4892](https://github.com/apache/arrow-rs/pull/4892) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Make ArrowRowGroupWriter Public and SerializedRowGroupWriter Send [\#4850](https://github.com/apache/arrow-rs/pull/4850) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([devinjdangelo](https://github.com/devinjdangelo)) + +**Implemented enhancements:** + +- Allow schema fields to merge with `Null` datatype [\#4901](https://github.com/apache/arrow-rs/issues/4901) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add option to FlightDataEncoder to always send dictionaries [\#4895](https://github.com/apache/arrow-rs/issues/4895) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Rework Thrift Encoding / Decoding of Parquet Metadata [\#4891](https://github.com/apache/arrow-rs/issues/4891) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Plans for supporting Extension Array to support Fixed shape tensor Array [\#4890](https://github.com/apache/arrow-rs/issues/4890) +- Implement Take for UnionArray [\#4882](https://github.com/apache/arrow-rs/issues/4882) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Check precision overflow for casting floating to decimal [\#4865](https://github.com/apache/arrow-rs/issues/4865) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Replace lexical [\#4774](https://github.com/apache/arrow-rs/issues/4774) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add read access to settings in `csv::WriterBuilder` [\#4735](https://github.com/apache/arrow-rs/issues/4735) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve the performance of "DictionaryValue" row encoding [\#4712](https://github.com/apache/arrow-rs/issues/4712) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] + +**Fixed bugs:** + +- Should we make blank values and empty string to `None` in csv? 
[\#4939](https://github.com/apache/arrow-rs/issues/4939) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[FlightSQL\] SubstraitPlan structure is not exported [\#4932](https://github.com/apache/arrow-rs/issues/4932) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Loading page index breaks skipping of pages with nested types [\#4921](https://github.com/apache/arrow-rs/issues/4921) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- CSV schema inference assumes `Utf8` for empty columns [\#4903](https://github.com/apache/arrow-rs/issues/4903) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet: Field Ids are not read from a Parquet file without serialized arrow schema [\#4877](https://github.com/apache/arrow-rs/issues/4877) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- make\_primitive\_scalar function loses DataType Internal information [\#4851](https://github.com/apache/arrow-rs/issues/4851) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- StructBuilder doesn't handle nulls correctly for empty structs [\#4842](https://github.com/apache/arrow-rs/issues/4842) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `NullArray::is_null()` returns `false` incorrectly [\#4835](https://github.com/apache/arrow-rs/issues/4835) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- cast\_string\_to\_decimal should check precision overflow [\#4829](https://github.com/apache/arrow-rs/issues/4829) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Null fields are omitted by `infer_json_schema_from_seekable` [\#4814](https://github.com/apache/arrow-rs/issues/4814) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Closed issues:** + +- Support for reading JSON Array to Arrow [\#4905](https://github.com/apache/arrow-rs/issues/4905) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Merged pull requests:** + +- Assume Pages Delimit Records When Offset Index Loaded \(\#4921\) [\#4943](https://github.com/apache/arrow-rs/pull/4943) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Update pyo3 requirement from 0.19 to 0.20 [\#4941](https://github.com/apache/arrow-rs/pull/4941) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([crepererum](https://github.com/crepererum)) +- Add `FileWriter` schema getter [\#4940](https://github.com/apache/arrow-rs/pull/4940) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([haixuanTao](https://github.com/haixuanTao)) +- feat: support parsing for parquet writer option [\#4938](https://github.com/apache/arrow-rs/pull/4938) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([fansehep](https://github.com/fansehep)) +- Export `SubstraitPlan` structure in arrow\_flight::sql \(\#4932\) [\#4933](https://github.com/apache/arrow-rs/pull/4933) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([amartins23](https://github.com/amartins23)) +- Update zstd requirement from 0.12.0 to 0.13.0 [\#4923](https://github.com/apache/arrow-rs/pull/4923) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- feat: add method for async read bloom filter 
[\#4917](https://github.com/apache/arrow-rs/pull/4917) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([hengfeiyang](https://github.com/hengfeiyang)) +- Minor: Clarify rationale for `FlightDataEncoder` API, add examples [\#4916](https://github.com/apache/arrow-rs/pull/4916) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) +- Update regex-syntax requirement from 0.7.1 to 0.8.0 [\#4914](https://github.com/apache/arrow-rs/pull/4914) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- feat: document & streamline flight SQL CLI [\#4912](https://github.com/apache/arrow-rs/pull/4912) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- Support Arbitrary JSON values in JSON Reader \(\#4905\) [\#4911](https://github.com/apache/arrow-rs/pull/4911) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Cleanup CSV WriterBuilder, Default to AutoSI Second Precision \(\#4735\) [\#4909](https://github.com/apache/arrow-rs/pull/4909) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update proc-macro2 requirement from =1.0.68 to =1.0.69 [\#4907](https://github.com/apache/arrow-rs/pull/4907) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- chore: add csv example [\#4904](https://github.com/apache/arrow-rs/pull/4904) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) +- feat\(schema\): allow null fields to be merged with other datatypes [\#4902](https://github.com/apache/arrow-rs/pull/4902) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kskalski](https://github.com/kskalski)) +- Update proc-macro2 requirement from =1.0.67 to =1.0.68 [\#4900](https://github.com/apache/arrow-rs/pull/4900) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add option to `FlightDataEncoder` to always resend batch dictionaries [\#4896](https://github.com/apache/arrow-rs/pull/4896) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alexwilcoxson-rel](https://github.com/alexwilcoxson-rel)) +- Fix integration tests [\#4889](https://github.com/apache/arrow-rs/pull/4889) ([tustvold](https://github.com/tustvold)) +- Support Parsing Avro File Headers [\#4888](https://github.com/apache/arrow-rs/pull/4888) ([tustvold](https://github.com/tustvold)) +- Support parquet bloom filter length [\#4885](https://github.com/apache/arrow-rs/pull/4885) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([letian-jiang](https://github.com/letian-jiang)) +- Replace lz4 with lz4\_flex Allowing Compilation for WASM [\#4884](https://github.com/apache/arrow-rs/pull/4884) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Implement Take for UnionArray 
[\#4883](https://github.com/apache/arrow-rs/pull/4883) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([avantgardnerio](https://github.com/avantgardnerio)) +- Update tonic-build requirement from =0.10.1 to =0.10.2 [\#4881](https://github.com/apache/arrow-rs/pull/4881) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- parquet: Read field IDs from Parquet Schema [\#4878](https://github.com/apache/arrow-rs/pull/4878) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Samrose-Ahmed](https://github.com/Samrose-Ahmed)) +- feat: improve flight CLI error handling [\#4873](https://github.com/apache/arrow-rs/pull/4873) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- Support Encoding Parquet Columns in Parallel [\#4871](https://github.com/apache/arrow-rs/pull/4871) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Check precision overflow for casting floating to decimal [\#4866](https://github.com/apache/arrow-rs/pull/4866) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Make align\_buffers as public API [\#4863](https://github.com/apache/arrow-rs/pull/4863) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Enable new integration tests \(\#4828\) [\#4862](https://github.com/apache/arrow-rs/pull/4862) ([tustvold](https://github.com/tustvold)) +- Faster Serde Integration \(~80% faster\) [\#4861](https://github.com/apache/arrow-rs/pull/4861) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- fix: make\_primitive\_scalar bug [\#4852](https://github.com/apache/arrow-rs/pull/4852) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([JasonLi-cn](https://github.com/JasonLi-cn)) +- Update tonic-build requirement from =0.10.0 to =0.10.1 [\#4846](https://github.com/apache/arrow-rs/pull/4846) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Allow Constructing Non-Empty StructArray with no Fields \(\#4842\) [\#4845](https://github.com/apache/arrow-rs/pull/4845) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Refine documentation to `Array::is_null` [\#4838](https://github.com/apache/arrow-rs/pull/4838) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- fix: add missing precision overflow checking for `cast_string_to_decimal` [\#4830](https://github.com/apache/arrow-rs/pull/4830) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonahgao](https://github.com/jonahgao)) +## [47.0.0](https://github.com/apache/arrow-rs/tree/47.0.0) (2023-09-19) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/46.0.0...47.0.0) + +**Breaking changes:** + +- Make FixedSizeBinaryArray value\_data return a reference [\#4820](https://github.com/apache/arrow-rs/issues/4820) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Update prost to v0.12.1 [\#4825](https://github.com/apache/arrow-rs/pull/4825) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([tustvold](https://github.com/tustvold)) +- feat: FixedSizeBinaryArray::value\_data return reference [\#4821](https://github.com/apache/arrow-rs/pull/4821) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Stateless Row Encoding / Don't Preserve Dictionaries in `RowConverter` \(\#4811\) [\#4819](https://github.com/apache/arrow-rs/pull/4819) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([tustvold](https://github.com/tustvold)) +- fix: entries field is non-nullable [\#4808](https://github.com/apache/arrow-rs/pull/4808) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Fix flight sql do put handling, add bind parameter support to FlightSQL cli client [\#4797](https://github.com/apache/arrow-rs/pull/4797) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([suremarc](https://github.com/suremarc)) +- Remove unused dyn\_cmp\_dict feature [\#4766](https://github.com/apache/arrow-rs/pull/4766) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add underlying `std::io::Error` to `IoError` and add `IpcError` variant [\#4726](https://github.com/apache/arrow-rs/pull/4726) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alexandreyc](https://github.com/alexandreyc)) + +**Implemented enhancements:** + +- Row Format Adapative Block Size [\#4812](https://github.com/apache/arrow-rs/issues/4812) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Stateless Row Conversion [\#4811](https://github.com/apache/arrow-rs/issues/4811) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Add option to specify custom null values for CSV reader [\#4794](https://github.com/apache/arrow-rs/issues/4794) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- parquet::record::RowIter cannot be customized with batch\_size and defaults to 1024 [\#4782](https://github.com/apache/arrow-rs/issues/4782) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `DynScalar` abstraction \(something that makes it easy to create scalar `Datum`s\) [\#4781](https://github.com/apache/arrow-rs/issues/4781) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `Datum` is not exported as part of `arrow` \(it is only exported in `arrow_array`\) [\#4780](https://github.com/apache/arrow-rs/issues/4780) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `Scalar` is not exported as part of `arrow` \(it is only exported in `arrow_array`\) [\#4779](https://github.com/apache/arrow-rs/issues/4779) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support IntoPyArrow for impl RecordBatchReader [\#4730](https://github.com/apache/arrow-rs/issues/4730) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Datum Based String Kernels [\#4595](https://github.com/apache/arrow-rs/issues/4595) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] + +**Fixed bugs:** + +- MapArray::new\_from\_strings 
creates nullable entries field [\#4807](https://github.com/apache/arrow-rs/issues/4807) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- pyarrow module can't roundtrip tensor arrays [\#4805](https://github.com/apache/arrow-rs/issues/4805) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `concat_batches` errors with "schema mismatch" error when only metadata differs [\#4799](https://github.com/apache/arrow-rs/issues/4799) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- panic in `cmp` kernels with DictionaryArrays: `Option::unwrap()` on a `None` value' [\#4788](https://github.com/apache/arrow-rs/issues/4788) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- stream ffi panics if schema metadata values aren't valid utf8 [\#4750](https://github.com/apache/arrow-rs/issues/4750) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Regression: Incorrect Sorting of `*ListArray` in 46.0.0 [\#4746](https://github.com/apache/arrow-rs/issues/4746) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Row is no longer comparable after reuse [\#4741](https://github.com/apache/arrow-rs/issues/4741) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- DoPut FlightSQL handler inadvertently consumes schema at start of Request\<Streaming\<FlightData\>\> [\#4658](https://github.com/apache/arrow-rs/issues/4658) +- Return error when converting schema [\#4752](https://github.com/apache/arrow-rs/pull/4752) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Implement PyArrowType for `Box\<dyn RecordBatchReader + Send\>` [\#4751](https://github.com/apache/arrow-rs/pull/4751) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) + +**Closed issues:** + +- Building arrow-rust for target wasm32-wasi failed to compile packed\_simd\_2 [\#4717](https://github.com/apache/arrow-rs/issues/4717) + +**Merged pull requests:** + +- Respect FormatOption::nulls for NullArray [\#4836](https://github.com/apache/arrow-rs/pull/4836) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix merge\_dictionary\_values in selection kernels [\#4833](https://github.com/apache/arrow-rs/pull/4833) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix like scalar null [\#4832](https://github.com/apache/arrow-rs/pull/4832) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- More chrono deprecations [\#4822](https://github.com/apache/arrow-rs/pull/4822) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Adaptive Row Block Size \(\#4812\) [\#4818](https://github.com/apache/arrow-rs/pull/4818) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update proc-macro2 requirement from =1.0.66 to =1.0.67 [\#4816](https://github.com/apache/arrow-rs/pull/4816) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Do not check schema for equality in concat\_batches [\#4815](https://github.com/apache/arrow-rs/pull/4815) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- fix: export record batch through stream [\#4806](https://github.com/apache/arrow-rs/pull/4806) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Improve CSV Reader Benchmark Coverage of Small Primitives [\#4803](https://github.com/apache/arrow-rs/pull/4803) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- csv: Add option to specify custom null values [\#4795](https://github.com/apache/arrow-rs/pull/4795) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vrongmeal](https://github.com/vrongmeal)) +- Expand docstring and add example to `Scalar` [\#4793](https://github.com/apache/arrow-rs/pull/4793) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Re-export array crate root \(\#4780\) \(\#4779\) [\#4791](https://github.com/apache/arrow-rs/pull/4791) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix DictionaryArray::normalized\_keys \(\#4788\) [\#4789](https://github.com/apache/arrow-rs/pull/4789) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Allow custom tree builder for parquet::record::RowIter [\#4783](https://github.com/apache/arrow-rs/pull/4783) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([YuraKotov](https://github.com/YuraKotov)) +- Bump actions/checkout from 3 to 4 [\#4767](https://github.com/apache/arrow-rs/pull/4767) ([dependabot[bot]](https://github.com/apps/dependabot)) +- fix: avoid panic if offset index not exists. [\#4761](https://github.com/apache/arrow-rs/pull/4761) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([RinChanNOWWW](https://github.com/RinChanNOWWW)) +- Relax constraints on PyArrowType [\#4757](https://github.com/apache/arrow-rs/pull/4757) ([tustvold](https://github.com/tustvold)) +- Chrono deprecations [\#4748](https://github.com/apache/arrow-rs/pull/4748) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix List Sorting, Revert Removal of Rank Kernels [\#4747](https://github.com/apache/arrow-rs/pull/4747) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Clear row buffer before reuse [\#4742](https://github.com/apache/arrow-rs/pull/4742) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([yjshen](https://github.com/yjshen)) +- Datum based like kernels \(\#4595\) [\#4732](https://github.com/apache/arrow-rs/pull/4732) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([tustvold](https://github.com/tustvold)) +- feat: expose DoGet response headers & trailers [\#4727](https://github.com/apache/arrow-rs/pull/4727) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([crepererum](https://github.com/crepererum)) +- Cleanup length and bit\_length kernels [\#4718](https://github.com/apache/arrow-rs/pull/4718) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +## [46.0.0](https://github.com/apache/arrow-rs/tree/46.0.0) (2023-08-21) + +[Full Changelog](https://github.com/apache/arrow-rs/compare/45.0.0...46.0.0) + +**Breaking changes:** + +- API improvement: `batches_to_flight_data` forces clone [\#4656](https://github.com/apache/arrow-rs/issues/4656) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Add AnyDictionary 
Abstraction and Take ArrayRef in DictionaryArray::with\_values [\#4707](https://github.com/apache/arrow-rs/pull/4707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Cleanup parquet type builders [\#4706](https://github.com/apache/arrow-rs/pull/4706) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Take kernel dyn Array [\#4705](https://github.com/apache/arrow-rs/pull/4705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Improve ergonomics of Scalar [\#4704](https://github.com/apache/arrow-rs/pull/4704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Datum based comparison kernels \(\#4596\) [\#4701](https://github.com/apache/arrow-rs/pull/4701) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([tustvold](https://github.com/tustvold)) +- Improve `Array` Logical Nullability [\#4691](https://github.com/apache/arrow-rs/pull/4691) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Validate ArrayData Buffer Alignment and Automatically Align IPC buffers \(\#4255\) [\#4681](https://github.com/apache/arrow-rs/pull/4681) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- More intuitive bool-to-string casting [\#4666](https://github.com/apache/arrow-rs/pull/4666) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fsdvh](https://github.com/fsdvh)) +- enhancement: batches\_to\_flight\_data use a schema ref as param. [\#4665](https://github.com/apache/arrow-rs/pull/4665) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([jackwener](https://github.com/jackwener)) +- fix: from\_thrift avoid panic when stats in invalid. 
[\#4642](https://github.com/apache/arrow-rs/pull/4642) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jackwener](https://github.com/jackwener)) +- bug: Add some missing field in row group metadata: ordinal, total co… [\#4636](https://github.com/apache/arrow-rs/pull/4636) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([liurenjie1024](https://github.com/liurenjie1024)) +- Remove deprecated limit kernel [\#4597](https://github.com/apache/arrow-rs/pull/4597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) + +**Implemented enhancements:** + +- parquet: support setting the field\_id with an ArrowWriter [\#4702](https://github.com/apache/arrow-rs/issues/4702) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Support references in i256 arithmetic ops [\#4694](https://github.com/apache/arrow-rs/issues/4694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Precision-Loss Decimal Arithmetic [\#4664](https://github.com/apache/arrow-rs/issues/4664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Faster i256 Division [\#4663](https://github.com/apache/arrow-rs/issues/4663) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support `concat_batches` for 0 columns [\#4661](https://github.com/apache/arrow-rs/issues/4661) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `filter_record_batch` should support filtering record batch without columns [\#4647](https://github.com/apache/arrow-rs/issues/4647) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Improve speed of `lexicographical_partition_ranges` [\#4614](https://github.com/apache/arrow-rs/issues/4614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- object\_store: multipart ranges for HTTP [\#4612](https://github.com/apache/arrow-rs/issues/4612) +- Add Rank Function [\#4606](https://github.com/apache/arrow-rs/issues/4606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Datum Based Comparison Kernels [\#4596](https://github.com/apache/arrow-rs/issues/4596) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Convenience method to create `DataType::List` correctly [\#4544](https://github.com/apache/arrow-rs/issues/4544) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Remove Deprecated Arithmetic Kernels [\#4481](https://github.com/apache/arrow-rs/issues/4481) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Equality kernel where null==null gives true [\#4438](https://github.com/apache/arrow-rs/issues/4438) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Fixed bugs:** + +- Parquet ArrowWriter Ignores Nulls in Dictionary Values [\#4690](https://github.com/apache/arrow-rs/issues/4690) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Schema Nullability Validation Fails to Account for Dictionary Nulls [\#4689](https://github.com/apache/arrow-rs/issues/4689) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Comparison Kernels Ignore Nulls in Dictionary Values [\#4688](https://github.com/apache/arrow-rs/issues/4688) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- 
Casting List to String Ignores Format Options [\#4669](https://github.com/apache/arrow-rs/issues/4669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Double free in C Stream Interface [\#4659](https://github.com/apache/arrow-rs/issues/4659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- CI Failing On Packed SIMD [\#4651](https://github.com/apache/arrow-rs/issues/4651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `RowInterner::size()` much too low for high cardinality dictionary columns [\#4645](https://github.com/apache/arrow-rs/issues/4645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Decimal PrimitiveArray change datatype after try\_unary [\#4644](https://github.com/apache/arrow-rs/issues/4644) +- Better explanation in docs for Dictionary field encoding using RowConverter [\#4639](https://github.com/apache/arrow-rs/issues/4639) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `List(FixedSizeBinary)` array equality check may return wrong result [\#4637](https://github.com/apache/arrow-rs/issues/4637) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `arrow::compute::nullif` panics if `NullArray` is provided [\#4634](https://github.com/apache/arrow-rs/issues/4634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Empty lists in FixedSizeListArray::try\_new is not handled [\#4623](https://github.com/apache/arrow-rs/issues/4623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Bounds checking in `MutableBuffer::set_null_bits` can be bypassed [\#4620](https://github.com/apache/arrow-rs/issues/4620) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- TypedDictionaryArray Misleading Null Behaviour [\#4616](https://github.com/apache/arrow-rs/issues/4616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- bug: Parquet writer missing row group metadata fields such as `compressed_size`, `file offset`. [\#4610](https://github.com/apache/arrow-rs/issues/4610) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `new_null_array` generates an invalid union array [\#4600](https://github.com/apache/arrow-rs/issues/4600) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Footer parsing fails for very large parquet file. 
[\#4592](https://github.com/apache/arrow-rs/issues/4592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- bug\(parquet\): Disabling global statistics but enabling for particular column breaks reading [\#4587](https://github.com/apache/arrow-rs/issues/4587) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- `arrow::compute::concat` panics for dense union arrays with non-trivial type IDs [\#4578](https://github.com/apache/arrow-rs/issues/4578) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Closed issues:** + +- \[object\_store\] when Create a AmazonS3 instance work with MinIO without set endpoint got error MissingRegion [\#4617](https://github.com/apache/arrow-rs/issues/4617) + +**Merged pull requests:** + +- Add distinct kernels \(\#960\) \(\#4438\) [\#4716](https://github.com/apache/arrow-rs/pull/4716) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Update parquet object\_store 0.7 [\#4715](https://github.com/apache/arrow-rs/pull/4715) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Support Field ID in ArrowWriter \(\#4702\) [\#4710](https://github.com/apache/arrow-rs/pull/4710) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Remove rank kernels [\#4703](https://github.com/apache/arrow-rs/pull/4703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Support references in i256 arithmetic ops [\#4692](https://github.com/apache/arrow-rs/pull/4692) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Cleanup DynComparator \(\#2654\) [\#4687](https://github.com/apache/arrow-rs/pull/4687) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Separate metadata fetch from `ArrowReaderBuilder` construction \(\#4674\) [\#4676](https://github.com/apache/arrow-rs/pull/4676) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- cleanup some assert\(\) with error propagation [\#4673](https://github.com/apache/arrow-rs/pull/4673) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) +- Faster i256 Division \(2-100x\) \(\#4663\) [\#4672](https://github.com/apache/arrow-rs/pull/4672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix MSRV CI [\#4671](https://github.com/apache/arrow-rs/pull/4671) ([tustvold](https://github.com/tustvold)) +- Fix equality of nested nullable FixedSizeBinary \(\#4637\) [\#4670](https://github.com/apache/arrow-rs/pull/4670) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use ArrayFormatter in cast kernel [\#4668](https://github.com/apache/arrow-rs/pull/4668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Minor: Improve API docs for FlightSQL metadata builders [\#4667](https://github.com/apache/arrow-rs/pull/4667) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) +- Support `concat_batches` for 0 columns [\#4662](https://github.com/apache/arrow-rs/pull/4662) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([Dandandan](https://github.com/Dandandan)) +- fix ownership of c stream error [\#4660](https://github.com/apache/arrow-rs/pull/4660) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) +- Minor: Fix illustration for dict encoding [\#4657](https://github.com/apache/arrow-rs/pull/4657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([JayjeetAtGithub](https://github.com/JayjeetAtGithub)) +- minor: move comment to the correct location [\#4655](https://github.com/apache/arrow-rs/pull/4655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- Update packed\_simd and run miri tests on simd code [\#4654](https://github.com/apache/arrow-rs/pull/4654) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) +- impl `From\<Vec\<T\>\>` for `BufferBuilder` and `MutableBuffer` [\#4650](https://github.com/apache/arrow-rs/pull/4650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- Filter record batch with 0 columns [\#4648](https://github.com/apache/arrow-rs/pull/4648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Account for child `Bucket` size in OrderPreservingInterner [\#4646](https://github.com/apache/arrow-rs/pull/4646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Implement `Default`,`Extend` and `FromIterator` for `BufferBuilder` [\#4638](https://github.com/apache/arrow-rs/pull/4638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- fix\(select\): handle `NullArray` in `nullif` [\#4635](https://github.com/apache/arrow-rs/pull/4635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) +- Move `BufferBuilder` to `arrow-buffer` [\#4630](https://github.com/apache/arrow-rs/pull/4630) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- allow zero sized empty fixed [\#4626](https://github.com/apache/arrow-rs/pull/4626) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([smiklos](https://github.com/smiklos)) +- fix: compute\_dictionary\_mapping use wrong offsetSize [\#4625](https://github.com/apache/arrow-rs/pull/4625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) +- impl `FromIterator` for `MutableBuffer` [\#4624](https://github.com/apache/arrow-rs/pull/4624) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- expand docs for FixedSizeListArray [\#4622](https://github.com/apache/arrow-rs/pull/4622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([smiklos](https://github.com/smiklos)) +- fix\(buffer\): panic on end index overflow in `MutableBuffer::set_null_bits` [\#4621](https://github.com/apache/arrow-rs/pull/4621) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) +- impl `Default` for `arrow_buffer::buffer::MutableBuffer` [\#4619](https://github.com/apache/arrow-rs/pull/4619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) +- Minor: improve docs and add example for lexicographical\_partition\_ranges [\#4615](https://github.com/apache/arrow-rs/pull/4615) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Cleanup sort [\#4613](https://github.com/apache/arrow-rs/pull/4613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add rank function \(\#4606\) [\#4609](https://github.com/apache/arrow-rs/pull/4609) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add more docs and examples for ListArray and OffsetsBuffer [\#4607](https://github.com/apache/arrow-rs/pull/4607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) +- Simplify dictionary sort [\#4605](https://github.com/apache/arrow-rs/pull/4605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Consolidate sort benchmarks [\#4604](https://github.com/apache/arrow-rs/pull/4604) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Don't Reorder Nulls in sort\_to\_indices \(\#4545\) [\#4603](https://github.com/apache/arrow-rs/pull/4603) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- fix\(data\): create child arrays of correct length when building a sparse union null array [\#4601](https://github.com/apache/arrow-rs/pull/4601) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) +- Use u32 metadata\_len when parsing footer of parquet. [\#4599](https://github.com/apache/arrow-rs/pull/4599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Berrysoft](https://github.com/Berrysoft)) +- fix\(data\): map type ID to child index before indexing a union child array [\#4598](https://github.com/apache/arrow-rs/pull/4598) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) +- Remove deprecated arithmetic kernels \(\#4481\) [\#4594](https://github.com/apache/arrow-rs/pull/4594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Test Disabled Page Statistics \(\#4587\) [\#4589](https://github.com/apache/arrow-rs/pull/4589) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Cleanup ArrayData::buffers [\#4583](https://github.com/apache/arrow-rs/pull/4583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Use contains\_nulls in ArrayData equality of byte arrays [\#4582](https://github.com/apache/arrow-rs/pull/4582) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Vectorized lexicographical\_partition\_ranges \(~80% faster\) [\#4575](https://github.com/apache/arrow-rs/pull/4575) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- chore: add datatype new\_list [\#4561](https://github.com/apache/arrow-rs/pull/4561) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) ## [45.0.0](https://github.com/apache/arrow-rs/tree/45.0.0) (2023-07-30) [Full Changelog](https://github.com/apache/arrow-rs/compare/44.0.0...45.0.0) diff --git a/CHANGELOG.md b/CHANGELOG.md index 74f74bc3ef13..ba27d6679ffe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,116 +19,54 @@ # Changelog -## [46.0.0](https://github.com/apache/arrow-rs/tree/46.0.0) (2023-08-21) 
+## [49.0.0](https://github.com/apache/arrow-rs/tree/49.0.0) (2023-11-07) -[Full Changelog](https://github.com/apache/arrow-rs/compare/45.0.0...46.0.0) +[Full Changelog](https://github.com/apache/arrow-rs/compare/48.0.0...49.0.0) **Breaking changes:** -- API improvement: `batches_to_flight_data` forces clone [\#4656](https://github.com/apache/arrow-rs/issues/4656) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Add AnyDictionary Abstraction and Take ArrayRef in DictionaryArray::with\_values [\#4707](https://github.com/apache/arrow-rs/pull/4707) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Cleanup parquet type builders [\#4706](https://github.com/apache/arrow-rs/pull/4706) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Take kernel dyn Array [\#4705](https://github.com/apache/arrow-rs/pull/4705) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Improve ergonomics of Scalar [\#4704](https://github.com/apache/arrow-rs/pull/4704) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Datum based comparison kernels \(\#4596\) [\#4701](https://github.com/apache/arrow-rs/pull/4701) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([tustvold](https://github.com/tustvold)) -- Improve `Array` Logical Nullability [\#4691](https://github.com/apache/arrow-rs/pull/4691) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Validate ArrayData Buffer Alignment and Automatically Align IPC buffers \(\#4255\) [\#4681](https://github.com/apache/arrow-rs/pull/4681) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- More intuitive bool-to-string casting [\#4666](https://github.com/apache/arrow-rs/pull/4666) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fsdvh](https://github.com/fsdvh)) -- enhancement: batches\_to\_flight\_data use a schema ref as param. [\#4665](https://github.com/apache/arrow-rs/pull/4665) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([jackwener](https://github.com/jackwener)) -- fix: from\_thrift avoid panic when stats in invalid. 
[\#4642](https://github.com/apache/arrow-rs/pull/4642) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jackwener](https://github.com/jackwener)) -- bug: Add some missing field in row group metadata: ordinal, total co… [\#4636](https://github.com/apache/arrow-rs/pull/4636) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([liurenjie1024](https://github.com/liurenjie1024)) -- Remove deprecated limit kernel [\#4597](https://github.com/apache/arrow-rs/pull/4597) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Return row count when inferring schema from JSON [\#5008](https://github.com/apache/arrow-rs/pull/5008) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asayers](https://github.com/asayers)) +- Update object\_store 0.8.0 [\#5043](https://github.com/apache/arrow-rs/pull/5043) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) **Implemented enhancements:** -- parquet: support setting the field\_id with an ArrowWriter [\#4702](https://github.com/apache/arrow-rs/issues/4702) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- Support references in i256 arithmetic ops [\#4694](https://github.com/apache/arrow-rs/issues/4694) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Precision-Loss Decimal Arithmetic [\#4664](https://github.com/apache/arrow-rs/issues/4664) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Faster i256 Division [\#4663](https://github.com/apache/arrow-rs/issues/4663) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Support `concat_batches` for 0 columns [\#4661](https://github.com/apache/arrow-rs/issues/4661) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `filter_record_batch` should support filtering record batch without columns [\#4647](https://github.com/apache/arrow-rs/issues/4647) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Improve speed of `lexicographical_partition_ranges` [\#4614](https://github.com/apache/arrow-rs/issues/4614) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- object\_store: multipart ranges for HTTP [\#4612](https://github.com/apache/arrow-rs/issues/4612) -- Add Rank Function [\#4606](https://github.com/apache/arrow-rs/issues/4606) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Datum Based Comparison Kernels [\#4596](https://github.com/apache/arrow-rs/issues/4596) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] -- Convenience method to create `DataType::List` correctly [\#4544](https://github.com/apache/arrow-rs/issues/4544) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Remove Deprecated Arithmetic Kernels [\#4481](https://github.com/apache/arrow-rs/issues/4481) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Equality kernel where null==null gives true [\#4438](https://github.com/apache/arrow-rs/issues/4438) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Cast from integer/timestamp to timestamp/integer [\#5039](https://github.com/apache/arrow-rs/issues/5039) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support casting from integer to binary [\#5014](https://github.com/apache/arrow-rs/issues/5014) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Return row count when 
inferring schema from JSON [\#5007](https://github.com/apache/arrow-rs/issues/5007) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- \[FlightSQL\] Allow custom commands in get-flight-info [\#4996](https://github.com/apache/arrow-rs/issues/4996) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] +- Support `RecordBatch::remove_column()` and `Schema::remove_field()` [\#4952](https://github.com/apache/arrow-rs/issues/4952) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- `arrow_json`: support `binary` deserialization [\#4945](https://github.com/apache/arrow-rs/issues/4945) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- Support StructArray in Cast Kernel [\#4908](https://github.com/apache/arrow-rs/issues/4908) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- There exists a `ParquetRecordWriter` proc macro in `parquet_derive`, but `ParquetRecordReader` is missing [\#4772](https://github.com/apache/arrow-rs/issues/4772) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] **Fixed bugs:** -- Parquet ArrowWriter Ignores Nulls in Dictionary Values [\#4690](https://github.com/apache/arrow-rs/issues/4690) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Schema Nullability Validation Fails to Account for Dictionary Nulls [\#4689](https://github.com/apache/arrow-rs/issues/4689) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Comparison Kernels Ignore Nulls in Dictionary Values [\#4688](https://github.com/apache/arrow-rs/issues/4688) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Casting List to String Ignores Format Options [\#4669](https://github.com/apache/arrow-rs/issues/4669) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Double free in C Stream Interface [\#4659](https://github.com/apache/arrow-rs/issues/4659) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- CI Failing On Packed SIMD [\#4651](https://github.com/apache/arrow-rs/issues/4651) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `RowInterner::size()` much too low for high cardinality dictionary columns [\#4645](https://github.com/apache/arrow-rs/issues/4645) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Decimal PrimitiveArray change datatype after try\_unary [\#4644](https://github.com/apache/arrow-rs/issues/4644) -- Better explanation in docs for Dictionary field encoding using RowConverter [\#4639](https://github.com/apache/arrow-rs/issues/4639) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `List(FixedSizeBinary)` array equality check may return wrong result [\#4637](https://github.com/apache/arrow-rs/issues/4637) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- `arrow::compute::nullif` panics if `NullArray` is provided [\#4634](https://github.com/apache/arrow-rs/issues/4634) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Empty lists in FixedSizeListArray::try\_new is not handled [\#4623](https://github.com/apache/arrow-rs/issues/4623) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Bounds checking in `MutableBuffer::set_null_bits` can be bypassed [\#4620](https://github.com/apache/arrow-rs/issues/4620) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- TypedDictionaryArray Misleading Null Behaviour [\#4616](https://github.com/apache/arrow-rs/issues/4616) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- bug: Parquet writer missing row group metadata fields such as `compressed_size`, `file offset`. [\#4610](https://github.com/apache/arrow-rs/issues/4610) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- `new_null_array` generates an invalid union array [\#4600](https://github.com/apache/arrow-rs/issues/4600) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] -- Footer parsing fails for very large parquet file. [\#4592](https://github.com/apache/arrow-rs/issues/4592) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- bug\(parquet\): Disabling global statistics but enabling for particular column breaks reading [\#4587](https://github.com/apache/arrow-rs/issues/4587) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] -- `arrow::compute::concat` panics for dense union arrays with non-trivial type IDs [\#4578](https://github.com/apache/arrow-rs/issues/4578) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] - -**Closed issues:** - -- \[object\_store\] when Create a AmazonS3 instance work with MinIO without set endpoint got error MissingRegion [\#4617](https://github.com/apache/arrow-rs/issues/4617) +- Regression when serializing large json numbers [\#5038](https://github.com/apache/arrow-rs/issues/5038) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] +- RowSelection::intersection Produces Invalid RowSelection [\#5036](https://github.com/apache/arrow-rs/issues/5036) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] +- Incorrect comment on arrow::compute::kernels::sort::sort\_to\_indices [\#5029](https://github.com/apache/arrow-rs/issues/5029) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] + +**Documentation updates:** + +- chore: Update docs to refer to non deprecated function \(`partition`\) [\#5027](https://github.com/apache/arrow-rs/pull/5027) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) **Merged pull requests:** -- Add distinct kernels \(\#960\) \(\#4438\) [\#4716](https://github.com/apache/arrow-rs/pull/4716) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Update parquet object\_store 0.7 [\#4715](https://github.com/apache/arrow-rs/pull/4715) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Support Field ID in ArrowWriter \(\#4702\) [\#4710](https://github.com/apache/arrow-rs/pull/4710) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Remove rank kernels [\#4703](https://github.com/apache/arrow-rs/pull/4703) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Support references in i256 arithmetic ops [\#4692](https://github.com/apache/arrow-rs/pull/4692) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) -- Cleanup DynComparator \(\#2654\) [\#4687](https://github.com/apache/arrow-rs/pull/4687) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Separate metadata fetch from `ArrowReaderBuilder` construction \(\#4674\) 
[\#4676](https://github.com/apache/arrow-rs/pull/4676) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- cleanup some assert\(\) with error propagation [\#4673](https://github.com/apache/arrow-rs/pull/4673) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([zeevm](https://github.com/zeevm)) -- Faster i256 Division \(2-100x\) \(\#4663\) [\#4672](https://github.com/apache/arrow-rs/pull/4672) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Fix MSRV CI [\#4671](https://github.com/apache/arrow-rs/pull/4671) ([tustvold](https://github.com/tustvold)) -- Fix equality of nested nullable FixedSizeBinary \(\#4637\) [\#4670](https://github.com/apache/arrow-rs/pull/4670) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Use ArrayFormatter in cast kernel [\#4668](https://github.com/apache/arrow-rs/pull/4668) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Minor: Improve API docs for FlightSQL metadata builders [\#4667](https://github.com/apache/arrow-rs/pull/4667) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([alamb](https://github.com/alamb)) -- Support `concat_batches` for 0 columns [\#4662](https://github.com/apache/arrow-rs/pull/4662) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- fix ownership of c stream error [\#4660](https://github.com/apache/arrow-rs/pull/4660) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([wjones127](https://github.com/wjones127)) -- Minor: Fix illustration for dict encoding [\#4657](https://github.com/apache/arrow-rs/pull/4657) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([JayjeetAtGithub](https://github.com/JayjeetAtGithub)) -- minor: move comment to the correct location [\#4655](https://github.com/apache/arrow-rs/pull/4655) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- Update packed\_simd and run miri tests on simd code [\#4654](https://github.com/apache/arrow-rs/pull/4654) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann)) -- impl `From>` for `BufferBuilder` and `MutableBuffer` [\#4650](https://github.com/apache/arrow-rs/pull/4650) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- Filter record batch with 0 columns [\#4648](https://github.com/apache/arrow-rs/pull/4648) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- Account for child `Bucket` size in OrderPreservingInterner [\#4646](https://github.com/apache/arrow-rs/pull/4646) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Implement `Default`,`Extend` and `FromIterator` for `BufferBuilder` [\#4638](https://github.com/apache/arrow-rs/pull/4638) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- fix\(select\): handle `NullArray` in `nullif` [\#4635](https://github.com/apache/arrow-rs/pull/4635) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) -- Move `BufferBuilder` to `arrow-buffer` [\#4630](https://github.com/apache/arrow-rs/pull/4630) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- allow zero sized empty fixed [\#4626](https://github.com/apache/arrow-rs/pull/4626) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([smiklos](https://github.com/smiklos)) -- fix: compute\_dictionary\_mapping use wrong offsetSize [\#4625](https://github.com/apache/arrow-rs/pull/4625) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jackwener](https://github.com/jackwener)) -- impl `FromIterator` for `MutableBuffer` [\#4624](https://github.com/apache/arrow-rs/pull/4624) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- expand docs for FixedSizeListArray [\#4622](https://github.com/apache/arrow-rs/pull/4622) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([smiklos](https://github.com/smiklos)) -- fix\(buffer\): panic on end index overflow in `MutableBuffer::set_null_bits` [\#4621](https://github.com/apache/arrow-rs/pull/4621) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) -- impl `Default` for `arrow_buffer::buffer::MutableBuffer` [\#4619](https://github.com/apache/arrow-rs/pull/4619) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mbrobbel](https://github.com/mbrobbel)) -- Minor: improve docs and add example for lexicographical\_partition\_ranges [\#4615](https://github.com/apache/arrow-rs/pull/4615) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Cleanup sort [\#4613](https://github.com/apache/arrow-rs/pull/4613) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add rank function \(\#4606\) [\#4609](https://github.com/apache/arrow-rs/pull/4609) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Add more docs and examples for ListArray and OffsetsBuffer [\#4607](https://github.com/apache/arrow-rs/pull/4607) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Simplify dictionary sort [\#4605](https://github.com/apache/arrow-rs/pull/4605) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Consolidate sort benchmarks [\#4604](https://github.com/apache/arrow-rs/pull/4604) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Don't Reorder Nulls in sort\_to\_indices \(\#4545\) [\#4603](https://github.com/apache/arrow-rs/pull/4603) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- fix\(data\): create child arrays of correct length when building a sparse union null array [\#4601](https://github.com/apache/arrow-rs/pull/4601) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) -- Use u32 metadata\_len when parsing footer of parquet. 
[\#4599](https://github.com/apache/arrow-rs/pull/4599) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Berrysoft](https://github.com/Berrysoft)) -- fix\(data\): map type ID to child index before indexing a union child array [\#4598](https://github.com/apache/arrow-rs/pull/4598) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kawadakk](https://github.com/kawadakk)) -- Remove deprecated arithmetic kernels \(\#4481\) [\#4594](https://github.com/apache/arrow-rs/pull/4594) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Test Disabled Page Statistics \(\#4587\) [\#4589](https://github.com/apache/arrow-rs/pull/4589) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) -- Cleanup ArrayData::buffers [\#4583](https://github.com/apache/arrow-rs/pull/4583) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Use contains\_nulls in ArrayData equality of byte arrays [\#4582](https://github.com/apache/arrow-rs/pull/4582) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- Vectorized lexicographical\_partition\_ranges \(~80% faster\) [\#4575](https://github.com/apache/arrow-rs/pull/4575) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) -- chore: add datatype new\_list [\#4561](https://github.com/apache/arrow-rs/pull/4561) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) +- Parquet f32/f64 handle signed zeros in statistics [\#5048](https://github.com/apache/arrow-rs/pull/5048) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Jefffrey](https://github.com/Jefffrey)) +- Fix serialization of large integers in JSON \(\#5038\) [\#5042](https://github.com/apache/arrow-rs/pull/5042) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Fix RowSelection::intersection \(\#5036\) [\#5041](https://github.com/apache/arrow-rs/pull/5041) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold)) +- Cast from integer/timestamp to timestamp/integer [\#5040](https://github.com/apache/arrow-rs/pull/5040) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- doc: update comment on sort\_to\_indices to reflect correct ordering [\#5033](https://github.com/apache/arrow-rs/pull/5033) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([westonpace](https://github.com/westonpace)) +- Support casting from integer to binary [\#5015](https://github.com/apache/arrow-rs/pull/5015) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([viirya](https://github.com/viirya)) +- Update tracing-log requirement from 0.1 to 0.2 [\#4998](https://github.com/apache/arrow-rs/pull/4998) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- feat\(flight-sql\): Allow custom commands in get-flight-info [\#4997](https://github.com/apache/arrow-rs/pull/4997) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([amartins23](https://github.com/amartins23)) +- \[MINOR\] No need to jump to web pages 
[\#4994](https://github.com/apache/arrow-rs/pull/4994) ([smallzhongfeng](https://github.com/smallzhongfeng)) +- Support metadata in SchemaBuilder [\#4987](https://github.com/apache/arrow-rs/pull/4987) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- feat: support schema change by idx and reverse [\#4985](https://github.com/apache/arrow-rs/pull/4985) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([fansehep](https://github.com/fansehep)) +- Bump actions/setup-node from 3 to 4 [\#4982](https://github.com/apache/arrow-rs/pull/4982) ([dependabot[bot]](https://github.com/apps/dependabot)) +- Add arrow\_cast::base64 and document usage in arrow\_json [\#4975](https://github.com/apache/arrow-rs/pull/4975) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add SchemaBuilder::remove \(\#4952\) [\#4964](https://github.com/apache/arrow-rs/pull/4964) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tustvold](https://github.com/tustvold)) +- Add `Field::remove()`, `Schema::remove()`, and `RecordBatch::remove_column()` APIs [\#4959](https://github.com/apache/arrow-rs/pull/4959) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Folyd](https://github.com/Folyd)) +- Add `RecordReader` trait and proc macro to implement it for a struct [\#4773](https://github.com/apache/arrow-rs/pull/4773) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Joseph-Rance](https://github.com/Joseph-Rance)) diff --git a/Cargo.toml b/Cargo.toml index b118c937ca36..d5e834316b91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ members = [ "arrow", "arrow-arith", "arrow-array", + "arrow-avro", "arrow-buffer", "arrow-cast", "arrow-csv", @@ -61,7 +62,7 @@ exclude = [ ] [workspace.package] -version = "46.0.0" +version = "49.0.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow <dev@arrow.apache.org>"] @@ -76,18 +77,20 @@ edition = "2021" rust-version = "1.62" [workspace.dependencies] -arrow = { version = "46.0.0", path = "./arrow", default-features = false } -arrow-arith = { version = "46.0.0", path = "./arrow-arith" } -arrow-array = { version = "46.0.0", path = "./arrow-array" } -arrow-buffer = { version = "46.0.0", path = "./arrow-buffer" } -arrow-cast = { version = "46.0.0", path = "./arrow-cast" } -arrow-csv = { version = "46.0.0", path = "./arrow-csv" } -arrow-data = { version = "46.0.0", path = "./arrow-data" } -arrow-ipc = { version = "46.0.0", path = "./arrow-ipc" } -arrow-json = { version = "46.0.0", path = "./arrow-json" } -arrow-ord = { version = "46.0.0", path = "./arrow-ord" } -arrow-row = { version = "46.0.0", path = "./arrow-row" } -arrow-schema = { version = "46.0.0", path = "./arrow-schema" } -arrow-select = { version = "46.0.0", path = "./arrow-select" } -arrow-string = { version = "46.0.0", path = "./arrow-string" } -parquet = { version = "46.0.0", path = "./parquet", default-features = false } +arrow = { version = "49.0.0", path = "./arrow", default-features = false } +arrow-arith = { version = "49.0.0", path = "./arrow-arith" } +arrow-array = { version = "49.0.0", path = "./arrow-array" } +arrow-buffer = { version = "49.0.0", path = "./arrow-buffer" } +arrow-cast = { version = "49.0.0", path = "./arrow-cast" } +arrow-csv = { version = "49.0.0", path = "./arrow-csv" } +arrow-data = { version = "49.0.0", path = "./arrow-data" } +arrow-ipc = { version = "49.0.0", path = "./arrow-ipc" } +arrow-json = {
version = "49.0.0", path = "./arrow-json" } +arrow-ord = { version = "49.0.0", path = "./arrow-ord" } +arrow-row = { version = "49.0.0", path = "./arrow-row" } +arrow-schema = { version = "49.0.0", path = "./arrow-schema" } +arrow-select = { version = "49.0.0", path = "./arrow-select" } +arrow-string = { version = "49.0.0", path = "./arrow-string" } +parquet = { version = "49.0.0", path = "./parquet", default-features = false } + +chrono = { version = "0.4.31", default-features = false, features = ["clock"] } diff --git a/README.md b/README.md index c3108917e87a..8cd3ec970b53 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,6 @@ There is more information in the [contributing] guide. [flight-readme]: arrow-flight/README.md [datafusion-readme]: https://github.com/apache/arrow-datafusion/blob/master/README.md [ballista-readme]: https://github.com/apache/arrow-ballista/blob/master/README.md -[objectstore-readme]: https://github.com/apache/arrow-rs/blob/master/object_store/README.md +[objectstore-readme]: object_store/README.md [issues]: https://github.com/apache/arrow-rs/issues [discussions]: https://github.com/apache/arrow-rs/discussions diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml index b5ea2e3c4354..57dc033e9645 100644 --- a/arrow-arith/Cargo.toml +++ b/arrow-arith/Cargo.toml @@ -38,7 +38,7 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -chrono = { version = "0.4.23", default-features = false } +chrono = { workspace = true } half = { version = "2.1", default-features = false } num = { version = "0.4", default-features = false, features = ["std"] } diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs index 04417c666c85..0dabaa50f5f6 100644 --- a/arrow-arith/src/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -207,15 +207,15 @@ where } let iter = ArrayIter::new(array); - let sum = - iter.into_iter() - .try_fold(T::default_value(), |accumulator, value| { - if let Some(value) = value { - accumulator.add_checked(value) - } else { - Ok(accumulator) - } - })?; + let sum = iter + .into_iter() + .try_fold(T::default_value(), |accumulator, value| { + if let Some(value) = value { + accumulator.add_checked(value) + } else { + Ok(accumulator) + } + })?; Ok(Some(sum)) } @@ -230,11 +230,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeType, { - min_max_array_helper::( - array, - |a, b| (is_nan(*a) & !is_nan(*b)) || a > b, - min, - ) + min_max_array_helper::(array, |a, b| (is_nan(*a) & !is_nan(*b)) || a > b, min) } /// Returns the max of values in the array of `ArrowNumericType` type, or dictionary @@ -244,11 +240,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeType, { - min_max_array_helper::( - array, - |a, b| (!is_nan(*a) & is_nan(*b)) || a < b, - max, - ) + min_max_array_helper::(array, |a, b| (!is_nan(*a) & is_nan(*b)) || a < b, max) } fn min_max_array_helper, F, M>( @@ -501,10 +493,7 @@ mod simd { fn init_accumulator_chunk() -> Self::SimdAccumulator; /// Updates the accumulator with the values of one chunk - fn accumulate_chunk_non_null( - accumulator: &mut Self::SimdAccumulator, - chunk: T::Simd, - ); + fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd); /// Updates the accumulator with the values of one chunk according to the given vector mask fn accumulate_chunk_nullable( @@ -602,10 +591,7 @@ mod simd { (T::init(T::default_value()), T::mask_init(false)) } - fn accumulate_chunk_non_null( - accumulator: &mut 
Self::SimdAccumulator, - chunk: T::Simd, - ) { + fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) { let acc_is_nan = !T::eq(accumulator.0, accumulator.0); let is_lt = acc_is_nan | T::lt(chunk, accumulator.0); let first_or_lt = !accumulator.1 | is_lt; @@ -627,10 +613,7 @@ mod simd { accumulator.1 |= vecmask; } - fn accumulate_scalar( - accumulator: &mut Self::ScalarAccumulator, - value: T::Native, - ) { + fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) { if !accumulator.1 { accumulator.0 = value; } else { @@ -690,10 +673,7 @@ mod simd { (T::init(T::default_value()), T::mask_init(false)) } - fn accumulate_chunk_non_null( - accumulator: &mut Self::SimdAccumulator, - chunk: T::Simd, - ) { + fn accumulate_chunk_non_null(accumulator: &mut Self::SimdAccumulator, chunk: T::Simd) { let chunk_is_nan = !T::eq(chunk, chunk); let is_gt = chunk_is_nan | T::gt(chunk, accumulator.0); let first_or_gt = !accumulator.1 | is_gt; @@ -715,10 +695,7 @@ mod simd { accumulator.1 |= vecmask; } - fn accumulate_scalar( - accumulator: &mut Self::ScalarAccumulator, - value: T::Native, - ) { + fn accumulate_scalar(accumulator: &mut Self::ScalarAccumulator, value: T::Native) { if !accumulator.1 { accumulator.0 = value; } else { @@ -1009,8 +986,7 @@ mod tests { #[test] fn test_primitive_array_bool_or_with_nulls() { - let a = - BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]); + let a = BooleanArray::from(vec![None, Some(false), Some(false), None, Some(false)]); assert!(!bool_or(&a).unwrap()); } @@ -1297,8 +1273,7 @@ mod tests { assert_eq!(Some(false), min_boolean(&a)); assert_eq!(Some(true), max_boolean(&a)); - let a = - BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]); + let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]); assert_eq!(Some(false), min_boolean(&a)); assert_eq!(Some(true), max_boolean(&a)); } diff --git a/arrow-arith/src/arithmetic.rs b/arrow-arith/src/arithmetic.rs index 8635ce0ddd80..124614d77f97 100644 --- a/arrow-arith/src/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -48,8 +48,7 @@ fn get_fixed_point_info( ))); } - let divisor = - i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32); + let divisor = i256::from_i128(10).pow_wrapping((product_scale - required_scale) as u32); Ok((precision, product_scale, divisor)) } @@ -78,8 +77,7 @@ pub fn multiply_fixed_point_dyn( let left = left.as_any().downcast_ref::<Decimal128Array>().unwrap(); let right = right.as_any().downcast_ref::<Decimal128Array>().unwrap(); - multiply_fixed_point(left, right, required_scale) - .map(|a| Arc::new(a) as ArrayRef) + multiply_fixed_point(left, right, required_scale).map(|a| Arc::new(a) as ArrayRef) } (_, _) => Err(ArrowError::CastError(format!( "Unsupported data type {}, {}", @@ -113,10 +111,8 @@ pub fn multiply_fixed_point_checked( )?; if required_scale == product_scale { - return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| { - a.mul_checked(b) - })? - .with_precision_and_scale(precision, required_scale); + return try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| a.mul_checked(b))?
+ .with_precision_and_scale(precision, required_scale); } try_binary::<_, _, _, Decimal128Type>(left, right, |a, b| { @@ -213,17 +209,16 @@ mod tests { .unwrap(); let err = mul(&a, &b).unwrap_err(); - assert!(err.to_string().contains( - "Overflow happened on: 123456789000000000000000000 * 10000000000000000000" - )); + assert!(err + .to_string() + .contains("Overflow happened on: 123456789000000000000000000 * 10000000000000000000")); // Allow precision loss. let result = multiply_fixed_point_checked(&a, &b, 28).unwrap(); // [1234567890] - let expected = - Decimal128Array::from(vec![12345678900000000000000000000000000000]) - .with_precision_and_scale(38, 28) - .unwrap(); + let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000]) + .with_precision_and_scale(38, 28) + .unwrap(); assert_eq!(&expected, &result); assert_eq!( @@ -233,13 +228,9 @@ mod tests { // Rounding case // [0.000000000000000001, 123456789.555555555555555555, 1.555555555555555555] - let a = Decimal128Array::from(vec![ - 1, - 123456789555555555555555555, - 1555555555555555555, - ]) - .with_precision_and_scale(38, 18) - .unwrap(); + let a = Decimal128Array::from(vec![1, 123456789555555555555555555, 1555555555555555555]) + .with_precision_and_scale(38, 18) + .unwrap(); // [1.555555555555555555, 11.222222222222222222, 0.000000000000000001] let b = Decimal128Array::from(vec![1555555555555555555, 11222222222222222222, 1]) @@ -311,10 +302,9 @@ mod tests { )); let result = multiply_fixed_point(&a, &b, 28).unwrap(); - let expected = - Decimal128Array::from(vec![62946009661555981610246871926660136960]) - .with_precision_and_scale(38, 28) - .unwrap(); + let expected = Decimal128Array::from(vec![62946009661555981610246871926660136960]) + .with_precision_and_scale(38, 28) + .unwrap(); assert_eq!(&expected, &result); } @@ -338,10 +328,9 @@ mod tests { // Avoid overflow by reducing the scale. let result = multiply_fixed_point(&a, &b, 28).unwrap(); // [1234567890] - let expected = - Decimal128Array::from(vec![12345678900000000000000000000000000000]) - .with_precision_and_scale(38, 28) - .unwrap(); + let expected = Decimal128Array::from(vec![12345678900000000000000000000000000000]) + .with_precision_and_scale(38, 28) + .unwrap(); assert_eq!(&expected, &result); assert_eq!( diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs index f3118d104536..ff8b82a5d943 100644 --- a/arrow-arith/src/arity.rs +++ b/arrow-arith/src/arity.rs @@ -49,10 +49,7 @@ where } /// See [`PrimitiveArray::try_unary`] -pub fn try_unary<I, F, O>( - array: &PrimitiveArray<I>, - op: F, -) -> Result<PrimitiveArray<O>, ArrowError> +pub fn try_unary<I, F, O>(array: &PrimitiveArray<I>, op: F) -> Result<PrimitiveArray<O>, ArrowError> where I: ArrowPrimitiveType, O: ArrowPrimitiveType, @@ -86,10 +83,7 @@ where } /// A helper function that applies a fallible unary function to a dictionary array with primitive value type. -fn try_unary_dict<K, F, T>( - array: &DictionaryArray<K>, - op: F, -) -> Result<ArrayRef, ArrowError> +fn try_unary_dict<K, F, T>(array: &DictionaryArray<K>, op: F) -> Result<ArrayRef, ArrowError> where K: ArrowDictionaryKeyType + ArrowNumericType, T: ArrowPrimitiveType, @@ -299,8 +293,7 @@ where try_binary_no_nulls(len, a, b, op) } else { let nulls = - NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()) - .unwrap(); + NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap(); let mut buffer = BufferBuilder::<O::Native>::new(len); buffer.append_n_zeroed(len); @@ -308,8 +301,7 @@ where nulls.try_for_each_valid_idx(|idx| { unsafe { - *slice.get_unchecked_mut(idx) = - op(a.value_unchecked(idx), b.value_unchecked(idx))?
+ *slice.get_unchecked_mut(idx) = op(a.value_unchecked(idx), b.value_unchecked(idx))? }; Ok::<_, ArrowError>(()) })?; @@ -360,8 +352,7 @@ where try_binary_no_nulls_mut(len, a, b, op) } else { let nulls = - NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()) - .unwrap(); + NullBuffer::union(a.logical_nulls().as_ref(), b.logical_nulls().as_ref()).unwrap(); let mut builder = a.into_builder()?; @@ -440,8 +431,7 @@ mod tests { #[test] #[allow(deprecated)] fn test_unary_f64_slice() { - let input = - Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]); + let input = Float64Array::from(vec![Some(5.1f64), None, Some(6.8), None, Some(7.2)]); let input_slice = input.slice(1, 4); let result = unary(&input_slice, |n| n.round()); assert_eq!( diff --git a/arrow-arith/src/bitwise.rs b/arrow-arith/src/bitwise.rs index a5dec4638703..c7885952f8ba 100644 --- a/arrow-arith/src/bitwise.rs +++ b/arrow-arith/src/bitwise.rs @@ -212,10 +212,8 @@ mod tests { #[test] fn test_bitwise_shift_left() { let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]); - let right = - UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]); - let expected = - UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]); + let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(u64::MAX)]); + let expected = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(0)]); let result = bitwise_shift_left(&left, &right).unwrap(); assert_eq!(expected, result); } @@ -224,18 +222,15 @@ fn test_bitwise_shift_left_scalar() { let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(8)]); let scalar = 2; - let expected = - UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]); + let expected = UInt64Array::from(vec![Some(4), Some(8), None, Some(16), Some(32)]); let result = bitwise_shift_left_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); } #[test] fn test_bitwise_shift_right() { - let left = - UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]); - let right = - UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]); + let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]); + let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12), Some(65)]); let expected = UInt64Array::from(vec![Some(1), Some(2), None, Some(4), Some(1)]); let result = bitwise_shift_right(&left, &right).unwrap(); assert_eq!(expected, result); } #[test] fn test_bitwise_shift_right_scalar() { - let left = - UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]); + let left = UInt64Array::from(vec![Some(32), Some(2048), None, Some(16384), Some(3)]); let scalar = 2; - let expected = - UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]); + let expected = UInt64Array::from(vec![Some(8), Some(512), None, Some(4096), Some(0)]); let result = bitwise_shift_right_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); } diff --git a/arrow-arith/src/boolean.rs b/arrow-arith/src/boolean.rs index 46e5998208f1..269a36d66c2b 100644 --- a/arrow-arith/src/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -57,10 +57,7 @@ use arrow_schema::ArrowError; /// # Fails /// /// If the operands have different lengths -pub fn and_kleene( - left: &BooleanArray, - right: &BooleanArray, -) -> Result<BooleanArray, ArrowError> { +pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) ->
Result<BooleanArray, ArrowError> { if left.len() != right.len() { return Err(ArrowError::ComputeError( "Cannot perform bitwise operation on arrays of different length".to_string(), @@ -155,10 +152,7 @@ pub fn and_kleene( /// # Fails /// /// If the operands have different lengths -pub fn or_kleene( - left: &BooleanArray, - right: &BooleanArray, -) -> Result<BooleanArray, ArrowError> { +pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> { if left.len() != right.len() { return Err(ArrowError::ComputeError( "Cannot perform bitwise operation on arrays of different length".to_string(), @@ -257,10 +251,7 @@ where /// let and_ab = and(&a, &b).unwrap(); /// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None])); /// ``` -pub fn and( - left: &BooleanArray, - right: &BooleanArray, -) -> Result<BooleanArray, ArrowError> { +pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray, ArrowError> { binary_boolean_kernel(left, right, |a, b| a & b) } @@ -581,8 +572,7 @@ mod tests { let a = a.as_any().downcast_ref::<BooleanArray>().unwrap(); let c = not(a).unwrap(); - let expected = - BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); + let expected = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); assert_eq!(c, expected); } @@ -631,12 +621,10 @@ mod tests { #[test] fn test_bool_array_and_sliced_same_offset() { let a = BooleanArray::from(vec![ - false, false, false, false, false, false, false, false, false, false, true, - true, + false, false, false, false, false, false, false, false, false, false, true, true, ]); let b = BooleanArray::from(vec![ - false, false, false, false, false, false, false, false, false, true, false, - true, + false, false, false, false, false, false, false, false, false, true, false, true, ]); let a = a.slice(8, 4); @@ -654,12 +642,10 @@ mod tests { #[test] fn test_bool_array_and_sliced_same_offset_mod8() { let a = BooleanArray::from(vec![ - false, false, true, true, false, false, false, false, false, false, false, - false, + false, false, true, true, false, false, false, false, false, false, false, false, ]); let b = BooleanArray::from(vec![ - false, false, false, false, false, false, false, false, false, true, false, - true, + false, false, false, false, false, false, false, false, false, true, false, true, ]); let a = a.slice(0, 4); @@ -677,8 +663,7 @@ mod tests { #[test] fn test_bool_array_and_sliced_offset1() { let a = BooleanArray::from(vec![ - false, false, false, false, false, false, false, false, false, false, true, - true, + false, false, false, false, false, false, false, false, false, false, true, true, ]); let b = BooleanArray::from(vec![false, true, false, true]); @@ -696,8 +681,7 @@ mod tests { fn test_bool_array_and_sliced_offset2() { let a = BooleanArray::from(vec![false, false, true, true]); let b = BooleanArray::from(vec![ - false, false, false, false, false, false, false, false, false, true, false, - true, + false, false, false, false, false, false, false, false, false, true, false, true, ]); let b = b.slice(8, 4); @@ -730,8 +714,7 @@ mod tests { let c = and(a, b).unwrap(); - let expected = - BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]); + let expected = BooleanArray::from(vec![Some(false), Some(false), None, Some(true)]); assert_eq!(expected, c); } diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs index c47731ed5125..b2c87bba5143 100644 --- a/arrow-arith/src/numeric.rs +++ b/arrow-arith/src/numeric.rs @@ -144,13 +144,13 @@ pub fn neg(array: &dyn Array) -> Result<ArrayRef, ArrowError> { let a = array .as_primitive::<IntervalMonthDayNanoType>() .try_unary::<_, IntervalMonthDayNanoType,
 ArrowError>(|x| { - let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x); - Ok(IntervalMonthDayNanoType::make_value( - months.neg_checked()?, - days.neg_checked()?, - nanos.neg_checked()?, - )) - })?; + let (months, days, nanos) = IntervalMonthDayNanoType::to_parts(x); + Ok(IntervalMonthDayNanoType::make_value( + months.neg_checked()?, + days.neg_checked()?, + nanos.neg_checked()?, + )) + })?; Ok(Arc::new(a)) } t => Err(ArrowError::InvalidArgumentError(format!( @@ -201,11 +201,7 @@ impl Op { } /// Dispatch the given `op` to the appropriate specialized kernel -fn arithmetic_op( - op: Op, - lhs: &dyn Datum, - rhs: &dyn Datum, -) -> Result<ArrayRef, ArrowError> { +fn arithmetic_op(op: Op, lhs: &dyn Datum, rhs: &dyn Datum) -> Result<ArrayRef, ArrowError> { use DataType::*; use IntervalUnit::*; use TimeUnit::*; @@ -675,8 +671,7 @@ fn date_op( (Date64, Op::Sub | Op::SubWrapping, Date64) => { let l = l.as_primitive::<Date64Type>(); let r = r.as_primitive::<Date64Type>(); - let result = - try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r)); + let result = try_op_ref!(DurationMillisecondType, l, l_s, r, r_s, l.sub_checked(r)); return Ok(result); } _ => {} @@ -800,8 +795,7 @@ fn decimal_op( let mul_pow = result_scale - s1 + s2; // p1 - s1 + s2 + result_scale - let result_precision = - (mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION); + let result_precision = (mul_pow.saturating_add(*p1 as i8) as u8).min(T::MAX_PRECISION); let (l_mul, r_mul) = match mul_pow.cmp(&0) { Ordering::Greater => ( @@ -1158,7 +1152,10 @@ mod tests { .with_precision_and_scale(3, -1) .unwrap(); let err = add(&a, &b).unwrap_err().to_string(); - assert_eq!(err, "Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000"); + assert_eq!( + err, + "Compute error: Overflow happened on: 10 * 100000000000000000000000000000000000000" + ); let b = Decimal128Array::from(vec![0]) .with_precision_and_scale(1, 1) @@ -1199,9 +1196,7 @@ mod tests { "1960-01-30T04:23:20Z", ] .into_iter() - .map(|x| { - T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap() - }) + .map(|x| T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap()) .collect(); let a = PrimitiveArray::<T>::new(values, None); diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs index 7855b6fc6e46..a9c3de5401c1 100644 --- a/arrow-arith/src/temporal.rs +++ b/arrow-arith/src/temporal.rs @@ -23,9 +23,7 @@ use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike}; use arrow_array::builder::*; use arrow_array::iterator::ArrayIter; -use arrow_array::temporal_conversions::{ - as_datetime, as_datetime_with_timezone, as_time, -}; +use arrow_array::temporal_conversions::{as_datetime, as_datetime_with_timezone, as_time}; use arrow_array::timezone::Tz; use arrow_array::types::*; use arrow_array::*; @@ -209,12 +207,9 @@ where } DataType::Timestamp(_, Some(tz)) => { let iter = ArrayIter::new(array); - extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>( - iter, - b, - tz, - |t| t.hour() as i32, - ) + extract_component_from_datetime_array::<&PrimitiveArray<T>, T, _>(iter, b, tz, |t| { + t.hour() as i32 + }) } _ => return_compute_error_with!("hour does not support", array.data_type()), } @@ -289,9 +284,7 @@ pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> { -pub fn num_days_from_monday<T>( - array: &PrimitiveArray<T>, -) -> Result<Int32Array, ArrowError> +pub fn num_days_from_monday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError> where T: ArrowTemporalType + ArrowNumericType, i64: From<T::Native>, @@ -318,9 +311,7 @@ pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> { -pub fn num_days_from_sunday<T>( - array:
&PrimitiveArray<T>, -) -> Result<Int32Array, ArrowError> +pub fn num_days_from_sunday<T>(array: &PrimitiveArray<T>) -> Result<Int32Array, ArrowError> where T: ArrowTemporalType + ArrowNumericType, i64: From<T::Native>, @@ -449,11 +440,7 @@ pub fn millisecond_dyn(array: &dyn Array) -> Result<ArrayRef, ArrowError> { } /// Extracts the time fraction of a given temporal array as an array of integers -fn time_fraction_dyn<F>( - array: &dyn Array, - name: &str, - op: F, -) -> Result<ArrayRef, ArrowError> +fn time_fraction_dyn<F>(array: &dyn Array, name: &str, op: F) -> Result<ArrayRef, ArrowError> where F: Fn(NaiveDateTime) -> i32, { @@ -498,14 +485,9 @@ where } DataType::Timestamp(_, Some(tz)) => { let iter = ArrayIter::new(array); - extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| { - op(t.naive_local()) - }) + extract_component_from_datetime_array::<_, T, _>(iter, b, tz, |t| op(t.naive_local())) } - _ => return_compute_error_with!( - format!("{name} does not support"), - array.data_type() - ), + _ => return_compute_error_with!(format!("{name} does not support"), array.data_type()), } } @@ -559,8 +541,7 @@ mod tests { #[test] fn test_temporal_array_time64_micro_hour() { - let a: PrimitiveArray<Time64MicrosecondType> = - vec![37800000000, 86339000000].into(); + let a: PrimitiveArray<Time64MicrosecondType> = vec![37800000000, 86339000000].into(); let b = hour(&a).unwrap(); assert_eq!(10, b.value(0)); @@ -623,12 +604,10 @@ mod tests { #[test] fn test_temporal_array_timestamp_quarter_with_timezone() { // 24 * 60 * 60 = 86400 - let a = TimestampSecondArray::from(vec![86400 * 90]) - .with_timezone("+00:00".to_string()); + let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("+00:00".to_string()); let b = quarter(&a).unwrap(); assert_eq!(2, b.value(0)); - let a = TimestampSecondArray::from(vec![86400 * 90]) - .with_timezone("-10:00".to_string()); + let a = TimestampSecondArray::from(vec![86400 * 90]).with_timezone("-10:00".to_string()); let b = quarter(&a).unwrap(); assert_eq!(1, b.value(0)); } @@ -659,12 +638,10 @@ mod tests { #[test] fn test_temporal_array_timestamp_month_with_timezone() { // 24 * 60 * 60 = 86400 - let a = TimestampSecondArray::from(vec![86400 * 31]) - .with_timezone("+00:00".to_string()); + let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("+00:00".to_string()); let b = month(&a).unwrap(); assert_eq!(2, b.value(0)); - let a = TimestampSecondArray::from(vec![86400 * 31]) - .with_timezone("-10:00".to_string()); + let a = TimestampSecondArray::from(vec![86400 * 31]).with_timezone("-10:00".to_string()); let b = month(&a).unwrap(); assert_eq!(1, b.value(0)); } @@ -672,12 +649,10 @@ mod tests { #[test] fn test_temporal_array_timestamp_day_with_timezone() { // 24 * 60 * 60 = 86400 - let a = - TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string()); + let a = TimestampSecondArray::from(vec![86400]).with_timezone("+00:00".to_string()); let b = day(&a).unwrap(); assert_eq!(2, b.value(0)); - let a = - TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string()); + let a = TimestampSecondArray::from(vec![86400]).with_timezone("-10:00".to_string()); let b = day(&a).unwrap(); assert_eq!(1, b.value(0)); } @@ -857,8 +832,7 @@ mod tests { #[test] fn test_temporal_array_timestamp_second_with_timezone() { - let a = - TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string()); + let a = TimestampSecondArray::from(vec![10, 20]).with_timezone("+00:00".to_string()); let b = second(&a).unwrap(); assert_eq!(10, b.value(0)); assert_eq!(20, b.value(1)); } #[test] fn test_temporal_array_timestamp_minute_with_timezone() { - let a = -
TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string()); + let a = TimestampSecondArray::from(vec![0, 60]).with_timezone("+00:50".to_string()); let b = minute(&a).unwrap(); assert_eq!(50, b.value(0)); assert_eq!(51, b.value(1)); @@ -875,48 +848,42 @@ mod tests { #[test] fn test_temporal_array_timestamp_minute_with_negative_timezone() { - let a = - TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string()); + let a = TimestampSecondArray::from(vec![60 * 55]).with_timezone("-00:50".to_string()); let b = minute(&a).unwrap(); assert_eq!(5, b.value(0)); } #[test] fn test_temporal_array_timestamp_hour_with_timezone() { - let a = TimestampSecondArray::from(vec![60 * 60 * 10]) - .with_timezone("+01:00".to_string()); + let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01:00".to_string()); let b = hour(&a).unwrap(); assert_eq!(11, b.value(0)); } #[test] fn test_temporal_array_timestamp_hour_with_timezone_without_colon() { - let a = TimestampSecondArray::from(vec![60 * 60 * 10]) - .with_timezone("+0100".to_string()); + let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+0100".to_string()); let b = hour(&a).unwrap(); assert_eq!(11, b.value(0)); } #[test] fn test_temporal_array_timestamp_hour_with_timezone_without_minutes() { - let a = TimestampSecondArray::from(vec![60 * 60 * 10]) - .with_timezone("+01".to_string()); + let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("+01".to_string()); let b = hour(&a).unwrap(); assert_eq!(11, b.value(0)); } #[test] fn test_temporal_array_timestamp_hour_with_timezone_without_initial_sign() { - let a = TimestampSecondArray::from(vec![60 * 60 * 10]) - .with_timezone("0100".to_string()); + let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("0100".to_string()); let err = hour(&a).unwrap_err().to_string(); assert!(err.contains("Invalid timezone"), "{}", err); } #[test] fn test_temporal_array_timestamp_hour_with_timezone_with_only_colon() { - let a = TimestampSecondArray::from(vec![60 * 60 * 10]) - .with_timezone("01:00".to_string()); + let a = TimestampSecondArray::from(vec![60 * 60 * 10]).with_timezone("01:00".to_string()); let err = hour(&a).unwrap_err().to_string(); assert!(err.contains("Invalid timezone"), "{}", err); } @@ -960,10 +927,8 @@ mod tests { let b = hour_dyn(&dict).unwrap(); - let expected_dict = DictionaryArray::new( - keys.clone(), - Arc::new(Int32Array::from(vec![11, 21, 7])), - ); + let expected_dict = + DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![11, 21, 7]))); let expected = Arc::new(expected_dict) as ArrayRef; assert_eq!(&expected, &b); @@ -987,8 +952,7 @@ mod tests { assert_eq!(&expected, &b); assert_eq!(&expected, &b_old); - let b = - time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap(); + let b = time_fraction_dyn(&dict, "nanosecond", |t| t.nanosecond() as i32).unwrap(); let expected_dict = DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![0, 0, 0, 0, 0]))); @@ -998,8 +962,7 @@ mod tests { #[test] fn test_year_dictionary_array() { - let a: PrimitiveArray<Date64Type> = - vec![Some(1514764800000), Some(1550636625000)].into(); + let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into(); let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]); let dict = DictionaryArray::new(keys.clone(), Arc::new(a)); @@ -1018,24 +981,20 @@ mod tests { fn test_quarter_month_dictionary_array() { //1514764800000 -> 2018-01-01 //1566275025000 -> 2019-08-20 - let a: PrimitiveArray<Date64Type> = -
vec![Some(1514764800000), Some(1566275025000)].into(); + let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1566275025000)].into(); let keys = Int8Array::from_iter_values([0_i8, 1, 1, 0]); let dict = DictionaryArray::new(keys.clone(), Arc::new(a)); let b = quarter_dyn(&dict).unwrap(); - let expected = DictionaryArray::new( - keys.clone(), - Arc::new(Int32Array::from(vec![1, 3, 3, 1])), - ); + let expected = + DictionaryArray::new(keys.clone(), Arc::new(Int32Array::from(vec![1, 3, 3, 1]))); assert_eq!(b.as_ref(), &expected); let b = month_dyn(&dict).unwrap(); - let expected = - DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1]))); + let expected = DictionaryArray::new(keys, Arc::new(Int32Array::from(vec![1, 8, 8, 1]))); assert_eq!(b.as_ref(), &expected); } @@ -1043,8 +1002,7 @@ mod tests { fn test_num_days_from_monday_sunday_day_doy_week_dictionary_array() { //1514764800000 -> 2018-01-01 (Monday) //1550636625000 -> 2019-02-20 (Wednesday) - let a: PrimitiveArray<Date64Type> = - vec![Some(1514764800000), Some(1550636625000)].into(); + let a: PrimitiveArray<Date64Type> = vec![Some(1514764800000), Some(1550636625000)].into(); let keys = Int8Array::from(vec![Some(0_i8), Some(1), Some(1), Some(0), None]); let dict = DictionaryArray::new(keys.clone(), Arc::new(a)); diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml index 80a6eb3f541e..4f7ab24f9708 100644 --- a/arrow-array/Cargo.toml +++ b/arrow-array/Cargo.toml @@ -44,7 +44,7 @@ ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] arrow-buffer = { workspace = true } arrow-schema = { workspace = true } arrow-data = { workspace = true } -chrono = { version = "0.4.24", default-features = false, features = ["clock"] } +chrono = { workspace = true } chrono-tz = { version = "0.8", optional = true } num = { version = "0.4.1", default-features = false, features = ["std"] } half = { version = "2.1", default-features = false, features = ["num-traits"] } diff --git a/arrow-array/src/arithmetic.rs b/arrow-array/src/arithmetic.rs index b0ecef70ee19..c9be39d44144 100644 --- a/arrow-array/src/arithmetic.rs +++ b/arrow-array/src/arithmetic.rs @@ -229,10 +229,7 @@ macro_rules! native_type_op { #[inline] fn pow_checked(self, exp: u32) -> Result<Self, ArrowError> { self.checked_pow(exp).ok_or_else(|| { - ArrowError::ComputeError(format!( - "Overflow happened on: {:?} ^ {exp:?}", - self - )) + ArrowError::ComputeError(format!("Overflow happened on: {:?} ^ {exp:?}", self)) }) } diff --git a/arrow-array/src/array/binary_array.rs b/arrow-array/src/array/binary_array.rs index 75880bec30ce..6b18cbc2d9f7 100644 --- a/arrow-array/src/array/binary_array.rs +++ b/arrow-array/src/array/binary_array.rs @@ -16,9 +16,7 @@ // under the License.
use crate::types::{ByteArrayType, GenericBinaryType}; -use crate::{ - Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait, -}; +use crate::{Array, GenericByteArray, GenericListArray, GenericStringArray, OffsetSizeTrait}; use arrow_data::ArrayData; use arrow_schema::DataType; @@ -102,9 +100,7 @@ impl GenericBinaryArray { } } -impl From>> - for GenericBinaryArray -{ +impl From>> for GenericBinaryArray { fn from(v: Vec>) -> Self { Self::from_opt_vec(v) } @@ -376,9 +372,11 @@ mod tests { .unwrap(); let binary_array1 = GenericBinaryArray::::from(array_data1); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt8, false), - )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt8, + false, + ))); let array_data2 = ArrayData::builder(data_type) .len(3) @@ -423,9 +421,11 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt8, false), - )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt8, + false, + ))); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -456,9 +456,7 @@ mod tests { _test_generic_binary_array_from_list_array_with_offset::(); } - fn _test_generic_binary_array_from_list_array_with_child_nulls_failed< - O: OffsetSizeTrait, - >() { + fn _test_generic_binary_array_from_list_array_with_child_nulls_failed() { let values = b"HelloArrow"; let child_data = ArrayData::builder(DataType::UInt8) .len(10) @@ -468,9 +466,11 @@ mod tests { .unwrap(); let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt8, true), - )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt8, + true, + ))); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -558,8 +558,7 @@ mod tests { .unwrap(); let offsets: [i32; 4] = [0, 5, 5, 12]; - let data_type = - DataType::List(Arc::new(Field::new("item", DataType::UInt32, false))); + let data_type = DataType::List(Arc::new(Field::new("item", DataType::UInt32, false))); let array_data = ArrayData::builder(data_type) .len(3) .add_buffer(Buffer::from_slice_ref(offsets)) @@ -575,8 +574,7 @@ mod tests { expected = "Trying to access an element at index 4 from a BinaryArray of length 3" )] fn test_binary_array_get_value_index_out_of_bound() { - let values: [u8; 12] = - [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116]; + let values: [u8; 12] = [104, 101, 108, 108, 111, 112, 97, 114, 113, 117, 101, 116]; let offsets: [i32; 4] = [0, 5, 5, 12]; let array_data = ArrayData::builder(DataType::Binary) .len(3) diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs index 4d19babe3e4b..a778dc92ea35 100644 --- a/arrow-array/src/array/boolean_array.rs +++ b/arrow-array/src/array/boolean_array.rs @@ -238,11 +238,7 @@ impl BooleanArray { /// /// This function panics if left and right are not the same length /// - pub fn from_binary( - left: T, - right: S, - mut op: F, - ) -> Self + pub fn from_binary(left: T, right: S, mut op: F) -> Self where F: FnMut(T::Item, S::Item) -> bool, { @@ -362,8 +358,7 @@ impl From for BooleanArray { 1, "BooleanArray data should 
contain a single buffer only (values buffer)" ); - let values = - BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); + let values = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); Self { values, @@ -591,9 +586,7 @@ mod tests { } #[test] - #[should_panic( - expected = "BooleanArray expected ArrayData with type Boolean got Int32" - )] + #[should_panic(expected = "BooleanArray expected ArrayData with type Boolean got Int32")] fn test_from_array_data_validation() { let _ = BooleanArray::from(ArrayData::new_empty(&DataType::Int32)); } diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index 37d8de931e99..db825bbea97d 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -197,8 +197,7 @@ impl GenericByteArray { let (_, data_len) = iter.size_hint(); let data_len = data_len.expect("Iterator must be sized"); // panic if no upper bound. - let mut offsets = - MutableBuffer::new((data_len + 1) * std::mem::size_of::()); + let mut offsets = MutableBuffer::new((data_len + 1) * std::mem::size_of::()); offsets.push(T::Offset::usize_as(0)); let mut values = MutableBuffer::new(0); @@ -335,8 +334,7 @@ impl GenericByteArray { /// offset and data buffers are not shared by others. pub fn into_builder(self) -> Result, Self> { let len = self.len(); - let value_len = - T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]); + let value_len = T::Offset::as_usize(self.value_offsets()[len] - self.value_offsets()[0]); let data = self.into_data(); let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); @@ -578,17 +576,14 @@ mod tests { let nulls = NullBuffer::new_null(3); let err = - StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())) - .unwrap_err(); + StringArray::try_new(offsets.clone(), data.clone(), Some(nulls.clone())).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for StringArray, expected 2 got 3"); - let err = - BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err(); + let err = BinaryArray::try_new(offsets.clone(), data.clone(), Some(nulls)).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for BinaryArray, expected 2 got 3"); let non_utf8_data = Buffer::from_slice_ref(b"he\xFFloworld"); - let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None) - .unwrap_err(); + let err = StringArray::try_new(offsets.clone(), non_utf8_data.clone(), None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Encountered non UTF-8 data: invalid utf-8 sequence of 1 bytes from index 2"); BinaryArray::new(offsets, non_utf8_data, None); @@ -611,8 +606,7 @@ mod tests { BinaryArray::new(offsets, non_ascii_data.clone(), None); let offsets = OffsetBuffer::new(vec![0, 3, 10].into()); - let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None) - .unwrap_err(); + let err = StringArray::try_new(offsets.clone(), non_ascii_data.clone(), None).unwrap_err(); assert_eq!( err.to_string(), "Invalid argument error: Split UTF-8 codepoint at offset 3" diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 5896cf02dfaa..1f4d83b1c5d0 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -286,10 +286,7 @@ impl DictionaryArray { /// # Errors /// /// Returns an error if any `keys[i] >= values.len() || keys[i] < 
0` - pub fn try_new( - keys: PrimitiveArray, - values: ArrayRef, - ) -> Result { + pub fn try_new(keys: PrimitiveArray, values: ArrayRef) -> Result { let data_type = DataType::Dictionary( Box::new(keys.data_type().clone()), Box::new(values.data_type().clone()), @@ -298,9 +295,11 @@ impl DictionaryArray { let zero = K::Native::usize_as(0); let values_len = values.len(); - if let Some((idx, v)) = keys.values().iter().enumerate().find(|(idx, v)| { - (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx) - }) { + if let Some((idx, v)) = + keys.values().iter().enumerate().find(|(idx, v)| { + (v.is_lt(zero) || v.as_usize() >= values_len) && keys.is_valid(*idx) + }) + { return Err(ArrowError::InvalidArgumentError(format!( "Invalid dictionary key {v:?} at index {idx}, expected 0 <= key < {values_len}", ))); @@ -349,8 +348,7 @@ impl DictionaryArray { /// /// Panics if `values` is not a [`StringArray`]. pub fn lookup_key(&self, value: &str) -> Option { - let rd_buf: &StringArray = - self.values.as_any().downcast_ref::().unwrap(); + let rd_buf: &StringArray = self.values.as_any().downcast_ref::().unwrap(); (0..rd_buf.len()) .position(|i| rd_buf.value(i) == value) @@ -463,10 +461,8 @@ impl DictionaryArray { /// pub fn with_values(&self, values: ArrayRef) -> Self { assert!(values.len() >= self.values.len()); - let data_type = DataType::Dictionary( - Box::new(K::DATA_TYPE), - Box::new(values.data_type().clone()), - ); + let data_type = + DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone())); Self { data_type, keys: self.keys.clone(), @@ -477,9 +473,7 @@ impl DictionaryArray { /// Returns `PrimitiveDictionaryBuilder` of this dictionary array for mutating /// its keys and values if the underlying data buffer is not shared by others. 
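// A hedged sketch of the key-bounds rule enforced by the `try_new` validation
// above: every non-null key must satisfy 0 <= key < values.len(), while null
// keys may hold arbitrary masked values. The helper name is illustrative.
use std::sync::Arc;
use arrow_array::types::Int32Type;
use arrow_array::{Array, ArrayRef, DictionaryArray, Int32Array, StringArray};

fn dictionary_key_bounds_sketch() {
    let values: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar"]));

    // Valid: all non-null keys index into the two values
    let keys = Int32Array::from(vec![Some(0), Some(1), None]);
    let dict = DictionaryArray::<Int32Type>::try_new(keys, values.clone()).unwrap();
    assert_eq!(dict.len(), 3);

    // Invalid: key 3 is out of bounds for a 2-element values array
    let keys = Int32Array::from(vec![Some(3)]);
    assert!(DictionaryArray::<Int32Type>::try_new(keys, values).is_err());
}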
- pub fn into_primitive_dict_builder( - self, - ) -> Result, Self> + pub fn into_primitive_dict_builder(self) -> Result, Self> where V: ArrowPrimitiveType, { @@ -540,8 +534,7 @@ impl DictionaryArray { V: ArrowPrimitiveType, F: Fn(V::Native) -> V::Native, { - let mut builder: PrimitiveDictionaryBuilder = - self.into_primitive_dict_builder()?; + let mut builder: PrimitiveDictionaryBuilder = self.into_primitive_dict_builder()?; builder .values_slice_mut() .iter_mut() @@ -806,9 +799,7 @@ impl<'a, K: ArrowDictionaryKeyType, V> Clone for TypedDictionaryArray<'a, K, V> impl<'a, K: ArrowDictionaryKeyType, V> Copy for TypedDictionaryArray<'a, K, V> {} -impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug - for TypedDictionaryArray<'a, K, V> -{ +impl<'a, K: ArrowDictionaryKeyType, V> std::fmt::Debug for TypedDictionaryArray<'a, K, V> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { writeln!(f, "TypedDictionaryArray({:?})", self.dictionary) } @@ -1005,7 +996,7 @@ impl AnyDictionaryArray for DictionaryArray { let v_len = self.values().len(); assert_ne!(v_len, 0); let iter = self.keys().values().iter(); - iter.map(|x| x.as_usize().min(v_len)).collect() + iter.map(|x| x.as_usize().min(v_len - 1)).collect() } fn with_values(&self, values: ArrayRef) -> ArrayRef { @@ -1040,8 +1031,7 @@ mod tests { // Construct a dictionary array from the above two let key_type = DataType::Int16; let value_type = DataType::Int8; - let dict_data_type = - DataType::Dictionary(Box::new(key_type), Box::new(value_type)); + let dict_data_type = DataType::Dictionary(Box::new(key_type), Box::new(value_type)); let dict_data = ArrayData::builder(dict_data_type.clone()) .len(3) .add_buffer(keys.clone()) @@ -1079,8 +1069,7 @@ mod tests { #[test] fn test_dictionary_array_fmt_debug() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(3, 2); + let mut builder = PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(12345678).unwrap(); builder.append_null(); builder.append(22345678).unwrap(); @@ -1090,8 +1079,7 @@ mod tests { format!("{array:?}") ); - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(20, 2); + let mut builder = PrimitiveDictionaryBuilder::::with_capacity(20, 2); for _ in 0..20 { builder.append(1).unwrap(); } @@ -1267,9 +1255,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2" - )] + #[should_panic(expected = "Invalid dictionary key 3 at index 1, expected 0 <= key < 2")] fn test_try_new_index_too_large() { let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); // dictionary only has 2 values, so offset 3 is out of bounds @@ -1278,9 +1264,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2" - )] + #[should_panic(expected = "Invalid dictionary key -100 at index 0, expected 0 <= key < 2")] fn test_try_new_index_too_small() { let values: StringArray = [Some("foo"), Some("bar")].into_iter().collect(); let keys: Int32Array = [Some(-100)].into_iter().collect(); @@ -1288,9 +1272,7 @@ mod tests { } #[test] - #[should_panic( - expected = "DictionaryArray's data type must match, expected Int64 got Int32" - )] + #[should_panic(expected = "DictionaryArray's data type must match, expected Int64 got Int32")] fn test_from_array_data_validation() { let a = DictionaryArray::::from_iter(["32"]); let _ = DictionaryArray::::from(a.into_data()); @@ -1335,8 +1317,7 @@ mod tests { let boxed: ArrayRef = Arc::new(dict_array); - let 
col: DictionaryArray = - DictionaryArray::::from(boxed.to_data()); + let col: DictionaryArray = DictionaryArray::::from(boxed.to_data()); let err = col.into_primitive_dict_builder::(); let returned = err.unwrap_err(); @@ -1385,4 +1366,13 @@ mod tests { .collect(); assert_eq!(values, &[Some(50), None, None, Some(2)]) } + + #[test] + fn test_normalized_keys() { + let values = vec![132, 0, 1].into(); + let nulls = NullBuffer::from(vec![false, true, true]); + let keys = Int32Array::new(values, Some(nulls)); + let dictionary = DictionaryArray::new(keys, Arc::new(Int32Array::new_null(2))); + assert_eq!(&dictionary.normalized_keys(), &[1, 0, 1]) + } } diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index 74a7c4c7a84a..d89bbd5ad084 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -81,10 +81,7 @@ impl FixedSizeBinaryArray { ) -> Result { let data_type = DataType::FixedSizeBinary(size); let s = size.to_usize().ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Size cannot be negative, got {}", - size - )) + ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size)) })?; let len = values.len() / s; @@ -179,9 +176,18 @@ impl FixedSizeBinaryArray { self.value_length } - /// Returns a clone of the value data buffer - pub fn value_data(&self) -> Buffer { - self.value_data.clone() + /// Returns the values of this array. + /// + /// Unlike [`Self::value_data`] this returns the [`Buffer`] + /// allowing for zero-copy cloning. + #[inline] + pub fn values(&self) -> &Buffer { + &self.value_data + } + + /// Returns the raw value data. + pub fn value_data(&self) -> &[u8] { + self.value_data.as_slice() } /// Returns a zero-copy slice of this array with the indicated offset and length. @@ -324,10 +330,7 @@ impl FixedSizeBinaryArray { /// # Errors /// /// Returns error if argument has length zero, or sizes of nested slices don't match. 
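// A small sketch of the fallible constructor documented above (and whose
// signature follows): because the element width is passed explicitly, an
// input containing Nones still produces a well-typed FixedSizeBinary(2)
// array. The helper name is illustrative.
use arrow_array::{Array, FixedSizeBinaryArray};

fn sparse_iter_with_size_sketch() {
    let input = vec![Some(vec![7u8, 8]), None, Some(vec![9, 10])];
    let arr =
        FixedSizeBinaryArray::try_from_sparse_iter_with_size(input.into_iter(), 2).unwrap();
    assert_eq!(arr.value_length(), 2);
    assert_eq!(arr.len(), 3);
    assert!(arr.is_null(1));
}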
- pub fn try_from_sparse_iter_with_size( - mut iter: T, - size: i32, - ) -> Result + pub fn try_from_sparse_iter_with_size(mut iter: T, size: i32) -> Result where T: Iterator>, U: AsRef<[u8]>, @@ -803,8 +806,7 @@ mod tests { let none_option: Option<[u8; 32]> = None; let input_arg = vec![none_option, none_option, none_option]; #[allow(deprecated)] - let arr = - FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); + let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.into_iter()).unwrap(); assert_eq!(0, arr.value_length()); assert_eq!(3, arr.len()) } @@ -819,16 +821,12 @@ mod tests { Some(vec![13, 14]), ]; #[allow(deprecated)] - let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()) - .unwrap(); + let arr = FixedSizeBinaryArray::try_from_sparse_iter(input_arg.iter().cloned()).unwrap(); assert_eq!(2, arr.value_length()); assert_eq!(5, arr.len()); - let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size( - input_arg.into_iter(), - 2, - ) - .unwrap(); + let arr = + FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 2).unwrap(); assert_eq!(2, arr.value_length()); assert_eq!(5, arr.len()); } @@ -837,11 +835,8 @@ mod tests { fn test_fixed_size_binary_array_from_sparse_iter_with_size_all_none() { let input_arg = vec![None, None, None, None, None] as Vec>>; - let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size( - input_arg.into_iter(), - 16, - ) - .unwrap(); + let arr = FixedSizeBinaryArray::try_from_sparse_iter_with_size(input_arg.into_iter(), 16) + .unwrap(); assert_eq!(16, arr.value_length()); assert_eq!(5, arr.len()) } @@ -908,8 +903,7 @@ mod tests { fn fixed_size_binary_array_all_null() { let data = vec![None] as Vec>; let array = - FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0) - .unwrap(); + FixedSizeBinaryArray::try_from_sparse_iter_with_size(data.into_iter(), 0).unwrap(); array .into_data() .validate_full() @@ -919,8 +913,7 @@ mod tests { #[test] // Test for https://github.com/apache/arrow-rs/issues/1390 fn fixed_size_binary_array_all_null_in_batch_with_schema() { - let schema = - Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]); + let schema = Schema::new(vec![Field::new("a", DataType::FixedSizeBinary(2), true)]); let none_option: Option<[u8; 2]> = None; let item = FixedSizeBinaryArray::try_from_sparse_iter_with_size( diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index db3ccbe0617b..f8f01516e3d4 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -130,12 +130,7 @@ impl FixedSizeListArray { /// # Panics /// /// Panics if [`Self::try_new`] returns an error - pub fn new( - field: FieldRef, - size: i32, - values: ArrayRef, - nulls: Option, - ) -> Self { + pub fn new(field: FieldRef, size: i32, values: ArrayRef, nulls: Option) -> Self { Self::try_new(field, size, values, nulls).unwrap() } @@ -154,10 +149,7 @@ impl FixedSizeListArray { nulls: Option, ) -> Result { let s = size.to_usize().ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Size cannot be negative, got {}", - size - )) + ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size)) })?; let len = values.len() / s.max(1); @@ -350,9 +342,8 @@ impl From for FixedSizeListArray { }; let size = value_length as usize; - let values = make_array( - data.child_data()[0].slice(data.offset() * size, data.len() * size), - ); + let values 
= + make_array(data.child_data()[0].slice(data.offset() * size, data.len() * size)); Self { data_type: data.data_type().clone(), values, @@ -483,10 +474,8 @@ mod tests { .unwrap(); // Construct a list array from the above two - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, false)), - 3, - ); + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_child_data(value_data.clone()) @@ -538,10 +527,8 @@ mod tests { .unwrap(); // Construct a list array from the above two - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, false)), - 3, - ); + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 3); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -569,10 +556,8 @@ mod tests { bit_util::set_bit(&mut null_bits, 4); // Construct a fixed size list array from the above two - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, false)), - 2, - ); + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data.clone()) @@ -611,9 +596,7 @@ mod tests { } #[test] - #[should_panic( - expected = "the offset of the new Buffer cannot exceed the existing length" - )] + #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn test_fixed_size_list_array_index_out_of_bound() { // Construct a value array let value_data = ArrayData::builder(DataType::Int32) @@ -631,10 +614,8 @@ mod tests { bit_util::set_bit(&mut null_bits, 4); // Construct a fixed size list array from the above two - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, false)), - 2, - ); + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, false)), 2); let list_data = ArrayData::builder(list_data_type) .len(5) .add_child_data(value_data) @@ -668,8 +649,7 @@ mod tests { let list = FixedSizeListArray::new(field.clone(), 4, values.clone(), None); assert_eq!(list.len(), 1); - let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None) - .unwrap_err(); + let err = FixedSizeListArray::try_new(field.clone(), -1, values.clone(), None).unwrap_err(); assert_eq!( err.to_string(), "Invalid argument error: Size cannot be negative, got -1" @@ -679,13 +659,11 @@ mod tests { assert_eq!(list.len(), 6); let nulls = NullBuffer::new_null(2); - let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)) - .unwrap_err(); + let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Incorrect length of null buffer for FixedSizeListArray, expected 3 got 2"); let field = Arc::new(Field::new("item", DataType::Int32, false)); - let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None) - .unwrap_err(); + let err = FixedSizeListArray::try_new(field.clone(), 2, values.clone(), None).unwrap_err(); assert_eq!(err.to_string(), "Invalid argument error: Found unmasked nulls for non-nullable FixedSizeListArray field \"item\""); // Valid as nulls in child masked by parent diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index 
e36d0ac4434f..9758c112a1ef 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -372,9 +372,8 @@ impl GenericListArray { impl From for GenericListArray { fn from(data: ArrayData) -> Self { - Self::try_new_from_array_data(data).expect( - "Expected infallible creation of GenericListArray from ArrayDataRef failed", - ) + Self::try_new_from_array_data(data) + .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed") } } @@ -391,17 +390,14 @@ impl From> for ArrayDa } } -impl From - for GenericListArray -{ +impl From for GenericListArray { fn from(value: FixedSizeListArray) -> Self { let (field, size) = match value.data_type() { DataType::FixedSizeList(f, size) => (f, *size as usize), _ => unreachable!(), }; - let offsets = - OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len())); + let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(value.len())); Self { data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()), @@ -415,9 +411,10 @@ impl From impl GenericListArray { fn try_new_from_array_data(data: ArrayData) -> Result { if data.buffers().len() != 1 { - return Err(ArrowError::InvalidArgumentError( - format!("ListArray data should contain a single buffer only (value offsets), had {}", - data.buffers().len()))); + return Err(ArrowError::InvalidArgumentError(format!( + "ListArray data should contain a single buffer only (value offsets), had {}", + data.buffers().len() + ))); } if data.child_data().len() != 1 { @@ -593,8 +590,7 @@ mod tests { let value_offsets = Buffer::from([]); // Construct a list array from the above two - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(0) .add_buffer(value_offsets) @@ -620,8 +616,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type.clone()) .len(3) .add_buffer(value_offsets.clone()) @@ -807,8 +802,7 @@ mod tests { bit_util::set_bit(&mut null_bits, 8); // Construct a list array from the above two - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(9) .add_buffer(value_offsets) @@ -839,8 +833,7 @@ mod tests { } // Check offset and length for each non-null value. 
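// A hedged sketch of the offsets/length bookkeeping these list tests
// exercise, using the from_iter_primitive convenience constructor rather
// than raw ArrayData: a list of len n stores n + 1 offsets, and
// value_length(i) is the delta between consecutive offsets. The helper
// name is illustrative.
use arrow_array::types::Int32Type;
use arrow_array::{Array, ListArray};

fn list_offsets_sketch() {
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(0), Some(1)]),
        None,
        Some(vec![Some(2), Some(3), Some(4)]),
    ]);
    assert_eq!(list.value_offsets(), &[0, 2, 2, 5]);
    assert_eq!(list.value_length(2), 3);
    assert!(list.is_null(1));
}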
- let sliced_list_array = - sliced_array.as_any().downcast_ref::().unwrap(); + let sliced_list_array = sliced_array.as_any().downcast_ref::().unwrap(); assert_eq!(2, sliced_list_array.value_offsets()[2]); assert_eq!(2, sliced_list_array.value_length(2)); assert_eq!(4, sliced_list_array.value_offsets()[3]); @@ -951,9 +944,7 @@ mod tests { list_array.value(10); } #[test] - #[should_panic( - expected = "ListArray data should contain a single buffer only (value offsets)" - )] + #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")] // Different error messages, so skip for now // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] @@ -964,8 +955,7 @@ mod tests { .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) .build_unchecked() }; - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -976,16 +966,13 @@ mod tests { } #[test] - #[should_panic( - expected = "ListArray should contain a single child array (values array)" - )] + #[should_panic(expected = "ListArray should contain a single child array (values array)")] // Different error messages, so skip for now // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] fn test_list_array_invalid_child_array_len() { let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .len(3) @@ -996,9 +983,7 @@ mod tests { } #[test] - #[should_panic( - expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List" - )] + #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). 
It is List")] fn test_from_array_data_validation() { let mut builder = ListBuilder::new(Int32Builder::new()); builder.values().append_value(1); @@ -1017,8 +1002,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]); - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -1033,9 +1017,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Memory pointer is not aligned with the specified scalar type" - )] + #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] // Different error messages, so skip for now // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] @@ -1051,9 +1033,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Memory pointer is not aligned with the specified scalar type" - )] + #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] // Different error messages, so skip for now // https://github.com/apache/arrow-rs/issues/1545 #[cfg(not(feature = "force_validate"))] @@ -1068,8 +1048,7 @@ mod tests { .build_unchecked() }; - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let list_data = unsafe { ArrayData::builder(list_data_type) .add_buffer(buf2) @@ -1187,9 +1166,8 @@ mod tests { let nulls = NullBuffer::new_null(3); let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into()); - let err = - LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls)) - .unwrap_err(); + let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls)) + .unwrap_err(); assert_eq!( err.to_string(), @@ -1197,9 +1175,8 @@ mod tests { ); let field = Arc::new(Field::new("element", DataType::Int64, false)); - let err = - LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None) - .unwrap_err(); + let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None) + .unwrap_err(); assert_eq!( err.to_string(), @@ -1210,8 +1187,8 @@ mod tests { let values = Int64Array::new(vec![0; 7].into(), Some(nulls)); let values = Arc::new(values); - let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), None) - .unwrap_err(); + let err = + LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err(); assert_eq!( err.to_string(), @@ -1222,8 +1199,7 @@ mod tests { LargeListArray::new(field.clone(), offsets.clone(), values, None); let values = Int64Array::new(vec![0; 2].into(), None); - let err = - LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err(); + let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err(); assert_eq!( err.to_string(), diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index fca49cd7836f..bde7fdd5a953 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -17,9 +17,7 @@ use crate::array::{get_offsets, print_long_array}; use crate::iterator::MapArrayIter; -use crate::{ - make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray, -}; +use crate::{make_array, Array, ArrayAccessor, ArrayRef, ListArray, StringArray, StructArray}; use 
arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType, Field, FieldRef}; @@ -264,9 +262,10 @@ impl MapArray { } if data.buffers().len() != 1 { - return Err(ArrowError::InvalidArgumentError( - format!("MapArray data should contain a single buffer only (value offsets), had {}", - data.len()))); + return Err(ArrowError::InvalidArgumentError(format!( + "MapArray data should contain a single buffer only (value offsets), had {}", + data.len() + ))); } if data.child_data().len() != 1 { @@ -281,9 +280,9 @@ impl MapArray { if let DataType::Struct(fields) = entries.data_type() { if fields.len() != 2 { return Err(ArrowError::InvalidArgumentError(format!( - "MapArray should contain a struct array with 2 fields, have {} fields", - fields.len() - ))); + "MapArray should contain a struct array with 2 fields, have {} fields", + fields.len() + ))); } } else { return Err(ArrowError::InvalidArgumentError(format!( @@ -330,7 +329,7 @@ impl MapArray { Arc::new(Field::new( "entries", entry_struct.data_type().clone(), - true, + false, )), false, ); @@ -477,7 +476,7 @@ mod tests { Arc::new(Field::new( "entries", entry_struct.data_type().clone(), - true, + false, )), false, ); @@ -523,7 +522,7 @@ mod tests { Arc::new(Field::new( "entries", entry_struct.data_type().clone(), - true, + false, )), false, ); @@ -576,8 +575,7 @@ mod tests { assert_eq!(2, map_array.value_length(1)); let key_array = Arc::new(Int32Array::from(vec![3, 4, 5])) as ArrayRef; - let value_array = - Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef; + let value_array = Arc::new(UInt32Array::from(vec![None, Some(40), None])) as ArrayRef; let struct_array = StructArray::from(vec![(keys_field, key_array), (values_field, value_array)]); assert_eq!( @@ -645,7 +643,7 @@ mod tests { Arc::new(Field::new( "entries", entry_struct.data_type().clone(), - true, + false, )), false, ); @@ -669,9 +667,7 @@ mod tests { } #[test] - #[should_panic( - expected = "MapArray expected ArrayData with DataType::Map got Dictionary" - )] + #[should_panic(expected = "MapArray expected ArrayData with DataType::Map got Dictionary")] fn test_from_array_data_validation() { // A DictionaryArray has similar buffer layout to a MapArray // but the meaning of the values differs @@ -692,12 +688,9 @@ mod tests { // [[a, b, c], [d, e, f], [g, h]] let entry_offsets = [0, 3, 6, 8]; - let map_array = MapArray::new_from_strings( - keys.clone().into_iter(), - &values_data, - &entry_offsets, - ) - .unwrap(); + let map_array = + MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets) + .unwrap(); assert_eq!( &values_data, @@ -768,9 +761,8 @@ mod tests { "Invalid argument error: Incorrect length of null buffer for MapArray, expected 4 got 3" ); - let err = - MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false) - .unwrap_err(); + let err = MapArray::try_new(field, offsets.clone(), entries.slice(0, 2), None, false) + .unwrap_err(); assert_eq!( err.to_string(), @@ -783,9 +775,7 @@ mod tests { .to_string(); assert!( - err.starts_with( - "Invalid argument error: MapArray expected data type Int64 got Struct" - ), + err.starts_with("Invalid argument error: MapArray expected data type Int64 got Struct"), "{err}" ); diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 905ec1e5431b..f19406c1610b 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -173,20 +173,22 @@ pub trait 
Array: std::fmt::Debug + Send + Sync { /// ``` fn offset(&self) -> usize; - /// Returns the null buffer of this array if any + /// Returns the null buffer of this array if any. /// - /// Note: some arrays can encode their nullability in their children, for example, + /// The null buffer encodes the "physical" nulls of an array. + /// However, some arrays can also encode nullability in their children, for example, /// [`DictionaryArray::values`] values or [`RunArray::values`], or without a null buffer, - /// such as [`NullArray`]. Use [`Array::logical_nulls`] to obtain a computed mask encoding this + /// such as [`NullArray`]. To determine if each element of such an array is logically null, + /// you can use the slower [`Array::logical_nulls`] to obtain a computed mask. fn nulls(&self) -> Option<&NullBuffer>; - /// Returns the logical null buffer of this array if any + /// Returns a potentially computed [`NullBuffer`] that represents the logical null values of this array, if any. /// /// In most cases this will be the same as [`Array::nulls`], except for: /// - /// * DictionaryArray where [`DictionaryArray::values`] contains nulls - /// * RunArray where [`RunArray::values`] contains nulls - /// * NullArray where all indices are nulls + /// * [`DictionaryArray`] where [`DictionaryArray::values`] contains nulls + /// * [`RunArray`] where [`RunArray::values`] contains nulls + /// * [`NullArray`] where all indices are nulls /// /// In these cases a logical [`NullBuffer`] will be computed, encoding the logical nullability /// of these arrays, beyond what is encoded in [`Array::nulls`] @@ -194,31 +196,33 @@ pub trait Array: std::fmt::Debug + Send + Sync { self.nulls().cloned() } - /// Returns whether the element at `index` is null. - /// When using this function on a slice, the index is relative to the slice. + /// Returns whether the element at `index` is null according to [`Array::nulls`] /// - /// Note: this method returns the physical nullability, i.e. that encoded in [`Array::nulls`] - /// see [`Array::logical_nulls`] for logical nullability + /// Note: For performance reasons, this method returns nullability solely as determined by the + /// null buffer. This difference can lead to surprising results, for example, [`NullArray::is_null`] always + /// returns `false` as the array lacks a null buffer. Similarly [`DictionaryArray`] and [`RunArray`] may + /// encode nullability in their children. See [`Self::logical_nulls`] for more information. /// /// # Example: /// /// ``` - /// use arrow_array::{Array, Int32Array}; + /// use arrow_array::{Array, Int32Array, NullArray}; /// /// let array = Int32Array::from(vec![Some(1), None]); - /// /// assert_eq!(array.is_null(0), false); /// assert_eq!(array.is_null(1), true); + /// + /// // NullArrays do not have a null buffer, and therefore always + /// // return false for is_null. + /// let array = NullArray::new(1); + /// assert_eq!(array.is_null(0), false); /// ``` fn is_null(&self, index: usize) -> bool { self.nulls().map(|n| n.is_null(index)).unwrap_or_default() } - /// Returns whether the element at `index` is not null. - /// When using this function on a slice, the index is relative to the slice. - /// - /// Note: this method returns the physical nullability, i.e. that encoded in [`Array::nulls`] - /// see [`Array::logical_nulls`] for logical nullability + /// Returns whether the element at `index` is *not* null, the + /// opposite of [`Self::is_null`].
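// A hedged illustration of the physical-vs-logical distinction documented
// above: a dictionary whose values contain a null has no null buffer of its
// own, so is_null() reports false while logical_nulls() exposes the computed
// mask. The helper name is illustrative.
use std::sync::Arc;
use arrow_array::types::Int32Type;
use arrow_array::{Array, ArrayRef, DictionaryArray, Int32Array};

fn logical_nulls_sketch() {
    let values: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), None]));
    let keys = Int32Array::from(vec![0, 1, 0]);
    let dict = DictionaryArray::<Int32Type>::try_new(keys, values).unwrap();

    assert!(dict.nulls().is_none()); // no physical null buffer on the keys
    assert!(!dict.is_null(1));       // the physical check therefore sees no null

    let logical = dict.logical_nulls().expect("computed mask");
    assert!(logical.is_null(1));     // key 1 points at a null value
    assert_eq!(logical.null_count(), 1);
}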
/// /// # Example: /// @@ -532,9 +536,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef, DataType::Date32 => Arc::new(Date32Array::from(data)) as ArrayRef, DataType::Date64 => Arc::new(Date64Array::from(data)) as ArrayRef, - DataType::Time32(TimeUnit::Second) => { - Arc::new(Time32SecondArray::from(data)) as ArrayRef - } + DataType::Time32(TimeUnit::Second) => Arc::new(Time32SecondArray::from(data)) as ArrayRef, DataType::Time32(TimeUnit::Millisecond) => { Arc::new(Time32MillisecondArray::from(data)) as ArrayRef } @@ -579,9 +581,7 @@ pub fn make_array(data: ArrayData) -> ArrayRef { } DataType::Binary => Arc::new(BinaryArray::from(data)) as ArrayRef, DataType::LargeBinary => Arc::new(LargeBinaryArray::from(data)) as ArrayRef, - DataType::FixedSizeBinary(_) => { - Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef - } + DataType::FixedSizeBinary(_) => Arc::new(FixedSizeBinaryArray::from(data)) as ArrayRef, DataType::Utf8 => Arc::new(StringArray::from(data)) as ArrayRef, DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data)) as ArrayRef, DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef, @@ -589,50 +589,24 @@ pub fn make_array(data: ArrayData) -> ArrayRef { DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef, DataType::Map(_, _) => Arc::new(MapArray::from(data)) as ArrayRef, DataType::Union(_, _) => Arc::new(UnionArray::from(data)) as ArrayRef, - DataType::FixedSizeList(_, _) => { - Arc::new(FixedSizeListArray::from(data)) as ArrayRef - } + DataType::FixedSizeList(_, _) => Arc::new(FixedSizeListArray::from(data)) as ArrayRef, DataType::Dictionary(ref key_type, _) => match key_type.as_ref() { - DataType::Int8 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::Int16 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::Int32 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::Int64 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::UInt8 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::UInt16 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::UInt32 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } - DataType::UInt64 => { - Arc::new(DictionaryArray::::from(data)) as ArrayRef - } + DataType::Int8 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::Int16 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::Int32 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::Int64 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::UInt8 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::UInt16 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::UInt32 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, + DataType::UInt64 => Arc::new(DictionaryArray::::from(data)) as ArrayRef, dt => panic!("Unexpected dictionary key type {dt:?}"), }, - DataType::RunEndEncoded(ref run_ends_type, _) => { - match run_ends_type.data_type() { - DataType::Int16 => { - Arc::new(RunArray::::from(data)) as ArrayRef - } - DataType::Int32 => { - Arc::new(RunArray::::from(data)) as ArrayRef - } - DataType::Int64 => { - Arc::new(RunArray::::from(data)) as ArrayRef - } - dt => panic!("Unexpected data type for run_ends array {dt:?}"), - } - } + DataType::RunEndEncoded(ref run_ends_type, _) => match run_ends_type.data_type() { + DataType::Int16 => 
Arc::new(RunArray::::from(data)) as ArrayRef, + DataType::Int32 => Arc::new(RunArray::::from(data)) as ArrayRef, + DataType::Int64 => Arc::new(RunArray::::from(data)) as ArrayRef, + dt => panic!("Unexpected data type for run_ends array {dt:?}"), + }, DataType::Null => Arc::new(NullArray::from(data)) as ArrayRef, DataType::Decimal128(_, _) => Arc::new(Decimal128Array::from(data)) as ArrayRef, DataType::Decimal256(_, _) => Arc::new(Decimal256Array::from(data)) as ArrayRef, @@ -683,11 +657,8 @@ unsafe fn get_offsets(data: &ArrayData) -> OffsetBuffer { match data.is_empty() && data.buffers()[0].is_empty() { true => OffsetBuffer::new_empty(), false => { - let buffer = ScalarBuffer::new( - data.buffers()[0].clone(), - data.offset(), - data.len() + 1, - ); + let buffer = + ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len() + 1); // Safety: // ArrayData is valid unsafe { OffsetBuffer::new_unchecked(buffer) } @@ -696,11 +667,7 @@ unsafe fn get_offsets(data: &ArrayData) -> OffsetBuffer { } /// Helper function for printing potentially long arrays. -fn print_long_array( - array: &A, - f: &mut std::fmt::Formatter, - print_item: F, -) -> std::fmt::Result +fn print_long_array(array: &A, f: &mut std::fmt::Formatter, print_item: F) -> std::fmt::Result where A: Array, F: Fn(&A, usize, &mut std::fmt::Formatter) -> std::fmt::Result, @@ -763,8 +730,7 @@ mod tests { #[test] fn test_empty_list_primitive() { - let data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); + let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, false))); let array = new_empty_array(&data_type); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 0); @@ -795,8 +761,7 @@ mod tests { fn test_null_struct() { // It is possible to create a null struct containing a non-nullable child // see https://github.com/apache/arrow-rs/pull/3244 for details - let struct_type = - DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into()); + let struct_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into()); let array = new_null_array(&struct_type, 9); let a = array.as_any().downcast_ref::().unwrap(); @@ -823,8 +788,7 @@ mod tests { #[test] fn test_null_list_primitive() { - let data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); let array = new_null_array(&data_type, 9); let a = array.as_any().downcast_ref::().unwrap(); assert_eq!(a.len(), 9); @@ -858,8 +822,8 @@ mod tests { #[test] fn test_null_dictionary() { - let values = vec![None, None, None, None, None, None, None, None, None] - as Vec>; + let values = + vec![None, None, None, None, None, None, None, None, None] as Vec>; let array: DictionaryArray = values.into_iter().collect(); let array = Arc::new(array) as ArrayRef; @@ -961,8 +925,7 @@ mod tests { #[test] fn test_memory_size_primitive() { let arr = PrimitiveArray::::from_iter_values(0..128); - let empty = - PrimitiveArray::::from(ArrayData::new_empty(arr.data_type())); + let empty = PrimitiveArray::::from(ArrayData::new_empty(arr.data_type())); // subtract empty array to avoid magic numbers for the size of additional fields assert_eq!( diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 4c07e81468aa..1112acacfcd9 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -917,8 +917,8 @@ impl PrimitiveArray { 
let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); let element_len = std::mem::size_of::(); - let buffer = data.buffers()[0] - .slice_with_length(data.offset() * element_len, len * element_len); + let buffer = + data.buffers()[0].slice_with_length(data.offset() * element_len, len * element_len); drop(data); @@ -1116,10 +1116,9 @@ impl std::fmt::Debug for PrimitiveArray { }, // if the time zone is invalid, shows NaiveDateTime with an error message Err(_) => match as_datetime::(v) { - Some(datetime) => write!( - f, - "{datetime:?} (Unknown Time Zone '{tz_string}')" - ), + Some(datetime) => { + write!(f, "{datetime:?} (Unknown Time Zone '{tz_string}')") + } None => write!(f, "null"), }, } @@ -1191,25 +1190,19 @@ def_from_for_primitive!(Float64Type, f64); def_from_for_primitive!(Decimal128Type, i128); def_from_for_primitive!(Decimal256Type, i256); -impl From::Native>> - for NativeAdapter -{ +impl From::Native>> for NativeAdapter { fn from(value: Option<::Native>) -> Self { NativeAdapter { native: value } } } -impl From<&Option<::Native>> - for NativeAdapter -{ +impl From<&Option<::Native>> for NativeAdapter { fn from(value: &Option<::Native>) -> Self { NativeAdapter { native: *value } } } -impl>> FromIterator - for PrimitiveArray -{ +impl>> FromIterator for PrimitiveArray { fn from_iter>(iter: I) -> Self { let iter = iter.into_iter(); let (lower, _) = iter.size_hint(); @@ -1265,15 +1258,8 @@ impl PrimitiveArray { let (null, buffer) = trusted_len_unzip(iterator); - let data = ArrayData::new_unchecked( - T::DATA_TYPE, - len, - None, - Some(null), - 0, - vec![buffer], - vec![], - ); + let data = + ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]); PrimitiveArray::from(data) } } @@ -1294,9 +1280,7 @@ macro_rules! def_numeric_from_vec { } // Constructs a primitive array from a vector. Should only be used for testing. - impl From::Native>>> - for PrimitiveArray<$ty> - { + impl From::Native>>> for PrimitiveArray<$ty> { fn from(data: Vec::Native>>) -> Self { PrimitiveArray::from_iter(data.iter()) } @@ -1392,8 +1376,7 @@ impl From for PrimitiveArray { "PrimitiveArray data should contain a single buffer only (values buffer)" ); - let values = - ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); + let values = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); Self { data_type: data.data_type().clone(), values, @@ -1407,11 +1390,7 @@ impl PrimitiveArray { /// specified precision and scale. 
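// A minimal sketch of the precision/scale validation referenced above:
// Decimal128 supports at most precision 38, so an out-of-range request is
// rejected, while a valid one re-types the same stored values. The helper
// name is illustrative.
use arrow_array::{Array, Decimal128Array};
use arrow_schema::DataType;

fn precision_and_scale_sketch() {
    let arr = Decimal128Array::from(vec![Some(12345), None])
        .with_precision_and_scale(10, 2)
        .unwrap();
    assert_eq!(arr.data_type(), &DataType::Decimal128(10, 2));

    // precision 76 is only valid for Decimal256, not Decimal128
    let err = Decimal128Array::from(vec![Some(1)]).with_precision_and_scale(76, 2);
    assert!(err.is_err());
}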
/// /// See [`validate_decimal_precision_and_scale`] - pub fn with_precision_and_scale( - self, - precision: u8, - scale: i8, - ) -> Result { + pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result { validate_decimal_precision_and_scale::(precision, scale)?; Ok(Self { data_type: T::TYPE_CONSTRUCTOR(precision, scale), @@ -1575,8 +1554,7 @@ mod tests { // 1: 00:00:00.001 // 37800005: 10:30:00.005 // 86399210: 23:59:59.210 - let arr: PrimitiveArray = - vec![1, 37_800_005, 86_399_210].into(); + let arr: PrimitiveArray = vec![1, 37_800_005, 86_399_210].into(); assert_eq!(3, arr.len()); assert_eq!(0, arr.offset()); assert_eq!(0, arr.null_count()); @@ -1858,11 +1836,7 @@ mod tests { #[test] fn test_timestamp_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]); assert_eq!( "PrimitiveArray\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]", format!("{arr:?}") @@ -1872,12 +1846,8 @@ mod tests { #[test] fn test_timestamp_utc_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]) - .with_timezone_utc(); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]) + .with_timezone_utc(); assert_eq!( "PrimitiveArray\n[\n 2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n 1921-01-02T00:00:00+00:00,\n]", format!("{arr:?}") @@ -1888,12 +1858,8 @@ mod tests { #[cfg(feature = "chrono-tz")] fn test_timestamp_with_named_tz_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]) - .with_timezone("Asia/Taipei".to_string()); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]) + .with_timezone("Asia/Taipei".to_string()); assert_eq!( "PrimitiveArray\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]", format!("{:?}", arr) @@ -1904,12 +1870,8 @@ mod tests { #[cfg(not(feature = "chrono-tz"))] fn test_timestamp_with_named_tz_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]) - .with_timezone("Asia/Taipei".to_string()); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]) + .with_timezone("Asia/Taipei".to_string()); println!("{arr:?}"); @@ -1922,12 +1884,8 @@ mod tests { #[test] fn test_timestamp_with_fixed_offset_tz_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]) - .with_timezone("+08:00".to_string()); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]) + .with_timezone("+08:00".to_string()); assert_eq!( "PrimitiveArray\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]", format!("{arr:?}") @@ -1937,12 +1895,8 @@ mod tests { #[test] fn test_timestamp_with_incorrect_tz_fmt_debug() { let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1546214400000, - 1546214400000, - -1546214400000, - ]) - .with_timezone("xxx".to_string()); + TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]) + .with_timezone("xxx".to_string()); assert_eq!( "PrimitiveArray\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 
(Unknown Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]", format!("{arr:?}") @@ -1952,14 +1906,13 @@ mod tests { #[test] #[cfg(feature = "chrono-tz")] fn test_timestamp_with_tz_with_daylight_saving_fmt_debug() { - let arr: PrimitiveArray = - TimestampMillisecondArray::from(vec![ - 1647161999000, - 1647162000000, - 1667717999000, - 1667718000000, - ]) - .with_timezone("America/Denver".to_string()); + let arr: PrimitiveArray = TimestampMillisecondArray::from(vec![ + 1647161999000, + 1647162000000, + 1667717999000, + 1667718000000, + ]) + .with_timezone("America/Denver".to_string()); assert_eq!( "PrimitiveArray\n[\n 2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]", format!("{:?}", arr) @@ -1997,8 +1950,7 @@ mod tests { #[test] fn test_timestamp_micros_out_of_range() { // replicate the issue from https://github.com/apache/arrow-datafusion/issues/3832 - let arr: PrimitiveArray = - vec![9065525203050843594].into(); + let arr: PrimitiveArray = vec![9065525203050843594].into(); assert_eq!( "PrimitiveArray\n[\n null,\n]", format!("{arr:?}") @@ -2143,8 +2095,7 @@ mod tests { #[test] fn test_decimal256() { - let values: Vec<_> = - vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX]; + let values: Vec<_> = vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX]; let array: PrimitiveArray = PrimitiveArray::from_iter(values.iter().copied()); @@ -2166,8 +2117,8 @@ mod tests { // let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; // let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]; let values: [u8; 32] = [ - 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, ]; let array_data = ArrayData::builder(DataType::Decimal128(38, 6)) .len(2) @@ -2232,8 +2183,7 @@ mod tests { #[test] fn test_decimal_from_iter() { - let array: Decimal128Array = - vec![Some(-100), None, Some(101)].into_iter().collect(); + let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); assert_eq!(-100_i128, array.value(0)); @@ -2343,8 +2293,7 @@ mod tests { #[test] fn test_decimal_array_set_null_if_overflow_with_precision() { - let array = - Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]); + let array = Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]); let result = array.null_if_overflow_precision(5); let expected = Decimal128Array::from(vec![None, Some(123), None, None]); assert_eq!(result, expected); @@ -2361,8 +2310,7 @@ mod tests { let decimal2 = i256::from_i128(56789); builder.append_value(decimal2); - let array: Decimal256Array = - builder.finish().with_precision_and_scale(76, 6).unwrap(); + let array: Decimal256Array = builder.finish().with_precision_and_scale(76, 6).unwrap(); let collected: Vec<_> = array.iter().collect(); assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected); @@ -2387,8 +2335,7 @@ mod tests { #[test] fn test_from_iter_decimal128array() { - let mut array: Decimal128Array = - vec![Some(-100), None, Some(101)].into_iter().collect(); + let mut array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect(); 
array = array.with_precision_and_scale(38, 10).unwrap(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); @@ -2404,13 +2351,11 @@ mod tests { let array = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]); let r = array.unary_opt::<_, Int32Type>(|x| (x % 2 != 0).then_some(x)); - let expected = - Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]); + let expected = Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]); assert_eq!(r, expected); let r = expected.unary_opt::<_, Int32Type>(|x| (x % 3 != 0).then_some(x)); - let expected = - Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]); + let expected = Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]); assert_eq!(r, expected); } @@ -2513,9 +2458,8 @@ mod tests { Int32Array::new(vec![1, 2, 3, 4].into(), None); Int32Array::new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(4))); - let err = - Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3))) - .unwrap_err(); + let err = Int32Array::try_new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(3))) + .unwrap_err(); assert_eq!( err.to_string(), diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index ba6986c28463..4877f9f850a3 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -91,10 +91,7 @@ impl RunArray { /// Attempts to create RunArray using given run_ends (index where a run ends) /// and the values (value of the run). Returns an error if the given data is not compatible /// with RunEndEncoded specification. - pub fn try_new( - run_ends: &PrimitiveArray, - values: &dyn Array, - ) -> Result { + pub fn try_new(run_ends: &PrimitiveArray, values: &dyn Array) -> Result { let run_ends_type = run_ends.data_type().clone(); let values_type = values.data_type().clone(); let ree_array_type = DataType::RunEndEncoded( @@ -182,10 +179,7 @@ impl RunArray { /// scaled well for larger inputs. /// See for more details. #[inline] - pub fn get_physical_indices( - &self, - logical_indices: &[I], - ) -> Result, ArrowError> + pub fn get_physical_indices(&self, logical_indices: &[I]) -> Result, ArrowError> where I: ArrowNativeType, { @@ -211,8 +205,7 @@ impl RunArray { }); // Return early if all the logical indices cannot be converted to physical indices. - let largest_logical_index = - logical_indices[*ordered_indices.last().unwrap()].as_usize(); + let largest_logical_index = logical_indices[*ordered_indices.last().unwrap()].as_usize(); if largest_logical_index >= len { return Err(ArrowError::InvalidArgumentError(format!( "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {largest_logical_index}.", @@ -225,8 +218,7 @@ impl RunArray { let mut physical_indices = vec![0; indices_len]; let mut ordered_index = 0_usize; - for (physical_index, run_end) in - self.run_ends.values().iter().enumerate().skip(skip_value) + for (physical_index, run_end) in self.run_ends.values().iter().enumerate().skip(skip_value) { // Get the run end index (relative to offset) of current physical index let run_end_value = run_end.as_usize() - offset; @@ -234,8 +226,7 @@ impl RunArray { // All the `logical_indices` that are less than current run end index // belongs to current physical index. 
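// A hedged sketch of the mapping this loop computes: each logical index
// belongs to the first run whose run_end exceeds it. The helper name is
// illustrative.
use arrow_array::types::Int32Type;
use arrow_array::{Array, Int32Array, RunArray, StringArray};

fn physical_indices_sketch() {
    let run_ends = Int32Array::from(vec![2, 5]); // runs cover [0, 2) and [2, 5)
    let values = StringArray::from(vec!["a", "b"]);
    let ree = RunArray::<Int32Type>::try_new(&run_ends, &values).unwrap();

    assert_eq!(ree.len(), 5);
    let physical = ree.get_physical_indices(&[0_u32, 1, 2, 4]).unwrap();
    assert_eq!(physical, vec![0, 0, 1, 1]);
}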
while ordered_index < indices_len - && logical_indices[ordered_indices[ordered_index]].as_usize() - < run_end_value + && logical_indices[ordered_indices[ordered_index]].as_usize() < run_end_value { physical_indices[ordered_indices[ordered_index]] = physical_index; ordered_index += 1; @@ -245,8 +236,7 @@ impl RunArray { // If there are input values >= run_ends.last_value then we'll not be able to convert // all logical indices to physical indices. if ordered_index < logical_indices.len() { - let logical_index = - logical_indices[ordered_indices[ordered_index]].as_usize(); + let logical_index = logical_indices[ordered_indices[ordered_index]].as_usize(); return Err(ArrowError::InvalidArgumentError(format!( "Cannot convert all logical indices to physical indices. The logical index cannot be converted is {logical_index}.", ))); @@ -704,8 +694,7 @@ mod tests { seed.shuffle(&mut rng); } // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays - let num = - max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); + let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); for _ in 0..num { result.push(seed[ix]); } @@ -749,19 +738,16 @@ mod tests { #[test] fn test_run_array() { // Construct a value array - let value_data = PrimitiveArray::::from_iter_values([ - 10_i8, 11, 12, 13, 14, 15, 16, 17, - ]); + let value_data = + PrimitiveArray::::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]); // Construct a run_ends array: let run_ends_values = [4_i16, 6, 7, 9, 13, 18, 20, 22]; - let run_ends_data = PrimitiveArray::::from_iter_values( - run_ends_values.iter().copied(), - ); + let run_ends_data = + PrimitiveArray::::from_iter_values(run_ends_values.iter().copied()); // Construct a run ends encoded array from the above two - let ree_array = - RunArray::::try_new(&run_ends_data, &value_data).unwrap(); + let ree_array = RunArray::::try_new(&run_ends_data, &value_data).unwrap(); assert_eq!(ree_array.len(), 22); assert_eq!(ree_array.null_count(), 0); @@ -872,8 +858,7 @@ mod tests { let values: StringArray = [Some("foo"), Some("bar"), None, Some("baz")] .into_iter() .collect(); - let run_ends: Int32Array = - [Some(1), Some(2), Some(3), Some(4)].into_iter().collect(); + let run_ends: Int32Array = [Some(1), Some(2), Some(3), Some(4)].into_iter().collect(); let array = RunArray::::try_new(&run_ends, &values).unwrap(); assert_eq!(array.values().data_type(), &DataType::Utf8); @@ -924,7 +909,10 @@ mod tests { let run_ends: Int32Array = [Some(1), None, Some(3)].into_iter().collect(); let actual = RunArray::::try_new(&run_ends, &values); - let expected = ArrowError::InvalidArgumentError("Found null values in run_ends array. The run_ends array should not have null values.".to_string()); + let expected = ArrowError::InvalidArgumentError( + "Found null values in run_ends array. The run_ends array should not have null values." 
+ .to_string(), + ); assert_eq!(expected.to_string(), actual.err().unwrap().to_string()); } @@ -1003,8 +991,7 @@ mod tests { let mut rng = thread_rng(); logical_indices.shuffle(&mut rng); - let physical_indices = - run_array.get_physical_indices(&logical_indices).unwrap(); + let physical_indices = run_array.get_physical_indices(&logical_indices).unwrap(); assert_eq!(logical_indices.len(), physical_indices.len()); diff --git a/arrow-array/src/array/string_array.rs b/arrow-array/src/array/string_array.rs index cac4651f4496..9d266e0ca4b8 100644 --- a/arrow-array/src/array/string_array.rs +++ b/arrow-array/src/array/string_array.rs @@ -59,9 +59,7 @@ impl GenericStringArray { /// Fallibly creates a [`GenericStringArray`] from a [`GenericBinaryArray`] returning /// an error if [`GenericBinaryArray`] contains invalid UTF-8 data - pub fn try_from_binary( - v: GenericBinaryArray, - ) -> Result { + pub fn try_from_binary(v: GenericBinaryArray) -> Result { let (offsets, values, nulls) = v.into_parts(); Self::try_new(offsets, values, nulls) } @@ -83,9 +81,7 @@ impl From> } } -impl From>> - for GenericStringArray -{ +impl From>> for GenericStringArray { fn from(v: Vec>) -> Self { v.into_iter().collect() } @@ -97,9 +93,7 @@ impl From> for GenericStringArray From>> - for GenericStringArray -{ +impl From>> for GenericStringArray { fn from(v: Vec>) -> Self { v.into_iter().collect() } @@ -438,13 +432,11 @@ mod tests { let expected: LargeStringArray = data.clone().into_iter().map(Some).collect(); // Iterator reports too many items - let arr = - LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone())); + let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 10, data.clone())); assert_eq!(expected, arr); // Iterator reports too few items - let arr = - LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone())); + let arr = LargeStringArray::from_iter_values(BadIterator::new(3, 1, data.clone())); assert_eq!(expected, arr); } @@ -460,9 +452,11 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref([0b101]); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt8, false), - )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt8, + false, + ))); // [None, Some("Parquet")] let array_data = ArrayData::builder(data_type) @@ -493,9 +487,7 @@ mod tests { _test_generic_string_array_from_list_array::(); } - fn _test_generic_string_array_from_list_array_with_child_nulls_failed< - O: OffsetSizeTrait, - >() { + fn _test_generic_string_array_from_list_array_with_child_nulls_failed() { let values = b"HelloArrow"; let child_data = ArrayData::builder(DataType::UInt8) .len(10) @@ -508,9 +500,11 @@ mod tests { // It is possible to create a null struct containing a non-nullable child // see https://github.com/apache/arrow-rs/pull/3244 for details - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt8, true), - )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt8, + true, + ))); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -544,9 +538,11 @@ mod tests { .unwrap(); let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap()); - let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new( - Field::new("item", DataType::UInt16, false), - )); + let data_type = 
GenericListArray::::DATA_TYPE_CONSTRUCTOR(Arc::new(Field::new( + "item", + DataType::UInt16, + false, + ))); let array_data = ArrayData::builder(data_type) .len(2) diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index 284c3b26a946..699da28cf7a3 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -197,6 +197,23 @@ impl StructArray { } } + /// Create a new [`StructArray`] containing no fields + /// + /// # Panics + /// + /// If `len != nulls.len()` + pub fn new_empty_fields(len: usize, nulls: Option) -> Self { + if let Some(n) = &nulls { + assert_eq!(len, n.len()) + } + Self { + len, + data_type: DataType::Struct(Fields::empty()), + fields: vec![], + nulls, + } + } + /// Deconstruct this array into its constituent parts pub fn into_parts(self) -> (Fields, Vec, Option) { let f = match self.data_type { @@ -445,9 +462,7 @@ impl Index<&str> for StructArray { mod tests { use super::*; - use crate::{ - BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray, - }; + use crate::{BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array, StringArray}; use arrow_buffer::ToByteSlice; use std::sync::Arc; @@ -523,12 +538,10 @@ mod tests { None, Some("mark"), ])); - let ints: ArrayRef = - Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)])); let arr = - StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap(); + StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]).unwrap(); let struct_data = arr.into_data(); assert_eq!(4, struct_data.len()); @@ -561,13 +574,11 @@ mod tests { None, // 3 elements, not 4 ])); - let ints: ArrayRef = - Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)])); + let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), None, Some(4)])); - let err = - StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) - .unwrap_err() - .to_string(); + let err = StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) + .unwrap_err() + .to_string(); assert_eq!( err, @@ -582,8 +593,7 @@ mod tests { fn test_struct_array_from_mismatched_types_single() { drop(StructArray::from(vec![( Arc::new(Field::new("b", DataType::Int16, false)), - Arc::new(BooleanArray::from(vec![false, false, true, true])) - as Arc, + Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc, )])); } @@ -595,8 +605,7 @@ mod tests { drop(StructArray::from(vec![ ( Arc::new(Field::new("b", DataType::Int16, false)), - Arc::new(BooleanArray::from(vec![false, false, true, true])) - as Arc, + Arc::new(BooleanArray::from(vec![false, false, true, true])) as Arc, ), ( Arc::new(Field::new("c", DataType::Utf8, false)), @@ -716,9 +725,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"" - )] + #[should_panic(expected = "Found unmasked nulls for non-nullable StructArray field \\\"c\\\"")] fn test_struct_array_from_mismatched_nullability() { drop(StructArray::from(vec![( Arc::new(Field::new("c", DataType::Int32, false)), diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 74a5f1efa767..94ac0bc879e4 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -179,8 +179,7 @@ impl UnionArray { if let Some(b) = &value_offsets { if ((type_ids.len()) * 4) != 
b.len() { return Err(ArrowError::InvalidArgumentError( - "Type Ids and Offsets represent a different number of array slots." - .to_string(), + "Type Ids and Offsets represent a different number of array slots.".to_string(), )); } } @@ -216,9 +215,8 @@ impl UnionArray { // Unsafe Justification: arguments were validated above (and // re-revalidated as part of data().validate() below) - let new_self = unsafe { - Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays) - }; + let new_self = + unsafe { Self::new_unchecked(field_type_ids, type_ids, value_offsets, child_arrays) }; new_self.to_data().validate()?; Ok(new_self) @@ -1059,7 +1057,13 @@ mod tests { let mut builder = UnionBuilder::new_sparse(); builder.append::("a", 1.0).unwrap(); let err = builder.append::("a", 1).unwrap_err().to_string(); - assert!(err.contains("Attempt to write col \"a\" with type Int32 doesn't match existing type Float32"), "{}", err); + assert!( + err.contains( + "Attempt to write col \"a\" with type Int32 doesn't match existing type Float32" + ), + "{}", + err + ); } #[test] diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index 5f0013269677..7e59d940a50e 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -127,11 +127,7 @@ impl BooleanBuilder { /// /// Returns an error if the slices are of different lengths #[inline] - pub fn append_values( - &mut self, - values: &[bool], - is_valid: &[bool], - ) -> Result<(), ArrowError> { + pub fn append_values(&mut self, values: &[bool], is_valid: &[bool]) -> Result<(), ArrowError> { if values.len() != is_valid.len() { Err(ArrowError::InvalidArgumentError( "Value and validity lengths must be equal".to_string(), @@ -250,8 +246,7 @@ mod tests { #[test] fn test_boolean_array_builder_append_slice() { - let arr1 = - BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); + let arr1 = BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); let mut builder = BooleanArray::builder(0); builder.append_slice(&[true, false]); diff --git a/arrow-array/src/builder/buffer_builder.rs b/arrow-array/src/builder/buffer_builder.rs index 01e4c1d4e217..2b66a8187fa9 100644 --- a/arrow-array/src/builder/buffer_builder.rs +++ b/arrow-array/src/builder/buffer_builder.rs @@ -45,11 +45,9 @@ pub type Float32BufferBuilder = BufferBuilder; pub type Float64BufferBuilder = BufferBuilder; /// Buffer builder for 128-bit decimal type. -pub type Decimal128BufferBuilder = - BufferBuilder<::Native>; +pub type Decimal128BufferBuilder = BufferBuilder<::Native>; /// Buffer builder for 256-bit decimal type. -pub type Decimal256BufferBuilder = - BufferBuilder<::Native>; +pub type Decimal256BufferBuilder = BufferBuilder<::Native>; /// Buffer builder for timestamp type of second unit. 
pub type TimestampSecondBufferBuilder = @@ -107,9 +105,7 @@ pub type DurationNanosecondBufferBuilder = #[cfg(test)] mod tests { - use crate::builder::{ - ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder, - }; + use crate::builder::{ArrayBuilder, Int32BufferBuilder, Int8Builder, UInt8BufferBuilder}; use crate::Array; #[test] diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs index 180150e988f3..0a50eb8a50e9 100644 --- a/arrow-array/src/builder/fixed_size_binary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_builder.rs @@ -75,7 +75,8 @@ impl FixedSizeBinaryBuilder { pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<(), ArrowError> { if self.value_length != value.as_ref().len() as i32 { Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() + "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths" + .to_string(), )) } else { self.values_builder.append_slice(value.as_ref()); @@ -95,11 +96,10 @@ impl FixedSizeBinaryBuilder { /// Builds the [`FixedSizeBinaryArray`] and reset this builder. pub fn finish(&mut self) -> FixedSizeBinaryArray { let array_length = self.len(); - let array_data_builder = - ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) - .add_buffer(self.values_builder.finish()) - .nulls(self.null_buffer_builder.finish()) - .len(array_length); + let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) + .add_buffer(self.values_builder.finish()) + .nulls(self.null_buffer_builder.finish()) + .len(array_length); let array_data = unsafe { array_data_builder.build_unchecked() }; FixedSizeBinaryArray::from(array_data) } @@ -108,11 +108,10 @@ impl FixedSizeBinaryBuilder { pub fn finish_cloned(&self) -> FixedSizeBinaryArray { let array_length = self.len(); let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); - let array_data_builder = - ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) - .add_buffer(values_buffer) - .nulls(self.null_buffer_builder.finish_cloned()) - .len(array_length); + let array_data_builder = ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) + .add_buffer(values_buffer) + .nulls(self.null_buffer_builder.finish_cloned()) + .len(array_length); let array_data = unsafe { array_data_builder.build_unchecked() }; FixedSizeBinaryArray::from(array_data) } diff --git a/arrow-array/src/builder/generic_byte_run_builder.rs b/arrow-array/src/builder/generic_byte_run_builder.rs index 41165208de55..3cde76c4a039 100644 --- a/arrow-array/src/builder/generic_byte_run_builder.rs +++ b/arrow-array/src/builder/generic_byte_run_builder.rs @@ -19,10 +19,7 @@ use crate::types::bytes::ByteArrayNativeType; use std::{any::Any, sync::Arc}; use crate::{ - types::{ - BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, - Utf8Type, - }, + types::{BinaryType, ByteArrayType, LargeBinaryType, LargeUtf8Type, RunEndIndexType, Utf8Type}, ArrayRef, ArrowPrimitiveType, RunArray, }; @@ -112,10 +109,7 @@ where pub fn with_capacity(capacity: usize, data_capacity: usize) -> Self { Self { run_ends_builder: PrimitiveBuilder::with_capacity(capacity), - values_builder: GenericByteBuilder::::with_capacity( - capacity, - data_capacity, - ), + values_builder: GenericByteBuilder::::with_capacity(capacity, data_capacity), current_value: Vec::new(), has_current_value: false, 
current_run_end_index: 0, @@ -282,12 +276,13 @@ where } fn run_end_index_as_native(&self) -> R::Native { - R::Native::from_usize(self.current_run_end_index) - .unwrap_or_else(|| panic!( + R::Native::from_usize(self.current_run_end_index).unwrap_or_else(|| { + panic!( "Cannot convert the value {} from `usize` to native form of arrow datatype {}", self.current_run_end_index, R::DATA_TYPE - )) + ) + }) } } @@ -413,8 +408,7 @@ mod tests { // Values are polymorphic and so require a downcast. let av = array.values(); - let ava: &GenericByteArray = - av.as_any().downcast_ref::>().unwrap(); + let ava: &GenericByteArray = av.as_any().downcast_ref::>().unwrap(); assert_eq!(*ava.value(0), *values[0]); assert!(ava.is_null(1)); @@ -459,8 +453,7 @@ mod tests { // Values are polymorphic and so require a downcast. let av = array.values(); - let ava: &GenericByteArray = - av.as_any().downcast_ref::>().unwrap(); + let ava: &GenericByteArray = av.as_any().downcast_ref::>().unwrap(); assert_eq!(ava.value(0), values[0]); assert!(ava.is_null(1)); diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index d84be8c2fca6..2c7ee7a3e448 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -68,12 +68,8 @@ impl GenericByteBuilder { let value_builder = BufferBuilder::::new_from_buffer(value_buffer); let null_buffer_builder = null_buffer - .map(|buffer| { - NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1) - }) - .unwrap_or_else(|| { - NullBufferBuilder::new_with_len(offsets_builder.len() - 1) - }); + .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, offsets_builder.len() - 1)) + .unwrap_or_else(|| NullBufferBuilder::new_with_len(offsets_builder.len() - 1)); Self { offsets_builder, @@ -84,8 +80,7 @@ impl GenericByteBuilder { #[inline] fn next_offset(&self) -> T::Offset { - T::Offset::from_usize(self.value_builder.len()) - .expect("byte array offset overflow") + T::Offset::from_usize(self.value_builder.len()).expect("byte array offset overflow") } /// Appends a value into the builder. diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs index 282f423fa6d1..b0c722ae7cda 100644 --- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs +++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs @@ -16,9 +16,7 @@ // under the License. 
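The run-end builder reformatted above collapses consecutive equal values into runs, materializing a run end each time the incoming value changes. A short sketch of the `StringRunBuilder` alias in action (illustrative only):

```rust
use arrow_array::builder::StringRunBuilder;
use arrow_array::cast::AsArray;
use arrow_array::types::Int16Type;
use arrow_array::Array;

fn main() {
    let mut builder = StringRunBuilder::<Int16Type>::new();
    builder.append_value("a");
    builder.append_value("a");
    builder.append_null();
    builder.append_value("b");
    let array = builder.finish();

    // Four logical values compress into three runs: "a" x2, null, "b".
    assert_eq!(array.len(), 4);
    assert_eq!(array.values().as_string::<i32>().len(), 3);
}
```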
use crate::builder::{ArrayBuilder, GenericByteBuilder, PrimitiveBuilder}; -use crate::types::{ - ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType, -}; +use crate::types::{ArrowDictionaryKeyType, ByteArrayType, GenericBinaryType, GenericStringType}; use crate::{Array, ArrayRef, DictionaryArray, GenericByteArray}; use arrow_buffer::ArrowNativeType; use arrow_schema::{ArrowError, DataType}; @@ -91,10 +89,7 @@ where state: Default::default(), dedup: Default::default(), keys_builder: PrimitiveBuilder::with_capacity(keys_capacity), - values_builder: GenericByteBuilder::::with_capacity( - value_capacity, - data_capacity, - ), + values_builder: GenericByteBuilder::::with_capacity(value_capacity, data_capacity), } } @@ -131,8 +126,7 @@ where let mut dedup = HashMap::with_capacity_and_hasher(dict_len, ()); let values_len = dictionary_values.value_data().len(); - let mut values_builder = - GenericByteBuilder::::with_capacity(dict_len, values_len); + let mut values_builder = GenericByteBuilder::::with_capacity(dict_len, values_len); K::Native::from_usize(dictionary_values.len()) .ok_or(ArrowError::DictionaryKeyOverflowError)?; @@ -214,10 +208,7 @@ where /// value is appended to the values array. /// /// Returns an error if the new index would overflow the key type. - pub fn append( - &mut self, - value: impl AsRef, - ) -> Result { + pub fn append(&mut self, value: impl AsRef) -> Result { let value_native: &T::Native = value.as_ref(); let value_bytes: &[u8] = value_native.as_ref(); @@ -240,8 +231,7 @@ where state.hash_one(get_bytes(storage, *idx)) }); - K::Native::from_usize(idx) - .ok_or(ArrowError::DictionaryKeyOverflowError)? + K::Native::from_usize(idx).ok_or(ArrowError::DictionaryKeyOverflowError)? } }; self.keys_builder.append_value(key); @@ -283,8 +273,7 @@ where let values = self.values_builder.finish(); let keys = self.keys_builder.finish(); - let data_type = - DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE)); + let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE)); let builder = keys .into_data() @@ -300,8 +289,7 @@ where let values = self.values_builder.finish_cloned(); let keys = self.keys_builder.finish_cloned(); - let data_type = - DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE)); + let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(T::DATA_TYPE)); let builder = keys .into_data() @@ -367,12 +355,10 @@ fn get_bytes(values: &GenericByteBuilder, idx: usize) -> &[ /// assert_eq!(ava.value(1), "def"); /// /// ``` -pub type StringDictionaryBuilder = - GenericByteDictionaryBuilder>; +pub type StringDictionaryBuilder = GenericByteDictionaryBuilder>; /// Builder for [`DictionaryArray`] of [`LargeStringArray`](crate::array::LargeStringArray) -pub type LargeStringDictionaryBuilder = - GenericByteDictionaryBuilder>; +pub type LargeStringDictionaryBuilder = GenericByteDictionaryBuilder>; /// Builder for [`DictionaryArray`] of [`BinaryArray`](crate::array::BinaryArray) /// @@ -407,12 +393,10 @@ pub type LargeStringDictionaryBuilder = /// assert_eq!(ava.value(1), b"def"); /// /// ``` -pub type BinaryDictionaryBuilder = - GenericByteDictionaryBuilder>; +pub type BinaryDictionaryBuilder = GenericByteDictionaryBuilder>; /// Builder for [`DictionaryArray`] of [`LargeBinaryArray`](crate::array::LargeBinaryArray) -pub type LargeBinaryDictionaryBuilder = - GenericByteDictionaryBuilder>; +pub type LargeBinaryDictionaryBuilder = GenericByteDictionaryBuilder>; #[cfg(test)] mod tests { @@ -444,8 
+428,7 @@ mod tests { // Values are polymorphic and so require a downcast. let av = array.values(); - let ava: &GenericByteArray = - av.as_any().downcast_ref::>().unwrap(); + let ava: &GenericByteArray = av.as_any().downcast_ref::>().unwrap(); assert_eq!(*ava.value(0), *values[0]); assert_eq!(*ava.value(1), *values[1]); @@ -483,8 +466,7 @@ mod tests { // Values are polymorphic and so require a downcast. let av = array.values(); - let ava: &GenericByteArray = - av.as_any().downcast_ref::>().unwrap(); + let ava: &GenericByteArray = av.as_any().downcast_ref::>().unwrap(); assert_eq!(ava.value(0), values[0]); assert_eq!(ava.value(1), values[1]); @@ -542,11 +524,8 @@ mod tests { ::Native: AsRef<::Native>, { let mut builder = - GenericByteDictionaryBuilder::::new_with_dictionary( - 6, - &dictionary, - ) - .unwrap(); + GenericByteDictionaryBuilder::::new_with_dictionary(6, &dictionary) + .unwrap(); builder.append(values[0]).unwrap(); builder.append_null(); builder.append(values[1]).unwrap(); @@ -562,8 +541,7 @@ mod tests { // Values are polymorphic and so require a downcast. let av = array.values(); - let ava: &GenericByteArray = - av.as_any().downcast_ref::>().unwrap(); + let ava: &GenericByteArray = av.as_any().downcast_ref::>().unwrap(); assert!(!ava.is_valid(0)); assert_eq!(ava.value(1), values[1]); @@ -597,11 +575,8 @@ mod tests { ::Native: AsRef<::Native>, { let mut builder = - GenericByteDictionaryBuilder::::new_with_dictionary( - 4, - &dictionary, - ) - .unwrap(); + GenericByteDictionaryBuilder::::new_with_dictionary(4, &dictionary) + .unwrap(); builder.append(values[0]).unwrap(); builder.append_null(); builder.append(values[1]).unwrap(); diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index 5cc7f7b04e0a..21eaadd5208a 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -353,7 +353,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::builder::{Int32Builder, ListBuilder}; + use crate::builder::{make_builder, Int32Builder, ListBuilder}; use crate::cast::AsArray; use crate::types::Int32Type; use crate::{Array, Int32Array}; @@ -548,4 +548,204 @@ mod tests { assert_eq!(elements.null_count(), 1); assert!(elements.is_null(3)); } + + #[test] + fn test_boxed_primitive_aray_builder() { + let values_builder = make_builder(&DataType::Int32, 5); + let mut builder = ListBuilder::new(values_builder); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[1, 2, 3]); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[4, 5, 6]); + builder.append(true); + + let arr = builder.finish(); + assert_eq!(2, arr.len()); + + let elements = arr.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]); + } + + #[test] + fn test_boxed_list_list_array_builder() { + // This test is same as `test_list_list_array_builder` but uses boxed builders. 
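Before the downcast-heavy boxed test below, the pattern in miniature: `make_builder` returns a `Box<dyn ArrayBuilder>`, and the new blanket `ArrayBuilder` impl for boxes (added further down, in `arrow-array/src/builder/mod.rs`) lets `ListBuilder` wrap it directly. A sketch assuming that impl:

```rust
use arrow_array::builder::{make_builder, ArrayBuilder, Int32Builder, ListBuilder};
use arrow_array::Array;
use arrow_schema::DataType;

fn main() {
    // Pick the child builder dynamically from a DataType.
    let child = make_builder(&DataType::Int32, 0);
    let mut list = ListBuilder::new(child);

    // The concrete child type is recovered by downcasting through Any.
    list.values()
        .as_any_mut()
        .downcast_mut::<Int32Builder>()
        .expect("should be an Int32Builder")
        .append_slice(&[1, 2, 3]);
    list.append(true);

    let array = list.finish();
    assert_eq!(array.len(), 1);
}
```

The repeated `as_any_mut().downcast_mut::<...>()` chains in the test below are the cost of this dynamism: the boxed builder erases the child type, so every access must re-assert it.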
+ let values_builder = make_builder( + &DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + let mut builder = ListBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(1); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(2); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(3); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(4); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(true); + builder.append(true); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(5); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(6); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(7); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(true); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(false); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(8); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(true); + builder.append(true); + + builder.append(false); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(9); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(10); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an ListBuilder") + .append(true); + builder.append(true); + + let l1 = builder.finish(); + + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5, 6]); + let l2 = l1.values().as_list::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8, 10]); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 
10]); + } } diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 4e3ec4a7944d..3a5244ed81a0 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -86,11 +86,7 @@ impl Default for MapFieldNames { impl MapBuilder { /// Creates a new `MapBuilder` - pub fn new( - field_names: Option, - key_builder: K, - value_builder: V, - ) -> Self { + pub fn new(field_names: Option, key_builder: K, value_builder: V) -> Self { let capacity = key_builder.len(); Self::with_capacity(field_names, key_builder, value_builder, capacity) } @@ -243,12 +239,9 @@ mod tests { use super::*; #[test] - #[should_panic( - expected = "Keys array must have no null values, found 1 null value(s)" - )] + #[should_panic(expected = "Keys array must have no null values, found 1 null value(s)")] fn test_map_builder_with_null_keys_panics() { - let mut builder = - MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); + let mut builder = MapBuilder::new(None, StringBuilder::new(), Int32Builder::new()); builder.keys().append_null(); builder.values().append_value(42); builder.append(true).unwrap(); diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index 38a7500dd55f..8382f7af87b0 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -265,6 +265,36 @@ pub trait ArrayBuilder: Any + Send { fn into_box_any(self: Box) -> Box; } +impl ArrayBuilder for Box { + fn len(&self) -> usize { + (**self).len() + } + + fn is_empty(&self) -> bool { + (**self).is_empty() + } + + fn finish(&mut self) -> ArrayRef { + (**self).finish() + } + + fn finish_cloned(&self) -> ArrayRef { + (**self).finish_cloned() + } + + fn as_any(&self) -> &dyn Any { + (**self).as_any() + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + (**self).as_any_mut() + } + + fn into_box_any(self: Box) -> Box { + self + } +} + /// Builder for [`ListArray`](crate::array::ListArray) pub type ListBuilder = GenericListBuilder; diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index b23d6bba36c4..0aad2dbfce0e 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -161,9 +161,7 @@ impl PrimitiveBuilder { let values_builder = BufferBuilder::::new_from_buffer(values_buffer); let null_buffer_builder = null_buffer - .map(|buffer| { - NullBufferBuilder::new_from_buffer(buffer, values_builder.len()) - }) + .map(|buffer| NullBufferBuilder::new_from_buffer(buffer, values_builder.len())) .unwrap_or_else(|| NullBufferBuilder::new_with_len(values_builder.len())); Self { @@ -256,10 +254,7 @@ impl PrimitiveBuilder { /// This requires the iterator be a trusted length. This could instead require /// the iterator implement `TrustedLen` once that is stabilized. #[inline] - pub unsafe fn append_trusted_len_iter( - &mut self, - iter: impl IntoIterator, - ) { + pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator) { let iter = iter.into_iter(); let len = iter .size_hint() @@ -328,11 +323,7 @@ impl PrimitiveBuilder { impl PrimitiveBuilder
<P: DecimalType> { /// Sets the precision and scale - pub fn with_precision_and_scale( - self, - precision: u8, - scale: i8, - ) -> Result<Self, ArrowError> { + pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> { validate_decimal_precision_and_scale::<P>
<P: DecimalType> { /// Sets the precision and scale - pub fn with_precision_and_scale( - self, - precision: u8, - scale: i8, - ) -> Result<Self, ArrowError> { + pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> { validate_decimal_precision_and_scale::<P>
(precision, scale)?; Ok(Self { data_type: P::TYPE_CONSTRUCTOR(precision, scale), @@ -592,25 +583,21 @@ mod tests { #[test] fn test_primitive_array_builder_with_data_type() { - let mut builder = - Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2)); + let mut builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2)); builder.append_value(1); let array = builder.finish(); assert_eq!(array.precision(), 1); assert_eq!(array.scale(), 2); let data_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("+00:00".into())); - let mut builder = - TimestampNanosecondBuilder::new().with_data_type(data_type.clone()); + let mut builder = TimestampNanosecondBuilder::new().with_data_type(data_type.clone()); builder.append_value(1); let array = builder.finish(); assert_eq!(array.data_type(), &data_type); } #[test] - #[should_panic( - expected = "incompatible data type for builder, expected Int32 got Int64" - )] + #[should_panic(expected = "incompatible data type for builder, expected Int32 got Int64")] fn test_invalid_with_data_type() { Int32Builder::new().with_data_type(DataType::Int64); } diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index 7323ee57627d..a47b2d30d4f3 100644 --- a/arrow-array/src/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -221,8 +221,7 @@ where let key = self.values_builder.len(); self.values_builder.append_value(value); vacant.insert(key); - K::Native::from_usize(key) - .ok_or(ArrowError::DictionaryKeyOverflowError)? + K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)? } Entry::Occupied(o) => K::Native::usize_as(*o.get()), }; @@ -266,10 +265,8 @@ where let values = self.values_builder.finish(); let keys = self.keys_builder.finish(); - let data_type = DataType::Dictionary( - Box::new(K::DATA_TYPE), - Box::new(values.data_type().clone()), - ); + let data_type = + DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(values.data_type().clone())); let builder = keys .into_data() @@ -285,8 +282,7 @@ where let values = self.values_builder.finish_cloned(); let keys = self.keys_builder.finish_cloned(); - let data_type = - DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE)); + let data_type = DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE)); let builder = keys .into_data() @@ -331,8 +327,7 @@ mod tests { #[test] fn test_primitive_dictionary_builder() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(3, 2); + let mut builder = PrimitiveDictionaryBuilder::::with_capacity(3, 2); builder.append(12345678).unwrap(); builder.append_null(); builder.append(22345678).unwrap(); @@ -384,8 +379,7 @@ mod tests { #[test] fn test_primitive_dictionary_with_builders() { let keys_builder = PrimitiveBuilder::::new(); - let values_builder = - Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2)); + let values_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(1, 2)); let mut builder = PrimitiveDictionaryBuilder::::new_from_empty_builders( keys_builder, diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 0c878e621056..06b8385b3164 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -106,24 +106,18 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box Box::new(Float32Builder::with_capacity(capacity)), 
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)), DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)), - DataType::LargeBinary => { - Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)) - } + DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)), DataType::FixedSizeBinary(len) => { Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len)) } DataType::Decimal128(p, s) => Box::new( - Decimal128Builder::with_capacity(capacity) - .with_data_type(DataType::Decimal128(*p, *s)), + Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)), ), DataType::Decimal256(p, s) => Box::new( - Decimal256Builder::with_capacity(capacity) - .with_data_type(DataType::Decimal256(*p, *s)), + Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)), ), DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)), - DataType::LargeUtf8 => { - Box::new(LargeStringBuilder::with_capacity(capacity, 1024)) - } + DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)), DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)), DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)), DataType::Time32(TimeUnit::Second) => { @@ -175,19 +169,18 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { Box::new(DurationNanosecondBuilder::with_capacity(capacity)) } - DataType::Struct(fields) => { - Box::new(StructBuilder::from_fields(fields.clone(), capacity)) + DataType::List(field) => { + let builder = make_builder(field.data_type(), capacity); + Box::new(ListBuilder::with_capacity(builder, capacity)) } + DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)), t => panic!("Data type {t:?} is not currently supported"), } } impl StructBuilder { /// Creates a new `StructBuilder` - pub fn new( - fields: impl Into, - field_builders: Vec>, - ) -> Self { + pub fn new(fields: impl Into, field_builders: Vec>) -> Self { Self { field_builders, fields: fields.into(), @@ -233,6 +226,9 @@ impl StructBuilder { /// Builds the `StructArray` and reset this builder. 
pub fn finish(&mut self) -> StructArray { self.validate_content(); + if self.fields.is_empty() { + return StructArray::new_empty_fields(self.len(), self.null_buffer_builder.finish()); + } let arrays = self.field_builders.iter_mut().map(|f| f.finish()).collect(); let nulls = self.null_buffer_builder.finish(); @@ -243,6 +239,13 @@ impl StructBuilder { pub fn finish_cloned(&self) -> StructArray { self.validate_content(); + if self.fields.is_empty() { + return StructArray::new_empty_fields( + self.len(), + self.null_buffer_builder.finish_cloned(), + ); + } + let arrays = self .field_builders .iter() @@ -508,14 +511,18 @@ mod tests { #[test] #[should_panic( - expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) is not currently supported" + expected = "Data type Map(Field { name: \"entries\", data_type: Struct([Field { name: \"keys\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: \"values\", data_type: UInt32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }]), nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, false) is not currently supported" )] fn test_struct_array_builder_from_schema_unsupported_type() { - let list_type = - DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); + let keys = Arc::new(Field::new("keys", DataType::Int32, false)); + let values = Arc::new(Field::new("values", DataType::UInt32, false)); + let struct_type = DataType::Struct(Fields::from(vec![keys, values])); + let map_data_type = + DataType::Map(Arc::new(Field::new("entries", struct_type, false)), false); + let fields = vec![ Field::new("f1", DataType::Int16, false), - Field::new("f2", list_type, false), + Field::new("f2", map_data_type, false), ]; let _ = StructBuilder::from_fields(fields, 5); @@ -558,9 +565,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Number of fields is not equal to the number of field_builders." - )] + #[should_panic(expected = "Number of fields is not equal to the number of field_builders.")] fn test_struct_array_builder_unequal_field_field_builders() { let int_builder = Int32Builder::with_capacity(10); @@ -591,4 +596,19 @@ mod tests { let mut sa = StructBuilder::new(fields, field_builders); sa.finish(); } + + #[test] + fn test_empty() { + let mut builder = StructBuilder::new(Fields::empty(), vec![]); + builder.append(true); + builder.append(false); + + let a1 = builder.finish_cloned(); + let a2 = builder.finish(); + assert_eq!(a1, a2); + assert_eq!(a1.len(), 2); + assert_eq!(a1.null_count(), 1); + assert!(a1.is_valid(0)); + assert!(a1.is_null(1)); + } } diff --git a/arrow-array/src/builder/union_builder.rs b/arrow-array/src/builder/union_builder.rs index f74afb2aa9aa..4f88c9d41b9a 100644 --- a/arrow-array/src/builder/union_builder.rs +++ b/arrow-array/src/builder/union_builder.rs @@ -65,11 +65,7 @@ impl FieldDataValues for BufferBuilder { impl FieldData { /// Creates a new `FieldData`. 
- fn new( - type_id: i8, - data_type: DataType, - capacity: usize, - ) -> Self { + fn new(type_id: i8, data_type: DataType, capacity: usize) -> Self { Self { type_id, data_type, @@ -222,7 +218,12 @@ impl UnionBuilder { let mut field_data = match self.fields.remove(&type_name) { Some(data) => { if data.data_type != T::DATA_TYPE { - return Err(ArrowError::InvalidArgumentError(format!("Attempt to write col \"{}\" with type {} doesn't match existing type {}", type_name, T::DATA_TYPE, data.data_type))); + return Err(ArrowError::InvalidArgumentError(format!( + "Attempt to write col \"{}\" with type {} doesn't match existing type {}", + type_name, + T::DATA_TYPE, + data.data_type + ))); } data } diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index b6cda44e8973..2e21f3e7e640 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -578,9 +578,7 @@ macro_rules! downcast_run_array { /// Force downcast of an [`Array`], such as an [`ArrayRef`] to /// [`GenericListArray`], panicking on failure. -pub fn as_generic_list_array( - arr: &dyn Array, -) -> &GenericListArray { +pub fn as_generic_list_array(arr: &dyn Array) -> &GenericListArray { arr.as_any() .downcast_ref::>() .expect("Unable to downcast to list array") @@ -612,9 +610,7 @@ pub fn as_large_list_array(arr: &dyn Array) -> &LargeListArray { /// Force downcast of an [`Array`], such as an [`ArrayRef`] to /// [`GenericBinaryArray`], panicking on failure. #[inline] -pub fn as_generic_binary_array( - arr: &dyn Array, -) -> &GenericBinaryArray { +pub fn as_generic_binary_array(arr: &dyn Array) -> &GenericBinaryArray { arr.as_any() .downcast_ref::>() .expect("Unable to downcast to binary array") @@ -826,8 +822,7 @@ pub trait AsArray: private::Sealed { } /// Downcast this to a [`DictionaryArray`] returning `None` if not possible - fn as_dictionary_opt(&self) - -> Option<&DictionaryArray>; + fn as_dictionary_opt(&self) -> Option<&DictionaryArray>; /// Downcast this to a [`DictionaryArray`] panicking if not possible fn as_dictionary(&self) -> &DictionaryArray { @@ -877,9 +872,7 @@ impl AsArray for dyn Array + '_ { self.as_any().downcast_ref() } - fn as_dictionary_opt( - &self, - ) -> Option<&DictionaryArray> { + fn as_dictionary_opt(&self) -> Option<&DictionaryArray> { self.as_any().downcast_ref() } @@ -926,9 +919,7 @@ impl AsArray for ArrayRef { self.as_any().downcast_ref() } - fn as_dictionary_opt( - &self, - ) -> Option<&DictionaryArray> { + fn as_dictionary_opt(&self) -> Option<&DictionaryArray> { self.as_ref().as_dictionary_opt() } @@ -972,9 +963,7 @@ mod tests { #[test] fn test_decimal256array() { - let a = Decimal256Array::from_iter_values( - [1, 2, 4, 5].into_iter().map(i256::from_i128), - ); + let a = Decimal256Array::from_iter_values([1, 2, 4, 5].into_iter().map(i256::from_i128)); assert!(!as_primitive_array::(&a).is_empty()); } } diff --git a/arrow-array/src/delta.rs b/arrow-array/src/delta.rs index bf9ee5ca685f..d9aa4aa6de5d 100644 --- a/arrow-array/src/delta.rs +++ b/arrow-array/src/delta.rs @@ -55,10 +55,7 @@ pub(crate) fn add_months_datetime( /// Add the given number of days to the given datetime. /// /// Returns `None` when it will result in overflow. 
-pub(crate) fn add_days_datetime( - dt: DateTime, - days: i32, -) -> Option> { +pub(crate) fn add_days_datetime(dt: DateTime, days: i32) -> Option> { match days.cmp(&0) { Ordering::Equal => Some(dt), Ordering::Greater => dt.checked_add_days(Days::new(days as u64)), @@ -83,10 +80,7 @@ pub(crate) fn sub_months_datetime( /// Substract the given number of days to the given datetime. /// /// Returns `None` when it will result in overflow. -pub(crate) fn sub_days_datetime( - dt: DateTime, - days: i32, -) -> Option> { +pub(crate) fn sub_days_datetime(dt: DateTime, days: i32) -> Option> { match days.cmp(&0) { Ordering::Equal => Some(dt), Ordering::Greater => dt.checked_sub_days(Days::new(days as u64)), diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs index a198332ca5b5..3f9cc0d525c1 100644 --- a/arrow-array/src/iterator.rs +++ b/arrow-array/src/iterator.rs @@ -18,8 +18,8 @@ //! Idiomatic iterators for [`Array`](crate::Array) use crate::array::{ - ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, - GenericListArray, GenericStringArray, PrimitiveArray, + ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray, + GenericStringArray, PrimitiveArray, }; use crate::{FixedSizeListArray, MapArray}; use arrow_buffer::NullBuffer; @@ -187,8 +187,7 @@ mod tests { #[test] fn test_string_array_iter_round_trip() { - let array = - StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]); + let array = StringArray::from(vec![Some("a"), None, Some("aaa"), None, Some("aaaaa")]); let array = Arc::new(array) as ArrayRef; let array = array.as_any().downcast_ref::().unwrap(); @@ -211,8 +210,7 @@ mod tests { // check if DoubleEndedIterator is implemented let result: StringArray = array.iter().rev().collect(); - let rev_array = - StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]); + let rev_array = StringArray::from(vec![Some("aaaaa"), None, Some("aaa"), None, Some("a")]); assert_eq!(result, rev_array); // check if ExactSizeIterator is implemented let _ = array.iter().rposition(|opt_b| opt_b == Some("a")); diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs index afb7ec5e6e44..ef98c5efefb0 100644 --- a/arrow-array/src/lib.rs +++ b/arrow-array/src/lib.rs @@ -182,8 +182,7 @@ pub use array::*; mod record_batch; pub use record_batch::{ - RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, - RecordBatchWriter, + RecordBatch, RecordBatchIterator, RecordBatchOptions, RecordBatchReader, RecordBatchWriter, }; mod arithmetic; diff --git a/arrow-array/src/numeric.rs b/arrow-array/src/numeric.rs index afc0e2c33010..b5e474ba696a 100644 --- a/arrow-array/src/numeric.rs +++ b/arrow-array/src/numeric.rs @@ -179,8 +179,8 @@ macro_rules! make_numeric_type { 16 => { // same general logic as for 8 lanes, extended to 16 bits let vecidx = i32x16::new( - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, - 8192, 16384, 32768, + 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, + 32768, ); let vecmask = i32x16::splat((mask & 0xFFFF) as i32); @@ -194,21 +194,19 @@ macro_rules! 
make_numeric_type { let tmp = &mut [0_i16; 32]; let vecidx = i32x16::new( - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, - 8192, 16384, 32768, + 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, + 32768, ); let vecmask = i32x16::splat((mask & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i16x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[0..16]); + i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]); let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i16x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[16..32]); + i16x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]); unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) } } @@ -218,33 +216,29 @@ macro_rules! make_numeric_type { let tmp = &mut [0_i8; 64]; let vecidx = i32x16::new( - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, - 8192, 16384, 32768, + 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, + 32768, ); let vecmask = i32x16::splat((mask & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i8x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[0..16]); + i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[0..16]); let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i8x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[16..32]); + i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[16..32]); let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i8x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[32..48]); + i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[32..48]); let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32); let vecmask = (vecidx & vecmask).eq(vecidx); - i8x16::from_cast(vecmask) - .write_to_slice_unaligned(&mut tmp[48..64]); + i8x16::from_cast(vecmask).write_to_slice_unaligned(&mut tmp[48..64]); unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) } } @@ -269,11 +263,7 @@ macro_rules! make_numeric_type { /// Selects elements of `a` and `b` using `mask` #[inline] - fn mask_select( - mask: Self::SimdMask, - a: Self::Simd, - b: Self::Simd, - ) -> Self::Simd { + fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd { mask.select(a, b) } @@ -327,10 +317,7 @@ macro_rules! 
make_numeric_type { } #[inline] - fn unary_op Self::Simd>( - a: Self::Simd, - op: F, - ) -> Self::Simd { + fn unary_op Self::Simd>(a: Self::Simd, op: F) -> Self::Simd { op(a) } } @@ -581,8 +568,7 @@ mod tests { let mask = 0b1101; let actual = IntervalMonthDayNanoType::mask_from_u64(mask); let expected = expected_mask!(i128, mask); - let expected = - m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice())); + let expected = m128x4::from_cast(i128x4::from_slice_unaligned(expected.as_slice())); assert_eq!(expected, actual); } @@ -612,8 +598,7 @@ mod tests { let mask = 0b10101010_10101010; let actual = Float32Type::mask_from_u64(mask); let expected = expected_mask!(i32, mask); - let expected = - m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice())); + let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice())); assert_eq!(expected, actual); } @@ -623,8 +608,7 @@ mod tests { let mask = 0b01010101_01010101; let actual = Int32Type::mask_from_u64(mask); let expected = expected_mask!(i32, mask); - let expected = - m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice())); + let expected = m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice())); assert_eq!(expected, actual); } @@ -634,17 +618,14 @@ mod tests { let mask = 0b01010101_01010101_10101010_10101010; let actual = UInt16Type::mask_from_u64(mask); let expected = expected_mask!(i16, mask); - dbg!(&expected); - let expected = - m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice())); + let expected = m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice())); assert_eq!(expected, actual); } #[test] fn test_mask_i8() { - let mask = - 0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010; + let mask = 0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010; let actual = Int8Type::mask_from_u64(mask); let expected = expected_mask!(i8, mask); let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice())); diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index 27804447fba6..4e859fdfe7ea 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -107,10 +107,7 @@ impl RecordBatch { /// vec![Arc::new(id_array)] /// ).unwrap(); /// ``` - pub fn try_new( - schema: SchemaRef, - columns: Vec, - ) -> Result { + pub fn try_new(schema: SchemaRef, columns: Vec) -> Result { let options = RecordBatchOptions::new(); Self::try_new_impl(schema, columns, &options) } @@ -179,9 +176,7 @@ impl RecordBatch { // check that all columns have the same row count if columns.iter().any(|c| c.len() != row_count) { let err = match options.row_count { - Some(_) => { - "all columns in a record batch must have the specified row count" - } + Some(_) => "all columns in a record batch must have the specified row count", None => "all columns in a record batch must have the same length", }; return Err(ArrowError::InvalidArgumentError(err.to_string())); @@ -190,9 +185,7 @@ impl RecordBatch { // function for comparing column type and field type // return true if 2 types are not matched let type_not_match = if options.match_field_names { - |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| { - col_type != field_type - } + |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| col_type != field_type } else { |(_, (col_type, field_type)): &(usize, (&DataType, &DataType))| { !col_type.equals_datatype(field_type) @@ -334,6 +327,40 @@ impl RecordBatch { 
&self.columns[..] } + /// Remove column by index and return it. + /// + /// Return the `ArrayRef` if the column is removed. + /// + /// # Panics + /// + /// Panics if `index`` out of bounds. + /// + /// # Example + /// + /// ``` + /// use std::sync::Arc; + /// use arrow_array::{BooleanArray, Int32Array, RecordBatch}; + /// use arrow_schema::{DataType, Field, Schema}; + /// let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]); + /// let bool_array = BooleanArray::from(vec![true, false, false, true, true]); + /// let schema = Schema::new(vec![ + /// Field::new("id", DataType::Int32, false), + /// Field::new("bool", DataType::Boolean, false), + /// ]); + /// + /// let mut batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array), Arc::new(bool_array)]).unwrap(); + /// + /// let removed_column = batch.remove_column(0); + /// assert_eq!(removed_column.as_any().downcast_ref::().unwrap(), &Int32Array::from(vec![1, 2, 3, 4, 5])); + /// assert_eq!(batch.num_columns(), 1); + /// ``` + pub fn remove_column(&mut self, index: usize) -> ArrayRef { + let mut builder = SchemaBuilder::from(self.schema.fields()); + builder.remove(index); + self.schema = Arc::new(builder.finish()); + self.columns.remove(index) + } + /// Return a new RecordBatch where each column is sliced /// according to `offset` and `length` /// @@ -484,7 +511,11 @@ impl From for RecordBatch { fn from(value: StructArray) -> Self { let row_count = value.len(); let (fields, columns, nulls) = value.into_parts(); - assert_eq!(nulls.map(|n| n.null_count()).unwrap_or_default(), 0, "Cannot convert nullable StructArray to RecordBatch, see StructArray documentation"); + assert_eq!( + nulls.map(|n| n.null_count()).unwrap_or_default(), + 0, + "Cannot convert nullable StructArray to RecordBatch, see StructArray documentation" + ); RecordBatch { schema: Arc::new(Schema::new(fields)), @@ -588,9 +619,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::{ - BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray, - }; + use crate::{BooleanArray, Int32Array, Int64Array, Int8Array, ListArray, StringArray}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::Fields; @@ -606,8 +635,7 @@ mod tests { let b = StringArray::from(vec!["a", "b", "c", "d", "e"]); let record_batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]) - .unwrap(); + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap(); check_batch(record_batch, 5) } @@ -622,8 +650,7 @@ mod tests { let b = StringArray::from(vec!["a", "b", "c", "d", "e"]); let record_batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]) - .unwrap(); + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap(); assert_eq!(record_batch.get_array_memory_size(), 364); } @@ -649,8 +676,7 @@ mod tests { let b = StringArray::from(vec!["a", "b", "c", "d", "e", "f", "h", "i"]); let record_batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]) - .unwrap(); + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]).unwrap(); let offset = 2; let length = 5; @@ -699,8 +725,8 @@ mod tests { ])); let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); - let record_batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)]) - .expect("valid conversion"); + let record_batch = + RecordBatch::try_from_iter(vec![("a", a), ("b", b)]).expect("valid conversion"); let expected_schema = 
Schema::new(vec![ Field::new("a", DataType::Int32, true), @@ -716,11 +742,9 @@ mod tests { let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])); // Note there are no nulls in a or b, but we specify that b is nullable - let record_batch = RecordBatch::try_from_iter_with_nullable(vec![ - ("a", a, false), - ("b", b, true), - ]) - .expect("valid conversion"); + let record_batch = + RecordBatch::try_from_iter_with_nullable(vec![("a", a, false), ("b", b, true)]) + .expect("valid conversion"); let expected_schema = Schema::new(vec![ Field::new("a", DataType::Int32, false), @@ -792,8 +816,7 @@ mod tests { let a = Int32Array::from(vec![1, 2, 3, 4, 5]); let b = Int32Array::from(vec![1, 2, 3, 4, 5]); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(a), Arc::new(b)]); assert!(batch.is_err()); } @@ -863,11 +886,8 @@ mod tests { Field::new("id", DataType::Int32, false), Field::new("val", DataType::Int32, false), ]); - let record_batch = RecordBatch::try_new( - Arc::new(schema1), - vec![id_arr.clone(), val_arr.clone()], - ) - .unwrap(); + let record_batch = + RecordBatch::try_new(Arc::new(schema1), vec![id_arr.clone(), val_arr.clone()]).unwrap(); assert_eq!(record_batch["id"].as_ref(), id_arr.as_ref()); assert_eq!(record_batch["val"].as_ref(), val_arr.as_ref()); @@ -1005,15 +1025,12 @@ mod tests { let b: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"])); let c: ArrayRef = Arc::new(StringArray::from(vec!["d", "e", "f"])); - let record_batch = RecordBatch::try_from_iter(vec![ - ("a", a.clone()), - ("b", b.clone()), - ("c", c.clone()), - ]) - .expect("valid conversion"); + let record_batch = + RecordBatch::try_from_iter(vec![("a", a.clone()), ("b", b.clone()), ("c", c.clone())]) + .expect("valid conversion"); - let expected = RecordBatch::try_from_iter(vec![("a", a), ("c", c)]) - .expect("valid conversion"); + let expected = + RecordBatch::try_from_iter(vec![("a", a), ("c", c)]).expect("valid conversion"); assert_eq!(expected, record_batch.project(&[0, 2]).unwrap()); } @@ -1049,8 +1066,7 @@ mod tests { let options = RecordBatchOptions::new().with_row_count(Some(10)); - let ok = - RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); + let ok = RecordBatch::try_new_with_options(schema.clone(), vec![], &options).unwrap(); assert_eq!(ok.num_rows(), 10); let a = ok.slice(2, 5); diff --git a/arrow-array/src/run_iterator.rs b/arrow-array/src/run_iterator.rs index 489aabf4756a..7a98fccb73b5 100644 --- a/arrow-array/src/run_iterator.rs +++ b/arrow-array/src/run_iterator.rs @@ -86,8 +86,7 @@ where // If current logical index is greater than current run end index then increment // the physical index. let run_ends = self.array.run_ends().values(); - if self.current_front_logical >= run_ends[self.current_front_physical].as_usize() - { + if self.current_front_logical >= run_ends[self.current_front_physical].as_usize() { // As the run_ends is expected to be strictly increasing, there // should be at least one logical entry in one physical entry. Because of this // reason the next value can be accessed by incrementing physical index once. 
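The run-iterator hunks above advance a physical index through `run_ends` as the logical index crosses each run boundary. As a minimal, self-contained sketch of the run-end encoding idea (illustrative helper names, not the actual `RunArray` API):

```rust
// Run-end encoding stores each run once: the logical sequence
// [a, a, a, b, b, c, c, c, c] becomes values = [a, b, c] and
// run_ends = [3, 5, 9], where run_ends[i] is the exclusive end of run i
// and is strictly increasing.
fn logical_to_physical(run_ends: &[usize], logical: usize) -> usize {
    // Advance while the logical index has passed the current run's end,
    // mirroring the incremental advance in the iterator above.
    let mut physical = 0;
    while logical >= run_ends[physical] {
        physical += 1;
    }
    physical
}

fn main() {
    let values = ['a', 'b', 'c'];
    let run_ends = [3, 5, 9];
    assert_eq!(values[logical_to_physical(&run_ends, 0)], 'a');
    assert_eq!(values[logical_to_physical(&run_ends, 3)], 'b'); // first index past run 0
    assert_eq!(values[logical_to_physical(&run_ends, 8)], 'c'); // last valid logical index
}
```

Because `run_ends` is strictly increasing, every physical entry covers at least one logical index, so a sequential iterator only ever increments the physical cursor by one per boundary, which is exactly what the `current_front_physical` / `current_back_physical` bookkeeping above relies on.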
@@ -136,8 +135,7 @@ where let run_ends = self.array.run_ends().values(); if self.current_back_physical > 0 - && self.current_back_logical - < run_ends[self.current_back_physical - 1].as_usize() + && self.current_back_logical < run_ends[self.current_back_physical - 1].as_usize() { // As the run_ends is expected to be strictly increasing, there // should be at least one logical entry in one physical entry. Because of this @@ -211,8 +209,7 @@ mod tests { seed.shuffle(&mut rng); } // repeat the items between 1 and 8 times. Cap the length for smaller sized arrays - let num = - max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); + let num = max_run_length.min(rand::thread_rng().gen_range(1..=max_run_length)); for _ in 0..num { result.push(seed[ix]); } @@ -285,8 +282,7 @@ mod tests { for logical_len in logical_lengths { let input_array = build_input_array(logical_len); - let mut run_array_builder = - PrimitiveRunBuilder::::new(); + let mut run_array_builder = PrimitiveRunBuilder::::new(); run_array_builder.extend(input_array.iter().copied()); let run_array = run_array_builder.finish(); let typed_array = run_array.downcast::().unwrap(); @@ -327,8 +323,7 @@ mod tests { }) .collect(); - let result_asref: Vec> = - result.iter().map(|f| f.as_deref()).collect(); + let result_asref: Vec> = result.iter().map(|f| f.as_deref()).collect(); let expected_vec = vec![ Some("abb"), @@ -364,8 +359,7 @@ mod tests { // Iterate on sliced typed run array let actual: Vec> = sliced_typed_run_array.into_iter().collect(); - let expected: Vec> = - input_array.iter().take(slice_len).copied().collect(); + let expected: Vec> = input_array.iter().take(slice_len).copied().collect(); assert_eq!(expected, actual); // test for offset = total_len - slice_len, length = slice_len diff --git a/arrow-array/src/scalar.rs b/arrow-array/src/scalar.rs index 7dfdbddd964a..f2a696a8f329 100644 --- a/arrow-array/src/scalar.rs +++ b/arrow-array/src/scalar.rs @@ -98,9 +98,32 @@ impl Datum for &dyn Array { } } -/// A wrapper around a single value [`Array`] indicating kernels should treat it as a scalar value +/// A wrapper around a single value [`Array`] that implements +/// [`Datum`] and indicates [compute] kernels should treat this array +/// as a scalar value (a single value). /// -/// See [`Datum`] for more information +/// Using a [`Scalar`] is often much more efficient than creating an +/// [`Array`] with the same (repeated) value. +/// +/// See [`Datum`] for more information. 
+/// +/// # Example +/// +/// ```rust +/// # use arrow_array::{Scalar, Int32Array, ArrayRef}; +/// # fn get_array() -> ArrayRef { std::sync::Arc::new(Int32Array::from(vec![42])) } +/// // Create a (typed) scalar for Int32Array for the value 42 +/// let scalar = Scalar::new(Int32Array::from(vec![42])); +/// +/// // Create a scalar using PrimitiveArray::new_scalar +/// let scalar = Int32Array::new_scalar(42); +/// +/// // Create a scalar from an ArrayRef (for dynamically typed Arrays) +/// let array: ArrayRef = get_array(); +/// let scalar = Scalar::new(array); +/// ``` +/// +/// [compute]: https://docs.rs/arrow/latest/arrow/compute/index.html #[derive(Debug, Copy, Clone)] pub struct Scalar<T: Array>(T); diff --git a/arrow-array/src/temporal_conversions.rs b/arrow-array/src/temporal_conversions.rs index f1f3f36d3c61..e0edcc9bc182 100644 --- a/arrow-array/src/temporal_conversions.rs +++ b/arrow-array/src/temporal_conversions.rs @@ -20,9 +20,7 @@ use crate::timezone::Tz; use crate::ArrowPrimitiveType; use arrow_schema::{DataType, TimeUnit}; -use chrono::{ - DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc, -}; +use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Timelike, Utc}; /// Number of seconds in a day pub const SECONDS_IN_DAY: i64 = 86_400; @@ -221,10 +219,7 @@ pub fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> { } /// Converts an [`ArrowPrimitiveType`] to [`DateTime<Tz>`] -pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>( - v: i64, - tz: Tz, -) -> Option<DateTime<Tz>> { +pub fn as_datetime_with_timezone<T: ArrowPrimitiveType>(v: i64, tz: Tz) -> Option<DateTime<Tz>> { let naive = as_datetime::<T>(v)?; Some(Utc.from_utc_datetime(&naive).with_timezone(&tz)) } @@ -274,8 +269,8 @@ pub fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> { #[cfg(test)] mod tests { use crate::temporal_conversions::{ - date64_to_datetime, split_second, timestamp_ms_to_datetime, - timestamp_ns_to_datetime, timestamp_us_to_datetime, NANOSECONDS, + date64_to_datetime, split_second, timestamp_ms_to_datetime, timestamp_ns_to_datetime, + timestamp_us_to_datetime, NANOSECONDS, }; use chrono::NaiveDateTime; diff --git a/arrow-array/src/timezone.rs b/arrow-array/src/timezone.rs index f56189c46512..dc91886f34c5 100644 --- a/arrow-array/src/timezone.rs +++ b/arrow-array/src/timezone.rs @@ -38,8 +38,8 @@ fn parse_fixed_offset(tz: &str) -> Option<FixedOffset> { if values.iter().any(|x| *x > 9) { return None; } - let secs = (values[0] * 10 + values[1]) as i32 * 60 * 60 + (values[2] * 10 + values[3]) as i32 * 60; + let secs = + (values[0] * 10 + values[1]) as i32 * 60 * 60 + (values[2] * 10 + values[3]) as i32 * 60; match bytes[0] { b'+' => FixedOffset::east_opt(secs), @@ -122,10 +122,7 @@ mod private { }) } - fn offset_from_local_datetime( - &self, - local: &NaiveDateTime, - ) -> LocalResult<TzOffset> { + fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<TzOffset> { tz!(self, tz, { tz.offset_from_local_datetime(local).map(|x| TzOffset { tz: *self, @@ -285,10 +282,7 @@ mod private { self.0.offset_from_local_date(local).map(TzOffset) } - fn offset_from_local_datetime( - &self, - local: &NaiveDateTime, - ) -> LocalResult<TzOffset> { + fn offset_from_local_datetime(&self, local: &NaiveDateTime) -> LocalResult<TzOffset> { self.0.offset_from_local_datetime(local).map(TzOffset) } diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index d79b32a991ed..16d0e822d052 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -18,8 +18,7 @@ //!
Zero-sized types used to parameterize generic array implementations use crate::delta::{ - add_days_datetime, add_months_datetime, shift_months, sub_days_datetime, - sub_months_datetime, + add_days_datetime, add_months_datetime, shift_months, sub_days_datetime, sub_months_datetime, }; use crate::temporal_conversions::as_datetime_with_timezone; use crate::timezone::Tz; @@ -27,9 +26,8 @@ use crate::{ArrowNativeTypeOp, OffsetSizeTrait}; use arrow_buffer::{i256, Buffer, OffsetBuffer}; use arrow_data::decimal::{validate_decimal256_precision, validate_decimal_precision}; use arrow_schema::{ - ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, - DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, - DECIMAL_DEFAULT_SCALE, + ArrowError, DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, + DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; use chrono::{Duration, NaiveDate, NaiveDateTime}; use half::f16; @@ -875,9 +873,7 @@ impl IntervalDayTimeType { /// /// * `i` - The IntervalDayTimeType to convert #[inline] - pub fn to_parts( - i: ::Native, - ) -> (i32, i32) { + pub fn to_parts(i: ::Native) -> (i32, i32) { let days = (i >> 32) as i32; let ms = i as i32; (days, ms) @@ -1221,10 +1217,7 @@ pub trait DecimalType: fn format_decimal(value: Self::Native, precision: u8, scale: i8) -> String; /// Validates that `value` contains no more than `precision` decimal digits - fn validate_decimal_precision( - value: Self::Native, - precision: u8, - ) -> Result<(), ArrowError>; + fn validate_decimal_precision(value: Self::Native, precision: u8) -> Result<(), ArrowError>; } /// Validate that `precision` and `scale` are valid for `T` @@ -1368,12 +1361,14 @@ pub(crate) mod bytes { } impl ByteArrayNativeType for [u8] { + #[inline] unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self { b } } impl ByteArrayNativeType for str { + #[inline] unsafe fn from_bytes_unchecked(b: &[u8]) -> &Self { std::str::from_utf8_unchecked(b) } @@ -1398,10 +1393,7 @@ pub trait ByteArrayType: 'static + Send + Sync + bytes::ByteArrayTypeSealed { const DATA_TYPE: DataType; /// Verifies that every consecutive pair of `offsets` denotes a valid slice of `values` - fn validate( - offsets: &OffsetBuffer, - values: &Buffer, - ) -> Result<(), ArrowError>; + fn validate(offsets: &OffsetBuffer, values: &Buffer) -> Result<(), ArrowError>; } /// [`ByteArrayType`] for string arrays @@ -1420,10 +1412,7 @@ impl ByteArrayType for GenericStringType { DataType::Utf8 }; - fn validate( - offsets: &OffsetBuffer, - values: &Buffer, - ) -> Result<(), ArrowError> { + fn validate(offsets: &OffsetBuffer, values: &Buffer) -> Result<(), ArrowError> { // Verify that the slice as a whole is valid UTF-8 let validated = std::str::from_utf8(values).map_err(|e| { ArrowError::InvalidArgumentError(format!("Encountered non UTF-8 data: {e}")) @@ -1469,10 +1458,7 @@ impl ByteArrayType for GenericBinaryType { DataType::Binary }; - fn validate( - offsets: &OffsetBuffer, - values: &Buffer, - ) -> Result<(), ArrowError> { + fn validate(offsets: &OffsetBuffer, values: &Buffer) -> Result<(), ArrowError> { // offsets are guaranteed to be monotonically increasing and non-empty let max_offset = offsets.last().unwrap().as_usize(); if values.len() < max_offset { diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml new file mode 100644 index 000000000000..9575874c41d2 --- /dev/null +++ b/arrow-avro/Cargo.toml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-avro" +version = { workspace = true } +description = "Support for parsing Avro format into the Arrow format" +homepage = { workspace = true } +repository = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +keywords = { workspace = true } +include = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } + +[lib] +name = "arrow_avro" +path = "src/lib.rs" +bench = false + +[dependencies] +arrow-array = { workspace = true } +arrow-buffer = { workspace = true } +arrow-cast = { workspace = true } +arrow-data = { workspace = true } +arrow-schema = { workspace = true } +serde_json = { version = "1.0", default-features = false, features = ["std"] } +serde = { version = "1.0.188", features = ["derive"] } + +[dev-dependencies] + diff --git a/arrow/src/datatypes/ffi.rs b/arrow-avro/src/compression.rs similarity index 69% rename from arrow/src/datatypes/ffi.rs rename to arrow-avro/src/compression.rs index b248758bc120..a1a44fc22b68 100644 --- a/arrow/src/datatypes/ffi.rs +++ b/arrow-avro/src/compression.rs @@ -14,3 +14,19 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + +use serde::{Deserialize, Serialize}; + +/// The metadata key used for storing the JSON encoded [`CompressionCodec`] +pub const CODEC_METADATA_KEY: &str = "avro.codec"; + +#[derive(Debug, Copy, Clone, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum CompressionCodec { + Null, + Deflate, + BZip2, + Snappy, + XZ, + ZStandard, +} diff --git a/arrow-avro/src/lib.rs b/arrow-avro/src/lib.rs new file mode 100644 index 000000000000..c76ecb399a45 --- /dev/null +++ b/arrow-avro/src/lib.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Convert data to / from the [Apache Arrow] memory format and [Apache Avro] +//! +//! [Apache Arrow]: https://arrow.apache.org +//! 
[Apache Avro]: https://avro.apache.org/ + +#![allow(unused)] // Temporary + +pub mod reader; +mod schema; + +mod compression; + +#[cfg(test)] +mod test_util { + pub fn arrow_test_data(path: &str) -> String { + match std::env::var("ARROW_TEST_DATA") { + Ok(dir) => format!("{dir}/{path}"), + Err(_) => format!("../testing/data/{path}"), + } + } +} diff --git a/arrow-avro/src/reader/block.rs b/arrow-avro/src/reader/block.rs new file mode 100644 index 000000000000..479f0ef90909 --- /dev/null +++ b/arrow-avro/src/reader/block.rs @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Decoder for [`Block`] + +use crate::reader::vlq::VLQDecoder; +use arrow_schema::ArrowError; + +/// A file data block +/// +/// +#[derive(Debug, Default)] +pub struct Block { + /// The number of objects in this block + pub count: usize, + /// The serialized objects within this block + pub data: Vec, + /// The sync marker + pub sync: [u8; 16], +} + +/// A decoder for [`Block`] +#[derive(Debug)] +pub struct BlockDecoder { + state: BlockDecoderState, + in_progress: Block, + vlq_decoder: VLQDecoder, + bytes_remaining: usize, +} + +#[derive(Debug)] +enum BlockDecoderState { + Count, + Size, + Data, + Sync, + Finished, +} + +impl Default for BlockDecoder { + fn default() -> Self { + Self { + state: BlockDecoderState::Count, + in_progress: Default::default(), + vlq_decoder: Default::default(), + bytes_remaining: 0, + } + } +} + +impl BlockDecoder { + /// Parse [`Block`] from `buf`, returning the number of bytes read + /// + /// This method can be called multiple times with consecutive chunks of data, allowing + /// integration with chunked IO systems like [`BufRead::fill_buf`] + /// + /// All errors should be considered fatal, and decoding aborted + /// + /// Once an entire [`Block`] has been decoded this method will not read any further + /// input bytes, until [`Self::flush`] is called. 
Afterwards [`Self::decode`] + /// can then be used again to read the next block, if any + /// + /// [`BufRead::fill_buf`]: std::io::BufRead::fill_buf + pub fn decode(&mut self, mut buf: &[u8]) -> Result { + let max_read = buf.len(); + while !buf.is_empty() { + match self.state { + BlockDecoderState::Count => { + if let Some(c) = self.vlq_decoder.long(&mut buf) { + self.in_progress.count = c.try_into().map_err(|_| { + ArrowError::ParseError(format!( + "Block count cannot be negative, got {c}" + )) + })?; + + self.state = BlockDecoderState::Size; + } + } + BlockDecoderState::Size => { + if let Some(c) = self.vlq_decoder.long(&mut buf) { + self.bytes_remaining = c.try_into().map_err(|_| { + ArrowError::ParseError(format!( + "Block size cannot be negative, got {c}" + )) + })?; + + self.in_progress.data.reserve(self.bytes_remaining); + self.state = BlockDecoderState::Data; + } + } + BlockDecoderState::Data => { + let to_read = self.bytes_remaining.min(buf.len()); + self.in_progress.data.extend_from_slice(&buf[..to_read]); + buf = &buf[to_read..]; + self.bytes_remaining -= to_read; + if self.bytes_remaining == 0 { + self.bytes_remaining = 16; + self.state = BlockDecoderState::Sync; + } + } + BlockDecoderState::Sync => { + let to_decode = buf.len().min(self.bytes_remaining); + let write = &mut self.in_progress.sync[16 - to_decode..]; + write[..to_decode].copy_from_slice(&buf[..to_decode]); + self.bytes_remaining -= to_decode; + buf = &buf[to_decode..]; + if self.bytes_remaining == 0 { + self.state = BlockDecoderState::Finished; + } + } + BlockDecoderState::Finished => return Ok(max_read - buf.len()), + } + } + Ok(max_read) + } + + /// Flush this decoder returning the parsed [`Block`] if any + pub fn flush(&mut self) -> Option { + match self.state { + BlockDecoderState::Finished => { + self.state = BlockDecoderState::Count; + Some(std::mem::take(&mut self.in_progress)) + } + _ => None, + } + } +} diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs new file mode 100644 index 000000000000..00e85b39be73 --- /dev/null +++ b/arrow-avro/src/reader/header.rs @@ -0,0 +1,288 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Decoder for [`Header`] + +use crate::reader::vlq::VLQDecoder; +use crate::schema::Schema; +use arrow_schema::ArrowError; + +#[derive(Debug)] +enum HeaderDecoderState { + /// Decoding the [`MAGIC`] prefix + Magic, + /// Decoding a block count + BlockCount, + /// Decoding a block byte length + BlockLen, + /// Decoding a key length + KeyLen, + /// Decoding a key string + Key, + /// Decoding a value length + ValueLen, + /// Decoding a value payload + Value, + /// Decoding sync marker + Sync, + /// Finished decoding + Finished, +} + +/// A decoded header for an [Object Container File](https://avro.apache.org/docs/1.11.1/specification/#object-container-files) +#[derive(Debug, Clone)] +pub struct Header { + meta_offsets: Vec<usize>, + meta_buf: Vec<u8>, + sync: [u8; 16], +} + +impl Header { + /// Returns an iterator over the metadata key/value pairs in this header + pub fn metadata(&self) -> impl Iterator<Item = (&[u8], &[u8])> + '_ { + let mut last = 0; + self.meta_offsets.chunks_exact(2).map(move |w| { + let start = last; + last = w[1]; + (&self.meta_buf[start..w[0]], &self.meta_buf[w[0]..w[1]]) + }) + } + + /// Returns the value for a given metadata key if present + pub fn get(&self, key: impl AsRef<[u8]>) -> Option<&[u8]> { + self.metadata() + .find_map(|(k, v)| (k == key.as_ref()).then_some(v)) + } + + /// Returns the sync token for this file + pub fn sync(&self) -> [u8; 16] { + self.sync + } +} + +/// A decoder for [`Header`] +/// +/// The Avro file format does not encode the length of the header, and so it +/// is necessary to provide a push-based decoder that can be used with streams +#[derive(Debug)] +pub struct HeaderDecoder { + state: HeaderDecoderState, + vlq_decoder: VLQDecoder, + + /// The end offsets of strings in `meta_buf` + meta_offsets: Vec<usize>, + /// The raw binary data of the metadata map + meta_buf: Vec<u8>, + + /// The decoded sync marker + sync_marker: [u8; 16], + + /// The number of remaining tuples in the current block + tuples_remaining: usize, + /// The number of bytes remaining in the current string/bytes payload + bytes_remaining: usize, +} + +impl Default for HeaderDecoder { + fn default() -> Self { + Self { + state: HeaderDecoderState::Magic, + meta_offsets: vec![], + meta_buf: vec![], + sync_marker: [0; 16], + vlq_decoder: Default::default(), + tuples_remaining: 0, + bytes_remaining: MAGIC.len(), + } + } +} + +const MAGIC: &[u8; 4] = b"Obj\x01"; + +impl HeaderDecoder { + /// Parse [`Header`] from `buf`, returning the number of bytes read + /// + /// This method can be called multiple times with consecutive chunks of data, allowing + /// integration with chunked IO systems like [`BufRead::fill_buf`] + /// + /// All errors should be considered fatal, and decoding aborted + /// + /// Once the entire [`Header`] has been decoded this method will not read any further + /// input bytes, and the header can be obtained with [`Self::flush`] + /// + /// [`BufRead::fill_buf`]: std::io::BufRead::fill_buf + pub fn decode(&mut self, mut buf: &[u8]) -> Result<usize, ArrowError> { + let max_read = buf.len(); + while !buf.is_empty() { + match self.state { + HeaderDecoderState::Magic => { + let remaining = &MAGIC[MAGIC.len() - self.bytes_remaining..]; + let to_decode = buf.len().min(remaining.len()); + if !buf.starts_with(&remaining[..to_decode]) { + return Err(ArrowError::ParseError("Incorrect avro magic".to_string())); + } + self.bytes_remaining -= to_decode; + buf = &buf[to_decode..]; + if self.bytes_remaining == 0 { + self.state = HeaderDecoderState::BlockCount; + } + } + HeaderDecoderState::BlockCount => { + if let Some(block_count) = self.vlq_decoder.long(&mut
buf) { + match block_count.try_into() { + Ok(0) => { + self.state = HeaderDecoderState::Sync; + self.bytes_remaining = 16; + } + Ok(remaining) => { + self.tuples_remaining = remaining; + self.state = HeaderDecoderState::KeyLen; + } + Err(_) => { + self.tuples_remaining = block_count.unsigned_abs() as _; + self.state = HeaderDecoderState::BlockLen; + } + } + } + } + HeaderDecoderState::BlockLen => { + if self.vlq_decoder.long(&mut buf).is_some() { + self.state = HeaderDecoderState::KeyLen + } + } + HeaderDecoderState::Key => { + let to_read = self.bytes_remaining.min(buf.len()); + self.meta_buf.extend_from_slice(&buf[..to_read]); + self.bytes_remaining -= to_read; + buf = &buf[to_read..]; + if self.bytes_remaining == 0 { + self.meta_offsets.push(self.meta_buf.len()); + self.state = HeaderDecoderState::ValueLen; + } + } + HeaderDecoderState::Value => { + let to_read = self.bytes_remaining.min(buf.len()); + self.meta_buf.extend_from_slice(&buf[..to_read]); + self.bytes_remaining -= to_read; + buf = &buf[to_read..]; + if self.bytes_remaining == 0 { + self.meta_offsets.push(self.meta_buf.len()); + + self.tuples_remaining -= 1; + match self.tuples_remaining { + 0 => self.state = HeaderDecoderState::BlockCount, + _ => self.state = HeaderDecoderState::KeyLen, + } + } + } + HeaderDecoderState::KeyLen => { + if let Some(len) = self.vlq_decoder.long(&mut buf) { + self.bytes_remaining = len as _; + self.state = HeaderDecoderState::Key; + } + } + HeaderDecoderState::ValueLen => { + if let Some(len) = self.vlq_decoder.long(&mut buf) { + self.bytes_remaining = len as _; + self.state = HeaderDecoderState::Value; + } + } + HeaderDecoderState::Sync => { + let to_decode = buf.len().min(self.bytes_remaining); + let write = &mut self.sync_marker[16 - to_decode..]; + write[..to_decode].copy_from_slice(&buf[..to_decode]); + self.bytes_remaining -= to_decode; + buf = &buf[to_decode..]; + if self.bytes_remaining == 0 { + self.state = HeaderDecoderState::Finished; + } + } + HeaderDecoderState::Finished => return Ok(max_read - buf.len()), + } + } + Ok(max_read) + } + + /// Flush this decoder returning the parsed [`Header`] if any + pub fn flush(&mut self) -> Option
<Header>
{ + match self.state { + HeaderDecoderState::Finished => { + self.state = HeaderDecoderState::Magic; + Some(Header { + meta_offsets: std::mem::take(&mut self.meta_offsets), + meta_buf: std::mem::take(&mut self.meta_buf), + sync: self.sync_marker, + }) + } + _ => None, + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::reader::read_header; + use crate::schema::SCHEMA_METADATA_KEY; + use crate::test_util::arrow_test_data; + use std::fs::File; + use std::io::{BufRead, BufReader}; + + #[test] + fn test_header_decode() { + let mut decoder = HeaderDecoder::default(); + for m in MAGIC { + decoder.decode(std::slice::from_ref(m)).unwrap(); + } + + let mut decoder = HeaderDecoder::default(); + assert_eq!(decoder.decode(MAGIC).unwrap(), 4); + + let mut decoder = HeaderDecoder::default(); + decoder.decode(b"Ob").unwrap(); + let err = decoder.decode(b"s").unwrap_err().to_string(); + assert_eq!(err, "Parser error: Incorrect avro magic"); + } + + fn decode_file(file: &str) -> Header { + let file = File::open(file).unwrap(); + read_header(BufReader::with_capacity(100, file)).unwrap() + } + + #[test] + fn test_header() { + let header = decode_file(&arrow_test_data("avro/alltypes_plain.avro")); + let schema_json = header.get(SCHEMA_METADATA_KEY).unwrap(); + let expected = br#"{"type":"record","name":"topLevelRecord","fields":[{"name":"id","type":["int","null"]},{"name":"bool_col","type":["boolean","null"]},{"name":"tinyint_col","type":["int","null"]},{"name":"smallint_col","type":["int","null"]},{"name":"int_col","type":["int","null"]},{"name":"bigint_col","type":["long","null"]},{"name":"float_col","type":["float","null"]},{"name":"double_col","type":["double","null"]},{"name":"date_string_col","type":["bytes","null"]},{"name":"string_col","type":["bytes","null"]},{"name":"timestamp_col","type":[{"type":"long","logicalType":"timestamp-micros"},"null"]}]}"#; + assert_eq!(schema_json, expected); + let _schema: Schema<'_> = serde_json::from_slice(schema_json).unwrap(); + assert_eq!( + u128::from_le_bytes(header.sync()), + 226966037233754408753420635932530907102 + ); + + let header = decode_file(&arrow_test_data("avro/fixed_length_decimal.avro")); + let schema_json = header.get(SCHEMA_METADATA_KEY).unwrap(); + let expected = br#"{"type":"record","name":"topLevelRecord","fields":[{"name":"value","type":[{"type":"fixed","name":"fixed","namespace":"topLevelRecord.value","size":11,"logicalType":"decimal","precision":25,"scale":2},"null"]}]}"#; + assert_eq!(schema_json, expected); + let _schema: Schema<'_> = serde_json::from_slice(schema_json).unwrap(); + assert_eq!( + u128::from_le_bytes(header.sync()), + 325166208089902833952788552656412487328 + ); + } +} diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs new file mode 100644 index 000000000000..7769bbbc4998 --- /dev/null +++ b/arrow-avro/src/reader/mod.rs @@ -0,0 +1,91 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Read Avro data to Arrow + +use crate::reader::block::{Block, BlockDecoder}; +use crate::reader::header::{Header, HeaderDecoder}; +use arrow_schema::ArrowError; +use std::io::BufRead; + +mod header; + +mod block; + +mod vlq; + +/// Read a [`Header`] from the provided [`BufRead`] +fn read_header(mut reader: R) -> Result { + let mut decoder = HeaderDecoder::default(); + loop { + let buf = reader.fill_buf()?; + if buf.is_empty() { + break; + } + let read = buf.len(); + let decoded = decoder.decode(buf)?; + reader.consume(decoded); + if decoded != read { + break; + } + } + + decoder + .flush() + .ok_or_else(|| ArrowError::ParseError("Unexpected EOF".to_string())) +} + +/// Return an iterator of [`Block`] from the provided [`BufRead`] +fn read_blocks(mut reader: R) -> impl Iterator> { + let mut decoder = BlockDecoder::default(); + + let mut try_next = move || { + loop { + let buf = reader.fill_buf()?; + if buf.is_empty() { + break; + } + let read = buf.len(); + let decoded = decoder.decode(buf)?; + reader.consume(decoded); + if decoded != read { + break; + } + } + Ok(decoder.flush()) + }; + std::iter::from_fn(move || try_next().transpose()) +} + +#[cfg(test)] +mod test { + use crate::reader::{read_blocks, read_header}; + use crate::test_util::arrow_test_data; + use std::fs::File; + use std::io::BufReader; + + #[test] + fn test_mux() { + let file = File::open(arrow_test_data("avro/alltypes_plain.avro")).unwrap(); + let mut reader = BufReader::new(file); + let header = read_header(&mut reader).unwrap(); + for result in read_blocks(reader) { + let block = result.unwrap(); + assert_eq!(block.sync, header.sync()); + } + } +} diff --git a/arrow-avro/src/reader/vlq.rs b/arrow-avro/src/reader/vlq.rs new file mode 100644 index 000000000000..80f1c60eec7d --- /dev/null +++ b/arrow-avro/src/reader/vlq.rs @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
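The `vlq.rs` module that follows decodes Avro's variable-length zig-zag integers. As a worked sketch of the encoding itself (standalone helpers for illustration; the names and the encode direction are not part of this PR):

```rust
// Zig-zag maps signed to unsigned so that small magnitudes stay small:
// 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, 2 -> 4, ...
fn zigzag_encode(n: i64) -> u64 {
    ((n << 1) ^ (n >> 63)) as u64
}

// Inverse transform, matching `(val >> 1) as i64 ^ -((val & 1) as i64)`
// in the decoder below.
fn zigzag_decode(z: u64) -> i64 {
    (z >> 1) as i64 ^ -((z & 1) as i64)
}

// Varint framing: 7 data bits per byte, high bit set on all but the last byte.
fn varint_encode(mut v: u64, out: &mut Vec<u8>) {
    while v >= 0x80 {
        out.push((v & 0x7F) as u8 | 0x80); // continuation bit set
        v >>= 7;
    }
    out.push(v as u8);
}

fn main() {
    let mut buf = Vec::new();
    varint_encode(zigzag_encode(-3), &mut buf);
    assert_eq!(buf, vec![0x05]); // -3 zig-zags to 5, which fits in one byte
    assert_eq!(zigzag_decode(5), -3);

    buf.clear();
    varint_encode(zigzag_encode(300), &mut buf);
    assert_eq!(buf, vec![0xD8, 0x04]); // 300 zig-zags to 600 = 0b100_1011000
}
```

The `VLQDecoder` below is the streaming inverse: it accumulates 7 bits per input byte into `in_progress` until it sees a byte with the continuation bit clear, then applies the zig-zag inverse in a single step, which lets it resume cleanly across the chunk boundaries fed in by `read_header` and `read_blocks` above.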
+ +/// Decoder for zig-zag encoded variable-length (VLQ) integers +/// +/// See also: +/// <https://avro.apache.org/docs/1.11.1/specification/> +/// <https://protobuf.dev/programming-guides/encoding/#varints> +#[derive(Debug, Default)] +pub struct VLQDecoder { + /// Scratch space for decoding VLQ integers + in_progress: u64, + shift: u32, +} + +impl VLQDecoder { + /// Decode a signed long from `buf` + pub fn long(&mut self, buf: &mut &[u8]) -> Option<i64> { + while let Some(byte) = buf.first().copied() { + *buf = &buf[1..]; + self.in_progress |= ((byte & 0x7F) as u64) << self.shift; + self.shift += 7; + if byte & 0x80 == 0 { + let val = self.in_progress; + self.in_progress = 0; + self.shift = 0; + return Some((val >> 1) as i64 ^ -((val & 1) as i64)); + } + } + None + } +} diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs new file mode 100644 index 000000000000..17b82cf861b7 --- /dev/null +++ b/arrow-avro/src/schema.rs @@ -0,0 +1,482 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +/// The metadata key used for storing the JSON encoded [`Schema`] +pub const SCHEMA_METADATA_KEY: &str = "avro.schema"; + +/// Either a [`PrimitiveType`] or a reference to a previously defined named type +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum TypeName<'a> { + Primitive(PrimitiveType), + Ref(&'a str), +} + +/// A primitive type +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum PrimitiveType { + Null, + Boolean, + Int, + Long, + Float, + Double, + Bytes, + String, +} + +/// Additional attributes within a [`Schema`] +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct Attributes<'a> { + /// A logical type name + /// + /// + #[serde(default)] + pub logical_type: Option<&'a str>, + + /// Additional JSON attributes + #[serde(flatten)] + pub additional: HashMap<&'a str, serde_json::Value>, +} + +/// A type definition that is not a variant of [`ComplexType`] +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct Type<'a> { + #[serde(borrow)] + pub r#type: TypeName<'a>, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +/// An Avro schema +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum Schema<'a> { + #[serde(borrow)] + TypeName(TypeName<'a>), + #[serde(borrow)] + Union(Vec<Schema<'a>>), + #[serde(borrow)] + Complex(ComplexType<'a>), + #[serde(borrow)] + Type(Type<'a>), +} + +/// A complex type +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "camelCase")] +pub enum ComplexType<'a> { + #[serde(borrow)] + Union(Vec<Schema<'a>>), +
#[serde(borrow)] + Record(Record<'a>), + #[serde(borrow)] + Enum(Enum<'a>), + #[serde(borrow)] + Array(Array<'a>), + #[serde(borrow)] + Map(Map<'a>), + #[serde(borrow)] + Fixed(Fixed<'a>), +} + +/// A record +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Record<'a> { + #[serde(borrow)] + pub name: &'a str, + #[serde(borrow, default)] + pub namespace: Option<&'a str>, + #[serde(borrow, default)] + pub doc: Option<&'a str>, + #[serde(borrow, default)] + pub aliases: Vec<&'a str>, + #[serde(borrow)] + pub fields: Vec>, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +/// A field within a [`Record`] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Field<'a> { + #[serde(borrow)] + pub name: &'a str, + #[serde(borrow, default)] + pub doc: Option<&'a str>, + #[serde(borrow)] + pub r#type: Schema<'a>, + #[serde(borrow, default)] + pub default: Option<&'a str>, +} + +/// An enumeration +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Enum<'a> { + #[serde(borrow)] + pub name: &'a str, + #[serde(borrow, default)] + pub namespace: Option<&'a str>, + #[serde(borrow, default)] + pub doc: Option<&'a str>, + #[serde(borrow, default)] + pub aliases: Vec<&'a str>, + #[serde(borrow)] + pub symbols: Vec<&'a str>, + #[serde(borrow, default)] + pub default: Option<&'a str>, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +/// An array +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Array<'a> { + #[serde(borrow)] + pub items: Box>, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +/// A map +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Map<'a> { + #[serde(borrow)] + pub values: Box>, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +/// A fixed length binary array +/// +/// +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub struct Fixed<'a> { + #[serde(borrow)] + pub name: &'a str, + #[serde(borrow, default)] + pub namespace: Option<&'a str>, + #[serde(borrow, default)] + pub aliases: Vec<&'a str>, + pub size: usize, + #[serde(flatten)] + pub attributes: Attributes<'a>, +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + #[test] + fn test_deserialize() { + let t: Schema = serde_json::from_str("\"string\"").unwrap(); + assert_eq!( + t, + Schema::TypeName(TypeName::Primitive(PrimitiveType::String)) + ); + + let t: Schema = serde_json::from_str("[\"int\", \"null\"]").unwrap(); + assert_eq!( + t, + Schema::Union(vec![ + Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)), + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + ]) + ); + + let t: Type = serde_json::from_str( + r#"{ + "type":"long", + "logicalType":"timestamp-micros" + }"#, + ) + .unwrap(); + + let timestamp = Type { + r#type: TypeName::Primitive(PrimitiveType::Long), + attributes: Attributes { + logical_type: Some("timestamp-micros"), + additional: Default::default(), + }, + }; + + assert_eq!(t, timestamp); + + let t: ComplexType = serde_json::from_str( + r#"{ + "type":"fixed", + "name":"fixed", + "namespace":"topLevelRecord.value", + "size":11, + "logicalType":"decimal", + "precision":25, + "scale":2 + }"#, + ) + .unwrap(); + + let decimal = ComplexType::Fixed(Fixed { + name: "fixed", + namespace: Some("topLevelRecord.value"), + aliases: vec![], + size: 11, + attributes: Attributes { + logical_type: Some("decimal"), + additional: vec![("precision", json!(25)), ("scale", 
json!(2))] + .into_iter() + .collect(), + }, + }); + + assert_eq!(t, decimal); + + let schema: Schema = serde_json::from_str( + r#"{ + "type":"record", + "name":"topLevelRecord", + "fields":[ + { + "name":"value", + "type":[ + { + "type":"fixed", + "name":"fixed", + "namespace":"topLevelRecord.value", + "size":11, + "logicalType":"decimal", + "precision":25, + "scale":2 + }, + "null" + ] + } + ] + }"#, + ) + .unwrap(); + + assert_eq!( + schema, + Schema::Complex(ComplexType::Record(Record { + name: "topLevelRecord", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![Field { + name: "value", + doc: None, + r#type: Schema::Union(vec![ + Schema::Complex(decimal), + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + ]), + default: None, + },], + attributes: Default::default(), + })) + ); + + let schema: Schema = serde_json::from_str( + r#"{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], + "fields" : [ + {"name": "value", "type": "long"}, + {"name": "next", "type": ["null", "LongList"]} + ] + }"#, + ) + .unwrap(); + + assert_eq!( + schema, + Schema::Complex(ComplexType::Record(Record { + name: "LongList", + namespace: None, + doc: None, + aliases: vec!["LinkedLongs"], + fields: vec![ + Field { + name: "value", + doc: None, + r#type: Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)), + default: None, + }, + Field { + name: "next", + doc: None, + r#type: Schema::Union(vec![ + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + Schema::TypeName(TypeName::Ref("LongList")), + ]), + default: None, + } + ], + attributes: Attributes::default(), + })) + ); + + let schema: Schema = serde_json::from_str( + r#"{ + "type":"record", + "name":"topLevelRecord", + "fields":[ + { + "name":"id", + "type":[ + "int", + "null" + ] + }, + { + "name":"timestamp_col", + "type":[ + { + "type":"long", + "logicalType":"timestamp-micros" + }, + "null" + ] + } + ] + }"#, + ) + .unwrap(); + + assert_eq!( + schema, + Schema::Complex(ComplexType::Record(Record { + name: "topLevelRecord", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + Field { + name: "id", + doc: None, + r#type: Schema::Union(vec![ + Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)), + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + ]), + default: None, + }, + Field { + name: "timestamp_col", + doc: None, + r#type: Schema::Union(vec![ + Schema::Type(timestamp), + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + ]), + default: None, + } + ], + attributes: Default::default(), + })) + ); + + let schema: Schema = serde_json::from_str( + r#"{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] + }"#, + ) + .unwrap(); + + assert_eq!( + schema, + Schema::Complex(ComplexType::Record(Record { + name: "HandshakeRequest", + namespace: Some("org.apache.avro.ipc"), + doc: None, + aliases: vec![], + fields: vec![ + Field { + name: "clientHash", + doc: None, + r#type: Schema::Complex(ComplexType::Fixed(Fixed { + name: "MD5", + namespace: None, + aliases: vec![], + size: 16, + attributes: Default::default(), + })), + default: None, + }, + Field { + name: "clientProtocol", + doc: None, + r#type: Schema::Union(vec![ + 
Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + Schema::TypeName(TypeName::Primitive(PrimitiveType::String)), + ]), + default: None, + }, + Field { + name: "serverHash", + doc: None, + r#type: Schema::TypeName(TypeName::Ref("MD5")), + default: None, + }, + Field { + name: "meta", + doc: None, + r#type: Schema::Union(vec![ + Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), + Schema::Complex(ComplexType::Map(Map { + values: Box::new(Schema::TypeName(TypeName::Primitive( + PrimitiveType::Bytes + ))), + attributes: Default::default(), + })), + ]), + default: None, + } + ], + attributes: Default::default(), + })) + ); + } +} diff --git a/arrow-buffer/src/bigint/div.rs b/arrow-buffer/src/bigint/div.rs index ba530ffcc6c8..e1b2ed4f8aa5 100644 --- a/arrow-buffer/src/bigint/div.rs +++ b/arrow-buffer/src/bigint/div.rs @@ -26,10 +26,7 @@ /// # Panics /// /// Panics if divisor is zero -pub fn div_rem( - numerator: &[u64; N], - divisor: &[u64; N], -) -> ([u64; N], [u64; N]) { +pub fn div_rem(numerator: &[u64; N], divisor: &[u64; N]) -> ([u64; N], [u64; N]) { let numerator_bits = bits(numerator); let divisor_bits = bits(divisor); assert_ne!(divisor_bits, 0, "division by zero"); @@ -61,10 +58,7 @@ fn bits(arr: &[u64]) -> usize { } /// Division of numerator by a u64 divisor -fn div_rem_small( - numerator: &[u64; N], - divisor: u64, -) -> ([u64; N], [u64; N]) { +fn div_rem_small(numerator: &[u64; N], divisor: u64) -> ([u64; N], [u64; N]) { let mut rem = 0u64; let mut numerator = *numerator; numerator.iter_mut().rev().for_each(|d| { @@ -227,11 +221,7 @@ fn sub_assign(a: &mut [u64], b: &[u64]) -> bool { } /// Converts an overflowing binary operation on scalars to one on slices -fn binop_slice( - a: &mut [u64], - b: &[u64], - binop: impl Fn(u64, u64) -> (u64, bool) + Copy, -) -> bool { +fn binop_slice(a: &mut [u64], b: &[u64], binop: impl Fn(u64, u64) -> (u64, bool) + Copy) -> bool { let mut c = false; a.iter_mut().zip(b.iter()).for_each(|(x, y)| { let (res1, overflow1) = y.overflowing_add(u64::from(c)); diff --git a/arrow-buffer/src/bigint/mod.rs b/arrow-buffer/src/bigint/mod.rs index d064663bf63a..afbb3a31df12 100644 --- a/arrow-buffer/src/bigint/mod.rs +++ b/arrow-buffer/src/bigint/mod.rs @@ -310,9 +310,7 @@ impl i256 { (Self::from_le_bytes(bytes), false) } Ordering::Equal => (Self::from_le_bytes(v_bytes.try_into().unwrap()), false), - Ordering::Greater => { - (Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true) - } + Ordering::Greater => (Self::from_le_bytes(v_bytes[..32].try_into().unwrap()), true), } } @@ -357,8 +355,7 @@ impl i256 { #[inline] pub fn checked_add(self, other: Self) -> Option { let r = self.wrapping_add(other); - ((other.is_negative() && r < self) || (!other.is_negative() && r >= self)) - .then_some(r) + ((other.is_negative() && r < self) || (!other.is_negative() && r >= self)).then_some(r) } /// Performs wrapping subtraction @@ -373,8 +370,7 @@ impl i256 { #[inline] pub fn checked_sub(self, other: Self) -> Option { let r = self.wrapping_sub(other); - ((other.is_negative() && r > self) || (!other.is_negative() && r <= self)) - .then_some(r) + ((other.is_negative() && r > self) || (!other.is_negative() && r <= self)).then_some(r) } /// Performs wrapping multiplication @@ -591,9 +587,7 @@ impl i256 { /// Temporary workaround due to lack of stable const array slicing /// See -const fn split_array( - vals: [u8; N], -) -> ([u8; M], [u8; M]) { +const fn split_array(vals: [u8; N]) -> ([u8; M], [u8; M]) { let mut a = [0; M]; let mut b = [0; M]; let mut i = 0; @@ 
-915,8 +909,7 @@ mod tests { // Addition let actual = il.wrapping_add(ir); - let (expected, overflow) = - i256::from_bigint_with_overflow(bl.clone() + br.clone()); + let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() + br.clone()); assert_eq!(actual, expected); let checked = il.checked_add(ir); @@ -927,8 +920,7 @@ mod tests { // Subtraction let actual = il.wrapping_sub(ir); - let (expected, overflow) = - i256::from_bigint_with_overflow(bl.clone() - br.clone()); + let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() - br.clone()); assert_eq!(actual.to_string(), expected.to_string()); let checked = il.checked_sub(ir); @@ -939,8 +931,7 @@ mod tests { // Multiplication let actual = il.wrapping_mul(ir); - let (expected, overflow) = - i256::from_bigint_with_overflow(bl.clone() * br.clone()); + let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone() * br.clone()); assert_eq!(actual.to_string(), expected.to_string()); let checked = il.checked_mul(ir); @@ -996,8 +987,7 @@ mod tests { // Exponentiation for exp in vec![0, 1, 2, 3, 8, 100].into_iter() { let actual = il.wrapping_pow(exp); - let (expected, overflow) = - i256::from_bigint_with_overflow(bl.clone().pow(exp)); + let (expected, overflow) = i256::from_bigint_with_overflow(bl.clone().pow(exp)); assert_eq!(actual.to_string(), expected.to_string()); let checked = il.checked_pow(exp); @@ -1212,7 +1202,10 @@ mod tests { ("000000000000000000000000000000000000000", Some(i256::ZERO)), ("0000000000000000000000000000000000000000-11", None), ("11-1111111111111111111111111111111111111", None), - ("115792089237316195423570985008687907853269984665640564039457584007913129639936", None) + ( + "115792089237316195423570985008687907853269984665640564039457584007913129639936", + None, + ), ]; for (case, expected) in cases { assert_eq!(i256::from_string(case), expected) diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index 577c716e4bea..c651edcad92e 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -223,13 +223,7 @@ impl BitAnd<&BooleanBuffer> for &BooleanBuffer { fn bitand(self, rhs: &BooleanBuffer) -> Self::Output { assert_eq!(self.len, rhs.len); BooleanBuffer { - buffer: buffer_bin_and( - &self.buffer, - self.offset, - &rhs.buffer, - rhs.offset, - self.len, - ), + buffer: buffer_bin_and(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), offset: 0, len: self.len, } @@ -242,13 +236,7 @@ impl BitOr<&BooleanBuffer> for &BooleanBuffer { fn bitor(self, rhs: &BooleanBuffer) -> Self::Output { assert_eq!(self.len, rhs.len); BooleanBuffer { - buffer: buffer_bin_or( - &self.buffer, - self.offset, - &rhs.buffer, - rhs.offset, - self.len, - ), + buffer: buffer_bin_or(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), offset: 0, len: self.len, } @@ -261,13 +249,7 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer { fn bitxor(self, rhs: &BooleanBuffer) -> Self::Output { assert_eq!(self.len, rhs.len); BooleanBuffer { - buffer: buffer_bin_xor( - &self.buffer, - self.offset, - &rhs.buffer, - rhs.offset, - self.len, - ), + buffer: buffer_bin_xor(&self.buffer, self.offset, &rhs.buffer, rhs.offset, self.len), offset: 0, len: self.len, } @@ -428,8 +410,7 @@ mod tests { let buf = Buffer::from(&[0, 1, 1, 0, 0]); let boolean_buf = &BooleanBuffer::new(buf, offset, len); - let expected = - BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 255]), offset, len); + let expected = BooleanBuffer::new(Buffer::from(&[255, 254, 254, 255, 
255]), offset, len); assert_eq!(!boolean_buf, expected); } } diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index 8296d3fbcc31..05530eed9b08 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -323,6 +323,14 @@ impl Buffer { length, }) } + + /// Returns true if this [`Buffer`] is equal to `other`, using pointer comparisons + /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may + /// return false when the arrays are logically equal + #[inline] + pub fn ptr_eq(&self, other: &Self) -> bool { + self.ptr == other.ptr && self.length == other.length + } } /// Creating a `Buffer` instance by copying the memory from a `AsRef<[u8]>` into a newly @@ -515,9 +523,7 @@ mod tests { } #[test] - #[should_panic( - expected = "the offset of the new Buffer cannot exceed the existing length" - )] + #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn test_slice_offset_out_of_bound() { let buf = Buffer::from(&[2, 4, 6, 8, 10]); buf.slice(6); @@ -680,9 +686,7 @@ mod tests { } #[test] - #[should_panic( - expected = "the offset of the new Buffer cannot exceed the existing length" - )] + #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn slice_overflow() { let buffer = Buffer::from(MutableBuffer::from_len_zeroed(12)); buffer.slice_with_length(2, usize::MAX); diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index 2c56f9a5b270..69c986cc1056 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -334,9 +334,7 @@ impl MutableBuffer { #[inline] pub(super) fn into_buffer(self) -> Buffer { - let bytes = unsafe { - Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) - }; + let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) }; std::mem::forget(self); Buffer::from_bytes(bytes) } @@ -351,8 +349,7 @@ impl MutableBuffer { // SAFETY // ArrowNativeType is trivially transmutable, is sealed to prevent potentially incorrect // implementation outside this crate, and this method checks alignment - let (prefix, offsets, suffix) = - unsafe { self.as_slice_mut().align_to_mut::() }; + let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::() }; assert!(prefix.is_empty() && suffix.is_empty()); offsets } @@ -604,9 +601,7 @@ impl MutableBuffer { // we can't specialize `extend` for `TrustedLen` like `Vec` does. // 2. `from_trusted_len_iter_bool` is faster. #[inline] - pub unsafe fn from_trusted_len_iter_bool>( - mut iterator: I, - ) -> Self { + pub unsafe fn from_trusted_len_iter_bool>(mut iterator: I) -> Self { let (_, upper) = iterator.size_hint(); let len = upper.expect("from_trusted_len_iter requires an upper limit"); diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs index e0c7d9ef8f49..c79aef398059 100644 --- a/arrow-buffer/src/buffer/null.rs +++ b/arrow-buffer/src/buffer/null.rs @@ -71,10 +71,7 @@ impl NullBuffer { /// This is commonly used by binary operations where the result is NULL if either /// of the input values is NULL. 
Handling the null mask separately in this way /// can yield significant performance improvements over an iterator approach - pub fn union( - lhs: Option<&NullBuffer>, - rhs: Option<&NullBuffer>, - ) -> Option { + pub fn union(lhs: Option<&NullBuffer>, rhs: Option<&NullBuffer>) -> Option { match (lhs, rhs) { (Some(lhs), Some(rhs)) => Some(Self::new(lhs.inner() & rhs.inner())), (Some(n), None) | (None, Some(n)) => Some(n.clone()), diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs index fede32c57924..652d30c3b0ab 100644 --- a/arrow-buffer/src/buffer/offset.rs +++ b/arrow-buffer/src/buffer/offset.rs @@ -148,6 +148,14 @@ impl OffsetBuffer { pub fn slice(&self, offset: usize, len: usize) -> Self { Self(self.0.slice(offset, len.saturating_add(1))) } + + /// Returns true if this [`OffsetBuffer`] is equal to `other`, using pointer comparisons + /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may + /// return false when the arrays are logically equal + #[inline] + pub fn ptr_eq(&self, other: &Self) -> bool { + self.0.ptr_eq(&other.0) + } } impl Deref for OffsetBuffer { @@ -211,8 +219,7 @@ mod tests { assert_eq!(buffer.as_ref(), &[0, 2, 8, 11, 18, 20]); let half_max = i32::MAX / 2; - let buffer = - OffsetBuffer::::from_lengths([half_max as usize, half_max as usize]); + let buffer = OffsetBuffer::::from_lengths([half_max as usize, half_max as usize]); assert_eq!(buffer.as_ref(), &[0, half_max, half_max * 2]); } diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs index eccff6280dd8..ca00e41bea21 100644 --- a/arrow-buffer/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -184,10 +184,6 @@ pub fn buffer_bin_xor( /// Apply a bitwise not to one input and return the result as a Buffer. /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits. -pub fn buffer_unary_not( - left: &Buffer, - offset_in_bits: usize, - len_in_bits: usize, -) -> Buffer { +pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer { bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a) } diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs index 29c0f3dfd949..3dbbe344a025 100644 --- a/arrow-buffer/src/buffer/run.rs +++ b/arrow-buffer/src/buffer/run.rs @@ -110,11 +110,7 @@ where /// /// - `buffer` must contain strictly increasing values greater than zero /// - The last value of `buffer` must be greater than or equal to `offset + len` - pub unsafe fn new_unchecked( - run_ends: ScalarBuffer, - offset: usize, - len: usize, - ) -> Self { + pub unsafe fn new_unchecked(run_ends: ScalarBuffer, offset: usize, len: usize) -> Self { Self { run_ends, offset, diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs index 70c86f11866d..3b75d5384046 100644 --- a/arrow-buffer/src/buffer/scalar.rs +++ b/arrow-buffer/src/buffer/scalar.rs @@ -86,6 +86,14 @@ impl ScalarBuffer { pub fn into_inner(self) -> Buffer { self.buffer } + + /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons + /// to determine buffer equality. 
This is cheaper than `PartialEq::eq` but may + /// return false when the arrays are logically equal + #[inline] + pub fn ptr_eq(&self, other: &Self) -> bool { + self.buffer.ptr_eq(&other.buffer) + } } impl Deref for ScalarBuffer { @@ -213,9 +221,7 @@ mod tests { } #[test] - #[should_panic( - expected = "Memory pointer is not aligned with the specified scalar type" - )] + #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] fn test_unaligned() { let expected = [0_i32, 1, 2]; let buffer = Buffer::from_iter(expected.iter().cloned()); @@ -224,18 +230,14 @@ mod tests { } #[test] - #[should_panic( - expected = "the offset of the new Buffer cannot exceed the existing length" - )] + #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn test_length_out_of_bounds() { let buffer = Buffer::from_iter([0_i32, 1, 2]); ScalarBuffer::<i32>::new(buffer, 1, 3); } #[test] - #[should_panic( - expected = "the offset of the new Buffer cannot exceed the existing length" - )] + #[should_panic(expected = "the offset of the new Buffer cannot exceed the existing length")] fn test_offset_out_of_bounds() { let buffer = Buffer::from_iter([0_i32, 1, 2]); ScalarBuffer::<i32>::new(buffer, 4, 0); diff --git a/arrow-buffer/src/builder/boolean.rs b/arrow-buffer/src/builder/boolean.rs index f0e7f0f13670..ca178ae5ce4e 100644 --- a/arrow-buffer/src/builder/boolean.rs +++ b/arrow-buffer/src/builder/boolean.rs @@ -154,14 +154,12 @@ impl BooleanBufferBuilder { if cur_remainder != 0 { // Pad last byte with 1s - *self.buffer.as_slice_mut().last_mut().unwrap() |= - !((1 << cur_remainder) - 1) + *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1) } self.buffer.resize(new_len_bytes, 0xFF); if new_remainder != 0 { // Clear remaining bits - *self.buffer.as_slice_mut().last_mut().unwrap() &= - (1 << new_remainder) - 1 + *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1 } self.len = new_len; } diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs index 8f5019d5a4cc..81860b604868 100644 --- a/arrow-buffer/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -60,11 +60,7 @@ impl Bytes { /// This function is unsafe as there is no guarantee that the given pointer is valid for `len` /// bytes. If the `ptr` and `capacity` come from a `Buffer`, then this is guaranteed.
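// Illustrative arithmetic, not part of this diff: the two masks used by the
// BooleanBufferBuilder resize logic above when the last byte is partially filled.
fn main() {
    let cur_remainder = 3; // the last byte currently holds 3 valid bits
    let pad_mask: u8 = !((1u8 << cur_remainder) - 1);
    assert_eq!(pad_mask, 0b1111_1000); // sets the unused high bits to 1 before the 0xFF fill

    let new_remainder = 5; // after the resize, the new last byte holds 5 valid bits
    let clear_mask: u8 = (1u8 << new_remainder) - 1;
    assert_eq!(clear_mask, 0b0001_1111); // clears the bits past the new length
}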
#[inline] - pub(crate) unsafe fn new( - ptr: NonNull<u8>, - len: usize, - deallocation: Deallocation, - ) -> Bytes { + pub(crate) unsafe fn new(ptr: NonNull<u8>, len: usize, deallocation: Deallocation) -> Bytes { Bytes { ptr, len, diff --git a/arrow-buffer/src/util/bit_chunk_iterator.rs b/arrow-buffer/src/util/bit_chunk_iterator.rs index 6830acae94a1..9e4fb8268dff 100644 --- a/arrow-buffer/src/util/bit_chunk_iterator.rs +++ b/arrow-buffer/src/util/bit_chunk_iterator.rs @@ -60,8 +60,7 @@ impl<'a> UnalignedBitChunk<'a> { // If less than 8 bytes, read into prefix if buffer.len() <= 8 { - let (suffix_mask, trailing_padding) = - compute_suffix_mask(len, offset_padding); + let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding); let prefix = read_u64(buffer) & suffix_mask & prefix_mask; return Self { @@ -75,8 +74,7 @@ impl<'a> UnalignedBitChunk<'a> { // If less than 16 bytes, read into prefix and suffix if buffer.len() <= 16 { - let (suffix_mask, trailing_padding) = - compute_suffix_mask(len, offset_padding); + let (suffix_mask, trailing_padding) = compute_suffix_mask(len, offset_padding); let prefix = read_u64(&buffer[..8]) & prefix_mask; let suffix = read_u64(&buffer[8..]) & suffix_mask; @@ -167,10 +165,7 @@ impl<'a> UnalignedBitChunk<'a> { } pub type UnalignedBitChunkIterator<'a> = std::iter::Chain< - std::iter::Chain< - std::option::IntoIter<u64>, - std::iter::Cloned<std::slice::Iter<'a, u64>>, - >, + std::iter::Chain<std::option::IntoIter<u64>, std::iter::Cloned<std::slice::Iter<'a, u64>>>, std::option::IntoIter<u64>, >; @@ -338,9 +333,8 @@ impl Iterator for BitChunkIterator<'_> { } else { // the constructor ensures that bit_offset is in 0..8 // that means we need to read at most one additional byte to fill in the high bits - let next = unsafe { - std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 - }; + let next = + unsafe { std::ptr::read_unaligned(raw_data.add(index + 1) as *const u8) as u64 }; (current >> bit_offset) | (next << (64 - bit_offset)) }; @@ -387,8 +381,8 @@ mod tests { #[test] fn test_iter_unaligned() { let input: &[u8] = &[ - 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, - 0b00100000, 0b01000000, 0b11111111, + 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000, + 0b01000000, 0b11111111, ]; let buffer: Buffer = Buffer::from(input); @@ -408,8 +402,8 @@ mod tests { #[test] fn test_iter_unaligned_remainder_1_byte() { let input: &[u8] = &[ - 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, - 0b00100000, 0b01000000, 0b11111111, + 0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000, 0b00100000, + 0b01000000, 0b11111111, ]; let buffer: Buffer = Buffer::from(input); @@ -442,8 +436,8 @@ mod tests { #[test] fn test_iter_unaligned_remainder_bits_large() { let input: &[u8] = &[ - 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, - 0b11111111, 0b00000000, 0b11111111, + 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, + 0b00000000, 0b11111111, ]; let buffer: Buffer = Buffer::from(input); @@ -637,11 +631,8 @@ mod tests { let max_truncate = 128.min(mask_len - offset); let truncate = rng.gen::<usize>().checked_rem(max_truncate).unwrap_or(0); - let unaligned = UnalignedBitChunk::new( - buffer.as_slice(), - offset, - mask_len - offset - truncate, - ); + let unaligned = + UnalignedBitChunk::new(buffer.as_slice(), offset, mask_len - offset - truncate); let bool_slice = &bools[offset..mask_len - truncate]; diff --git a/arrow-buffer/src/util/bit_iterator.rs b/arrow-buffer/src/util/bit_iterator.rs index
4e24ccdabec0..df40a8fbaccb 100644 --- a/arrow-buffer/src/util/bit_iterator.rs +++ b/arrow-buffer/src/util/bit_iterator.rs @@ -276,8 +276,8 @@ mod tests { assert_eq!( actual, &[ - false, true, false, false, true, false, true, false, false, false, false, - false, true, false + false, true, false, false, true, false, true, false, false, false, false, false, + true, false ] ); diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index 2af24b782632..8f81cb7d0469 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -42,8 +42,7 @@ pub fn set_bits( let chunks = BitChunks::new(data, offset_read + bits_to_align, len - bits_to_align); chunks.iter().for_each(|chunk| { null_count += chunk.count_zeros(); - write_data[write_byte_index..write_byte_index + 8] - .copy_from_slice(&chunk.to_le_bytes()); + write_data[write_byte_index..write_byte_index + 8].copy_from_slice(&chunk.to_le_bytes()); write_byte_index += 8; }); @@ -70,8 +69,8 @@ mod tests { fn test_set_bits_aligned() { let mut destination: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, ]; let destination_offset = 8; @@ -80,8 +79,8 @@ mod tests { let len = 64; let expected_data: &[u8] = &[ - 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, 0, + 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, 0, ]; let expected_null_count = 24; let result = set_bits( @@ -100,8 +99,8 @@ mod tests { fn test_set_bits_unaligned_destination_start() { let mut destination: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, ]; let destination_offset = 3; @@ -110,8 +109,8 @@ mod tests { let len = 64; let expected_data: &[u8] = &[ - 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, - 0b00111110, 0b00101111, 0b00000101, 0b00000000, + 0b00111000, 0b00101111, 0b11001101, 0b11011100, 0b01011110, 0b00011111, 0b00111110, + 0b00101111, 0b00000101, 0b00000000, ]; let expected_null_count = 24; let result = set_bits( @@ -130,8 +129,8 @@ mod tests { fn test_set_bits_unaligned_destination_end() { let mut destination: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, ]; let destination_offset = 8; @@ -140,8 +139,8 @@ mod tests { let len = 62; let expected_data: &[u8] = &[ - 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b00100101, 0, + 0, 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b00100101, 0, ]; let expected_null_count = 23; let result = set_bits( @@ -160,9 +159,9 @@ mod tests { fn test_set_bits_unaligned() { let mut destination: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; let source: &[u8] = &[ - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, - 0b11100111, 0b10100101, 
0b10011001, 0b11011011, 0b11101011, 0b11000011, + 0b11100111, 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, + 0b10100101, 0b10011001, 0b11011011, 0b11101011, 0b11000011, 0b11100111, 0b10100101, + 0b10011001, 0b11011011, 0b11101011, 0b11000011, ]; let destination_offset = 3; @@ -171,9 +170,8 @@ mod tests { let len = 95; let expected_data: &[u8] = &[ - 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, - 0b01111001, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, - 0b00000001, + 0b01111000, 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b01111001, + 0b01101001, 0b11100110, 0b11110110, 0b11111010, 0b11110000, 0b00000001, ]; let expected_null_count = 35; let result = set_bits( diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml index 2758a4817814..19b857297d14 100644 --- a/arrow-cast/Cargo.toml +++ b/arrow-cast/Cargo.toml @@ -45,15 +45,17 @@ arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } arrow-select = { workspace = true } -chrono = { version = "0.4.23", default-features = false, features = ["clock"] } +chrono = { workspace = true } half = { version = "2.1", default-features = false } num = { version = "0.4", default-features = false, features = ["std"] } lexical-core = { version = "^0.8", default-features = false, features = ["write-integers", "write-floats", "parse-integers", "parse-floats"] } comfy-table = { version = "7.0", optional = true, default-features = false } +base64 = "0.21" [dev-dependencies] criterion = { version = "0.5", default-features = false } half = { version = "2.1", default-features = false } +rand = "0.8" [build-dependencies] diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs new file mode 100644 index 000000000000..e109c8112480 --- /dev/null +++ b/arrow-cast/src/base64.rs @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Functions for Base64 encoding/decoding + +use arrow_array::{Array, GenericBinaryArray, GenericStringArray, OffsetSizeTrait}; +use arrow_buffer::OffsetBuffer; +use arrow_schema::ArrowError; +use base64::encoded_len; +use base64::engine::Config; + +pub use base64::prelude::*; + +/// Base64 encode each element of `array` with the provided `engine` +pub fn b64_encode<E: Engine, O: OffsetSizeTrait>( + engine: &E, + array: &GenericBinaryArray<O>, +) -> GenericStringArray<O> { + let lengths = array.offsets().windows(2).map(|w| { + let len = w[1].as_usize() - w[0].as_usize(); + encoded_len(len, engine.config().encode_padding()).unwrap() + }); + let offsets = OffsetBuffer::<O>::from_lengths(lengths); + let buffer_len = offsets.last().unwrap().as_usize(); + let mut buffer = vec![0_u8; buffer_len]; + let mut offset = 0; + + for i in 0..array.len() { + let len = engine + .encode_slice(array.value(i), &mut buffer[offset..]) + .unwrap(); + offset += len; + } + assert_eq!(offset, buffer_len); + + // Safety: Base64 is valid UTF-8 + unsafe { GenericStringArray::new_unchecked(offsets, buffer.into(), array.nulls().cloned()) } +} + +/// Base64 decode each element of `array` with the provided `engine` +pub fn b64_decode<E: Engine, O: OffsetSizeTrait>( + engine: &E, + array: &GenericBinaryArray<O>, +) -> Result<GenericBinaryArray<O>, ArrowError> { + let estimated_len = array.values().len(); // This is an overestimate + let mut buffer = vec![0; estimated_len]; + + let mut offsets = Vec::with_capacity(array.len() + 1); + offsets.push(O::usize_as(0)); + let mut offset = 0; + + for v in array.iter() { + if let Some(v) = v { + let len = engine.decode_slice(v, &mut buffer[offset..]).unwrap(); + // This cannot overflow as `len` is less than `v.len()` and `a` is valid + offset += len; + } + offsets.push(O::usize_as(offset)); + } + + // Safety: offsets monotonically increasing by construction + let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) }; + + Ok(GenericBinaryArray::new( + offsets, + buffer.into(), + array.nulls().cloned(), + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use arrow_array::BinaryArray; + use base64::prelude::{BASE64_STANDARD, BASE64_STANDARD_NO_PAD}; + use rand::{thread_rng, Rng}; + + fn test_engine<E: Engine>(e: &E, a: &BinaryArray) { + let encoded = b64_encode(e, a); + encoded.to_data().validate_full().unwrap(); + + let to_decode = encoded.into(); + let decoded = b64_decode(e, &to_decode).unwrap(); + decoded.to_data().validate_full().unwrap(); + + assert_eq!(&decoded, a); + } + + #[test] + fn test_b64() { + let mut rng = thread_rng(); + let len = rng.gen_range(1024..1050); + let data: BinaryArray = (0..len) + .map(|_| { + let len = rng.gen_range(0..16); + Some((0..len).map(|_| rng.gen()).collect::<Vec<_>>()) + }) + .collect(); + + test_engine(&BASE64_STANDARD, &data); + test_engine(&BASE64_STANDARD_NO_PAD, &data); + } +} diff --git a/arrow-cast/src/cast.rs b/arrow-cast/src/cast.rs index 7b8e6144bb49..8facb4f161f4 100644 --- a/arrow-cast/src/cast.rs +++ b/arrow-cast/src/cast.rs @@ -46,10 +46,9 @@ use crate::parse::{ parse_interval_day_time, parse_interval_month_day_nano, parse_interval_year_month, string_to_datetime, Parser, }; -use arrow_array::{ - builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *, -}; -use arrow_buffer::{i256, ArrowNativeType, Buffer, OffsetBuffer}; +use arrow_array::{builder::*, cast::*, temporal_conversions::*, timezone::Tz, types::*, *}; +use arrow_buffer::{i256, ArrowNativeType, OffsetBuffer}; +use arrow_data::transform::MutableArrayData; use arrow_data::ArrayData; use arrow_schema::*; use arrow_select::take::take; @@ -74,10 +73,9 @@ impl<'a>
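// Illustrative usage sketch, not part of this diff, of the b64_encode / b64_decode
// functions added above; assumes the new module is exported as arrow_cast::base64.
use arrow_array::BinaryArray;
use arrow_cast::base64::{b64_decode, b64_encode, BASE64_STANDARD};

fn main() {
    let input = BinaryArray::from_opt_vec(vec![Some(b"hello".as_slice()), None]);

    let encoded = b64_encode(&BASE64_STANDARD, &input);
    assert_eq!(encoded.value(0), "aGVsbG8="); // standard alphabet, with padding

    // Round-trip: the StringArray converts back to a BinaryArray for decoding
    let decoded = b64_decode(&BASE64_STANDARD, &encoded.into()).unwrap();
    assert_eq!(&decoded, &input);
}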
Default for CastOptions<'a> { } } -/// Return true if a value of type `from_type` can be cast into a -/// value of `to_type`. Note that such as cast may be lossy. +/// Return true if a value of type `from_type` can be cast into a value of `to_type`. /// -/// If this function returns true to stay consistent with the `cast` kernel below. +/// See [`cast_with_options`] for more information pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { use self::DataType::*; use self::IntervalUnit::*; @@ -125,21 +123,15 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { } (Dictionary(_, value_type), _) => can_cast_types(value_type, to_type), (_, Dictionary(_, value_type)) => can_cast_types(from_type, value_type), - (LargeList(list_from), LargeList(list_to)) => { - can_cast_types(list_from.data_type(), list_to.data_type()) - } - (List(list_from), List(list_to)) => { + (List(list_from) | LargeList(list_from), List(list_to) | LargeList(list_to)) => { can_cast_types(list_from.data_type(), list_to.data_type()) } - (List(list_from), LargeList(list_to)) => { - list_from.data_type() == list_to.data_type() - } - (LargeList(list_from), List(list_to)) => { - list_from.data_type() == list_to.data_type() - } (List(list_from) | LargeList(list_from), Utf8 | LargeUtf8) => { can_cast_types(list_from.data_type(), to_type) } + (List(list_from) | LargeList(list_from), FixedSizeList(list_to, _)) => { + can_cast_types(list_from.data_type(), list_to.data_type()) + } (List(_), _) => false, (FixedSizeList(list_from,_), List(list_to)) => { list_from.data_type() == list_to.data_type() @@ -168,17 +160,16 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Decimal128(_, _) | Decimal256(_, _), Utf8 | LargeUtf8) => true, // Utf8 to decimal (Utf8 | LargeUtf8, Decimal128(_, _) | Decimal256(_, _)) => true, - (Decimal128(_, _) | Decimal256(_, _), _) => false, - (_, Decimal128(_, _) | Decimal256(_, _)) => false, (Struct(_), _) => false, (_, Struct(_)) => false, (_, Boolean) => { - DataType::is_numeric(from_type) + DataType::is_integer(from_type) || + DataType::is_floating(from_type) || from_type == &Utf8 || from_type == &LargeUtf8 } (Boolean, _) => { - DataType::is_numeric(to_type) || to_type == &Utf8 || to_type == &LargeUtf8 + DataType::is_integer(to_type) || DataType::is_floating(to_type) || to_type == &Utf8 || to_type == &LargeUtf8 } (Binary, LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary(_)) => true, @@ -205,6 +196,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16, (_, Utf8 | LargeUtf8) => from_type.is_primitive(), + (_, Binary | LargeBinary) => from_type.is_integer(), + // start numeric casts ( UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, @@ -227,8 +220,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (Time64(_), Time32(to_unit)) => { matches!(to_unit, Second | Millisecond) } - (Timestamp(_, _), Int64) => true, - (Int64, Timestamp(_, _)) => true, + (Timestamp(_, _), _) if to_type.is_numeric() && to_type != &Float16 => true, + (_, Timestamp(_, _)) if from_type.is_numeric() && from_type != &Float16 => true, (Date64, Timestamp(_, None)) => true, (Date32, Timestamp(_, None)) => true, ( @@ -268,30 +261,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { } } -/// Cast `array` to the provided data type and return a new Array with -/// type `to_type`, if possible. 
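// Illustrative sketch, not part of this diff: two pairs newly accepted by
// can_cast_types after this change.
use arrow_cast::can_cast_types;
use arrow_schema::{DataType, Field, TimeUnit};
use std::sync::Arc;

fn main() {
    let item = Arc::new(Field::new("item", DataType::Int32, true));

    // List -> FixedSizeList is now supported when the value types are castable
    assert!(can_cast_types(
        &DataType::List(item.clone()),
        &DataType::FixedSizeList(item.clone(), 3),
    ));

    // Timestamps now cast to any numeric type other than Float16
    assert!(can_cast_types(
        &DataType::Timestamp(TimeUnit::Millisecond, None),
        &DataType::Float64,
    ));
}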
+/// Cast `array` to the provided data type and return a new Array with type `to_type`, if possible. /// -/// Behavior: -/// * Boolean to Utf8: `true` => '1', `false` => `0` -/// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`, -/// short variants are accepted, other strings return null or error -/// * Utf8 to numeric: strings that can't be parsed to numbers return null, float strings -/// in integer casts return null -/// * Numeric to boolean: 0 returns `false`, any other value returns `true` -/// * List to List: the underlying data type is cast -/// * Primitive to List: a list array with 1 value per slot is created -/// * Date32 and Date64: precision lost when going to higher interval -/// * Time32 and Time64: precision lost when going to higher interval -/// * Timestamp and Date{32|64}: precision lost when going to higher interval -/// * Temporal to/from backing primitive: zero-copy with data type change -/// * Casting from `float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals -/// (i.e. casting 6.4999 to Decimal(10, 1) becomes 6.5). This is the breaking change from `26.0.0`. -/// It used to truncate it instead of round (i.e. outputs 6.4 instead) -/// -/// Unsupported Casts -/// * To or from `StructArray` -/// * List to primitive -/// * Interval and duration +/// See [`cast_with_options`] for more information pub fn cast(array: &dyn Array, to_type: &DataType) -> Result { cast_with_options(array, to_type, &CastOptions::default()) } @@ -364,21 +336,32 @@ where if cast_options.safe { array - .unary_opt::<_, Decimal128Type>(|v| (mul * v.as_()).round().to_i128()) + .unary_opt::<_, Decimal128Type>(|v| { + (mul * v.as_()) + .round() + .to_i128() + .filter(|v| Decimal128Type::validate_decimal_precision(*v, precision).is_ok()) + }) .with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) } else { array .try_unary::<_, Decimal128Type, _>(|v| { - (mul * v.as_()).round().to_i128().ok_or_else(|| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal128Type::PREFIX, - precision, - scale, - v - )) - }) + (mul * v.as_()) + .round() + .to_i128() + .ok_or_else(|| { + ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal128Type::PREFIX, + precision, + scale, + v + )) + }) + .and_then(|v| { + Decimal128Type::validate_decimal_precision(v, precision).map(|_| v) + }) })? .with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) @@ -398,21 +381,28 @@ where if cast_options.safe { array - .unary_opt::<_, Decimal256Type>(|v| i256::from_f64((v.as_() * mul).round())) + .unary_opt::<_, Decimal256Type>(|v| { + i256::from_f64((v.as_() * mul).round()) + .filter(|v| Decimal256Type::validate_decimal_precision(*v, precision).is_ok()) + }) .with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) } else { array .try_unary::<_, Decimal256Type, _>(|v| { - i256::from_f64((v.as_() * mul).round()).ok_or_else(|| { - ArrowError::CastError(format!( - "Cannot cast to {}({}, {}). Overflowing on {:?}", - Decimal256Type::PREFIX, - precision, - scale, - v - )) - }) + i256::from_f64((v.as_() * mul).round()) + .ok_or_else(|| { + ArrowError::CastError(format!( + "Cannot cast to {}({}, {}). Overflowing on {:?}", + Decimal256Type::PREFIX, + precision, + scale, + v + )) + }) + .and_then(|v| { + Decimal256Type::validate_decimal_precision(v, precision).map(|_| v) + }) })? 
.with_precision_and_scale(precision, scale) .map(|a| Arc::new(a) as ArrayRef) @@ -473,7 +463,10 @@ fn cast_month_day_nano_to_duration>( .map(|v| { v.map(|v| match v >> 64 { 0 => Ok((v as i64) / scale), - _ => Err(ArrowError::ComputeError("Cannot convert interval containing non-zero months or days to duration".to_string())) + _ => Err(ArrowError::ComputeError( + "Cannot convert interval containing non-zero months or days to duration" + .to_string(), + )), }) .transpose() }) @@ -539,10 +532,7 @@ fn cast_duration_to_interval>( } /// Cast the primitive array using [`PrimitiveArray::reinterpret_cast`] -fn cast_reinterpret_arrays< - I: ArrowPrimitiveType, - O: ArrowPrimitiveType, ->( +fn cast_reinterpret_arrays>( array: &dyn Array, ) -> Result { Ok(Arc::new(array.as_primitive::().reinterpret_cast::())) @@ -593,14 +583,13 @@ where } else { let v = array.value(i).div_checked(div)?; - let value = - ::from::(v).ok_or_else(|| { - ArrowError::CastError(format!( - "value of {:?} is out of range {}", - v, - T::DATA_TYPE - )) - })?; + let value = ::from::(v).ok_or_else(|| { + ArrowError::CastError(format!( + "value of {:?} is out of range {}", + v, + T::DATA_TYPE + )) + })?; value_builder.append_value(value); } @@ -669,11 +658,11 @@ fn as_time_res_with_timezone( }) } -/// Cast `array` to the provided data type and return a new Array with -/// type `to_type`, if possible. It accepts `CastOptions` to allow consumers -/// to configure cast behavior. +/// Cast `array` to the provided data type and return a new Array with type `to_type`, if possible. +/// +/// Accepts [`CastOptions`] to specify cast behavior. /// -/// Behavior: +/// ## Behavior /// * Boolean to Utf8: `true` => '1', `false` => `0` /// * Utf8 to boolean: `true`, `yes`, `on`, `1` => `true`, `false`, `no`, `off`, `0` => `false`, /// short variants are accepted, other strings return null or error @@ -681,15 +670,21 @@ fn as_time_res_with_timezone( /// in integer casts return null /// * Numeric to boolean: 0 returns `false`, any other value returns `true` /// * List to List: the underlying data type is cast +/// * List to FixedSizeList: the underlying data type is cast. If safe is true and a list element +/// has the wrong length it will be replaced with NULL, otherwise an error will be returned /// * Primitive to List: a list array with 1 value per slot is created /// * Date32 and Date64: precision lost when going to higher interval /// * Time32 and Time64: precision lost when going to higher interval /// * Timestamp and Date{32|64}: precision lost when going to higher interval /// * Temporal to/from backing primitive: zero-copy with data type change +/// * Casting from `float32/float64` to `Decimal(precision, scale)` rounds to the `scale` decimals +/// (i.e. casting `6.4999` to Decimal(10, 1) becomes `6.5`). Prior to version `26.0.0`, +/// casting would truncate instead (i.e. 
outputs `6.4` instead) /// /// Unsupported Casts /// * To or from `StructArray` /// * List to primitive +/// * Interval and duration pub fn cast_with_options( array: &dyn Array, to_type: &DataType, @@ -760,29 +755,17 @@ pub fn cast_with_options( "Casting from type {from_type:?} to dictionary type {to_type:?} not supported", ))), }, - (List(_), List(ref to)) => { - cast_list_inner::(array, to, to_type, cast_options) - } - (LargeList(_), LargeList(ref to)) => { - cast_list_inner::(array, to, to_type, cast_options) - } - (List(list_from), LargeList(list_to)) => { - if list_to.data_type() != list_from.data_type() { - Err(ArrowError::CastError( - "cannot cast list to large-list with different child data".into(), - )) - } else { - cast_list_container::(array, cast_options) - } - } - (LargeList(list_from), List(list_to)) => { - if list_to.data_type() != list_from.data_type() { - Err(ArrowError::CastError( - "cannot cast large-list to list with different child data".into(), - )) - } else { - cast_list_container::(array, cast_options) - } + (List(_), List(to)) => cast_list_values::(array, to, cast_options), + (LargeList(_), LargeList(to)) => cast_list_values::(array, to, cast_options), + (List(_), LargeList(list_to)) => cast_list::(array, list_to, cast_options), + (LargeList(_), List(list_to)) => cast_list::(array, list_to, cast_options), + (List(_), FixedSizeList(field, size)) => { + let array = array.as_list::(); + cast_list_to_fixed_size_list::(array, field, *size, cast_options) + } + (LargeList(_), FixedSizeList(field, size)) => { + let array = array.as_list::(); + cast_list_to_fixed_size_list::(array, field, *size, cast_options) } (List(_) | LargeList(_), _) => match to_type { Utf8 => value_to_string::(array, cast_options), @@ -809,7 +792,6 @@ pub fn cast_with_options( cast_fixed_size_list_to_list::(array) } } - (_, List(ref to)) => cast_values_to_list::(array, to, cast_options), (_, LargeList(ref to)) => cast_values_to_list::(array, to, cast_options), (Decimal128(_, s1), Decimal128(p2, s2)) => { @@ -848,7 +830,7 @@ pub fn cast_with_options( cast_options, ) } - (Decimal128(_, scale), _) => { + (Decimal128(_, scale), _) if !to_type.is_temporal() => { // cast decimal to other type match to_type { UInt8 => cast_decimal_to_integer::( @@ -899,16 +881,12 @@ pub fn cast_with_options( *scale, cast_options, ), - Float32 => { - cast_decimal_to_float::(array, |x| { - (x as f64 / 10_f64.powi(*scale as i32)) as f32 - }) - } - Float64 => { - cast_decimal_to_float::(array, |x| { - x as f64 / 10_f64.powi(*scale as i32) - }) - } + Float32 => cast_decimal_to_float::(array, |x| { + (x as f64 / 10_f64.powi(*scale as i32)) as f32 + }), + Float64 => cast_decimal_to_float::(array, |x| { + x as f64 / 10_f64.powi(*scale as i32) + }), Utf8 => value_to_string::(array, cast_options), LargeUtf8 => value_to_string::(array, cast_options), Null => Ok(new_null_array(to_type, array.len())), @@ -917,7 +895,7 @@ pub fn cast_with_options( ))), } } - (Decimal256(_, scale), _) => { + (Decimal256(_, scale), _) if !to_type.is_temporal() => { // cast decimal to other type match to_type { UInt8 => cast_decimal_to_integer::( @@ -968,16 +946,12 @@ pub fn cast_with_options( *scale, cast_options, ), - Float32 => { - cast_decimal_to_float::(array, |x| { - (x.to_f64().unwrap() / 10_f64.powi(*scale as i32)) as f32 - }) - } - Float64 => { - cast_decimal_to_float::(array, |x| { - x.to_f64().unwrap() / 10_f64.powi(*scale as i32) - }) - } + Float32 => cast_decimal_to_float::(array, |x| { + (x.to_f64().unwrap() / 10_f64.powi(*scale as i32)) as f32 
+ }), + Float64 => cast_decimal_to_float::(array, |x| { + x.to_f64().unwrap() / 10_f64.powi(*scale as i32) + }), Utf8 => value_to_string::(array, cast_options), LargeUtf8 => value_to_string::(array, cast_options), Null => Ok(new_null_array(to_type, array.len())), @@ -986,7 +960,7 @@ pub fn cast_with_options( ))), } } - (_, Decimal128(precision, scale)) => { + (_, Decimal128(precision, scale)) if !from_type.is_temporal() => { // cast data to decimal match from_type { UInt8 => cast_integer_to_decimal::<_, Decimal128Type, _>( @@ -1075,7 +1049,7 @@ pub fn cast_with_options( ))), } } - (_, Decimal256(precision, scale)) => { + (_, Decimal256(precision, scale)) if !from_type.is_temporal() => { // cast data to decimal match from_type { UInt8 => cast_integer_to_decimal::<_, Decimal256Type, _>( @@ -1219,25 +1193,35 @@ pub fn cast_with_options( Float64 => parse_string::(array, cast_options), Date32 => parse_string::(array, cast_options), Date64 => parse_string::(array, cast_options), - Binary => Ok(Arc::new(BinaryArray::from(array.as_string::().clone()))), + Binary => Ok(Arc::new(BinaryArray::from( + array.as_string::().clone(), + ))), LargeBinary => { let binary = BinaryArray::from(array.as_string::().clone()); cast_byte_container::(&binary) } LargeUtf8 => cast_byte_container::(array), Time32(TimeUnit::Second) => parse_string::(array, cast_options), - Time32(TimeUnit::Millisecond) => parse_string::(array, cast_options), - Time64(TimeUnit::Microsecond) => parse_string::(array, cast_options), - Time64(TimeUnit::Nanosecond) => parse_string::(array, cast_options), - Timestamp(TimeUnit::Second, to_tz) => { - cast_string_to_timestamp::(array, to_tz, cast_options) + Time32(TimeUnit::Millisecond) => { + parse_string::(array, cast_options) } - Timestamp(TimeUnit::Millisecond, to_tz) => { - cast_string_to_timestamp::(array, to_tz, cast_options) + Time64(TimeUnit::Microsecond) => { + parse_string::(array, cast_options) } - Timestamp(TimeUnit::Microsecond, to_tz) => { - cast_string_to_timestamp::(array, to_tz, cast_options) + Time64(TimeUnit::Nanosecond) => { + parse_string::(array, cast_options) } + Timestamp(TimeUnit::Second, to_tz) => { + cast_string_to_timestamp::(array, to_tz, cast_options) + } + Timestamp(TimeUnit::Millisecond, to_tz) => cast_string_to_timestamp::< + i32, + TimestampMillisecondType, + >(array, to_tz, cast_options), + Timestamp(TimeUnit::Microsecond, to_tz) => cast_string_to_timestamp::< + i32, + TimestampMicrosecondType, + >(array, to_tz, cast_options), Timestamp(TimeUnit::Nanosecond, to_tz) => { cast_string_to_timestamp::(array, to_tz, cast_options) } @@ -1269,26 +1253,33 @@ pub fn cast_with_options( Date64 => parse_string::(array, cast_options), Utf8 => cast_byte_container::(array), Binary => { - let large_binary = - LargeBinaryArray::from(array.as_string::().clone()); + let large_binary = LargeBinaryArray::from(array.as_string::().clone()); cast_byte_container::(&large_binary) } LargeBinary => Ok(Arc::new(LargeBinaryArray::from( array.as_string::().clone(), ))), Time32(TimeUnit::Second) => parse_string::(array, cast_options), - Time32(TimeUnit::Millisecond) => parse_string::(array, cast_options), - Time64(TimeUnit::Microsecond) => parse_string::(array, cast_options), - Time64(TimeUnit::Nanosecond) => parse_string::(array, cast_options), - Timestamp(TimeUnit::Second, to_tz) => { - cast_string_to_timestamp::(array, to_tz, cast_options) + Time32(TimeUnit::Millisecond) => { + parse_string::(array, cast_options) } - Timestamp(TimeUnit::Millisecond, to_tz) => { - 
cast_string_to_timestamp::(array, to_tz, cast_options) + Time64(TimeUnit::Microsecond) => { + parse_string::(array, cast_options) } - Timestamp(TimeUnit::Microsecond, to_tz) => { - cast_string_to_timestamp::(array, to_tz, cast_options) + Time64(TimeUnit::Nanosecond) => { + parse_string::(array, cast_options) } + Timestamp(TimeUnit::Second, to_tz) => { + cast_string_to_timestamp::(array, to_tz, cast_options) + } + Timestamp(TimeUnit::Millisecond, to_tz) => cast_string_to_timestamp::< + i64, + TimestampMillisecondType, + >(array, to_tz, cast_options), + Timestamp(TimeUnit::Microsecond, to_tz) => cast_string_to_timestamp::< + i64, + TimestampMicrosecondType, + >(array, to_tz, cast_options), Timestamp(TimeUnit::Nanosecond, to_tz) => { cast_string_to_timestamp::(array, to_tz, cast_options) } @@ -1311,9 +1302,7 @@ pub fn cast_with_options( let array = cast_binary_to_string::(array, cast_options)?; cast_byte_container::(array.as_ref()) } - LargeBinary => { - cast_byte_container::(array) - } + LargeBinary => cast_byte_container::(array), FixedSizeBinary(size) => { cast_binary_to_fixed_size_binary::(array, *size, cast_options) } @@ -1337,278 +1326,139 @@ pub fn cast_with_options( }, (FixedSizeBinary(size), _) => match to_type { Binary => cast_fixed_size_binary_to_binary::(array, *size), - LargeBinary => - cast_fixed_size_binary_to_binary::(array, *size), + LargeBinary => cast_fixed_size_binary_to_binary::(array, *size), _ => Err(ArrowError::CastError(format!( "Casting from {from_type:?} to {to_type:?} not supported", ))), }, - (from_type, LargeUtf8) if from_type.is_primitive() => value_to_string::(array, cast_options), - (from_type, Utf8) if from_type.is_primitive() => value_to_string::(array, cast_options), + (from_type, LargeUtf8) if from_type.is_primitive() => { + value_to_string::(array, cast_options) + } + (from_type, Utf8) if from_type.is_primitive() => { + value_to_string::(array, cast_options) + } + (from_type, Binary) if from_type.is_integer() => match from_type { + UInt8 => cast_numeric_to_binary::(array), + UInt16 => cast_numeric_to_binary::(array), + UInt32 => cast_numeric_to_binary::(array), + UInt64 => cast_numeric_to_binary::(array), + Int8 => cast_numeric_to_binary::(array), + Int16 => cast_numeric_to_binary::(array), + Int32 => cast_numeric_to_binary::(array), + Int64 => cast_numeric_to_binary::(array), + _ => unreachable!(), + }, + (from_type, LargeBinary) if from_type.is_integer() => match from_type { + UInt8 => cast_numeric_to_binary::(array), + UInt16 => cast_numeric_to_binary::(array), + UInt32 => cast_numeric_to_binary::(array), + UInt64 => cast_numeric_to_binary::(array), + Int8 => cast_numeric_to_binary::(array), + Int16 => cast_numeric_to_binary::(array), + Int32 => cast_numeric_to_binary::(array), + Int64 => cast_numeric_to_binary::(array), + _ => unreachable!(), + }, // start numeric casts - (UInt8, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } + (UInt8, UInt16) => cast_numeric_arrays::(array, cast_options), + (UInt8, UInt32) => cast_numeric_arrays::(array, cast_options), + (UInt8, UInt64) => cast_numeric_arrays::(array, cast_options), (UInt8, Int8) => cast_numeric_arrays::(array, cast_options), - (UInt8, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, 
Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt8, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (UInt16, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Int8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt16, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (UInt32, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Int8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt32, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (UInt64, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Int8) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (UInt64, Float64) => { - cast_numeric_arrays::(array, cast_options) - } + (UInt8, Int16) => cast_numeric_arrays::(array, cast_options), + (UInt8, Int32) => cast_numeric_arrays::(array, cast_options), + (UInt8, Int64) => cast_numeric_arrays::(array, cast_options), + (UInt8, Float32) => cast_numeric_arrays::(array, cast_options), + (UInt8, Float64) => cast_numeric_arrays::(array, cast_options), + + (UInt16, UInt8) => cast_numeric_arrays::(array, cast_options), + (UInt16, UInt32) => cast_numeric_arrays::(array, cast_options), + (UInt16, UInt64) => cast_numeric_arrays::(array, cast_options), + (UInt16, Int8) => cast_numeric_arrays::(array, cast_options), + (UInt16, Int16) => cast_numeric_arrays::(array, cast_options), + (UInt16, Int32) => cast_numeric_arrays::(array, cast_options), + (UInt16, Int64) => cast_numeric_arrays::(array, cast_options), + (UInt16, Float32) => cast_numeric_arrays::(array, cast_options), + (UInt16, Float64) => cast_numeric_arrays::(array, cast_options), + + (UInt32, UInt8) => cast_numeric_arrays::(array, cast_options), + (UInt32, UInt16) => cast_numeric_arrays::(array, cast_options), + (UInt32, UInt64) => cast_numeric_arrays::(array, cast_options), + (UInt32, Int8) => cast_numeric_arrays::(array, cast_options), + (UInt32, Int16) => cast_numeric_arrays::(array, cast_options), + (UInt32, Int32) => cast_numeric_arrays::(array, cast_options), + (UInt32, Int64) => cast_numeric_arrays::(array, cast_options), + (UInt32, Float32) => cast_numeric_arrays::(array, cast_options), + (UInt32, 
Float64) => cast_numeric_arrays::(array, cast_options), + + (UInt64, UInt8) => cast_numeric_arrays::(array, cast_options), + (UInt64, UInt16) => cast_numeric_arrays::(array, cast_options), + (UInt64, UInt32) => cast_numeric_arrays::(array, cast_options), + (UInt64, Int8) => cast_numeric_arrays::(array, cast_options), + (UInt64, Int16) => cast_numeric_arrays::(array, cast_options), + (UInt64, Int32) => cast_numeric_arrays::(array, cast_options), + (UInt64, Int64) => cast_numeric_arrays::(array, cast_options), + (UInt64, Float32) => cast_numeric_arrays::(array, cast_options), + (UInt64, Float64) => cast_numeric_arrays::(array, cast_options), (Int8, UInt8) => cast_numeric_arrays::(array, cast_options), - (Int8, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int8, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int8, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } + (Int8, UInt16) => cast_numeric_arrays::(array, cast_options), + (Int8, UInt32) => cast_numeric_arrays::(array, cast_options), + (Int8, UInt64) => cast_numeric_arrays::(array, cast_options), (Int8, Int16) => cast_numeric_arrays::(array, cast_options), (Int8, Int32) => cast_numeric_arrays::(array, cast_options), (Int8, Int64) => cast_numeric_arrays::(array, cast_options), - (Int8, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int8, Float64) => { - cast_numeric_arrays::(array, cast_options) - } + (Int8, Float32) => cast_numeric_arrays::(array, cast_options), + (Int8, Float64) => cast_numeric_arrays::(array, cast_options), - (Int16, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } + (Int16, UInt8) => cast_numeric_arrays::(array, cast_options), + (Int16, UInt16) => cast_numeric_arrays::(array, cast_options), + (Int16, UInt32) => cast_numeric_arrays::(array, cast_options), + (Int16, UInt64) => cast_numeric_arrays::(array, cast_options), (Int16, Int8) => cast_numeric_arrays::(array, cast_options), - (Int16, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int16, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (Int32, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (Int32, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int32, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int32, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } + (Int16, Int32) => cast_numeric_arrays::(array, cast_options), + (Int16, Int64) => cast_numeric_arrays::(array, cast_options), + (Int16, Float32) => cast_numeric_arrays::(array, cast_options), + (Int16, Float64) => cast_numeric_arrays::(array, cast_options), + + (Int32, UInt8) => cast_numeric_arrays::(array, cast_options), + (Int32, UInt16) => cast_numeric_arrays::(array, cast_options), + (Int32, UInt32) => cast_numeric_arrays::(array, cast_options), + (Int32, UInt64) => cast_numeric_arrays::(array, cast_options), (Int32, Int8) => cast_numeric_arrays::(array, cast_options), - (Int32, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int32, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (Int32, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - 
(Int32, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (Int64, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } + (Int32, Int16) => cast_numeric_arrays::(array, cast_options), + (Int32, Int64) => cast_numeric_arrays::(array, cast_options), + (Int32, Float32) => cast_numeric_arrays::(array, cast_options), + (Int32, Float64) => cast_numeric_arrays::(array, cast_options), + + (Int64, UInt8) => cast_numeric_arrays::(array, cast_options), + (Int64, UInt16) => cast_numeric_arrays::(array, cast_options), + (Int64, UInt32) => cast_numeric_arrays::(array, cast_options), + (Int64, UInt64) => cast_numeric_arrays::(array, cast_options), (Int64, Int8) => cast_numeric_arrays::(array, cast_options), - (Int64, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, Float32) => { - cast_numeric_arrays::(array, cast_options) - } - (Int64, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (Float32, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, Int8) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (Float32, Float64) => { - cast_numeric_arrays::(array, cast_options) - } - - (Float64, UInt8) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, UInt16) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, UInt32) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, UInt64) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, Int8) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, Int16) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, Int32) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, Int64) => { - cast_numeric_arrays::(array, cast_options) - } - (Float64, Float32) => { - cast_numeric_arrays::(array, cast_options) - } + (Int64, Int16) => cast_numeric_arrays::(array, cast_options), + (Int64, Int32) => cast_numeric_arrays::(array, cast_options), + (Int64, Float32) => cast_numeric_arrays::(array, cast_options), + (Int64, Float64) => cast_numeric_arrays::(array, cast_options), + + (Float32, UInt8) => cast_numeric_arrays::(array, cast_options), + (Float32, UInt16) => cast_numeric_arrays::(array, cast_options), + (Float32, UInt32) => cast_numeric_arrays::(array, cast_options), + (Float32, UInt64) => cast_numeric_arrays::(array, cast_options), + (Float32, Int8) => cast_numeric_arrays::(array, cast_options), + (Float32, Int16) => cast_numeric_arrays::(array, cast_options), + (Float32, Int32) => cast_numeric_arrays::(array, cast_options), + (Float32, Int64) => cast_numeric_arrays::(array, cast_options), + (Float32, Float64) => cast_numeric_arrays::(array, cast_options), + + (Float64, UInt8) => cast_numeric_arrays::(array, cast_options), + (Float64, UInt16) => cast_numeric_arrays::(array, cast_options), + 
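// Illustrative sketch, not part of this diff: how CastOptions::safe interacts with
// the numeric casts dispatched above; out-of-range values become NULL, not errors.
use arrow_array::cast::AsArray;
use arrow_array::{types::Int8Type, Array, Float64Array};
use arrow_cast::{cast_with_options, CastOptions};
use arrow_schema::DataType;

fn main() {
    let input = Float64Array::from(vec![1.5, 512.0]);
    let opts = CastOptions { safe: true, ..Default::default() };

    let out = cast_with_options(&input, &DataType::Int8, &opts).unwrap();
    let out = out.as_primitive::<Int8Type>();
    assert_eq!(out.value(0), 1); // the fractional part is truncated
    assert!(out.is_null(1)); // 512 does not fit in i8; with safe: false this errors instead
}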
(Float64, UInt32) => cast_numeric_arrays::(array, cast_options), + (Float64, UInt64) => cast_numeric_arrays::(array, cast_options), + (Float64, Int8) => cast_numeric_arrays::(array, cast_options), + (Float64, Int16) => cast_numeric_arrays::(array, cast_options), + (Float64, Int32) => cast_numeric_arrays::(array, cast_options), + (Float64, Int64) => cast_numeric_arrays::(array, cast_options), + (Float64, Float32) => cast_numeric_arrays::(array, cast_options), // end numeric casts // temporal casts @@ -1664,92 +1514,106 @@ pub fn cast_with_options( cast_reinterpret_arrays::(array) } (Date32, Date64) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Date64Type>(|x| x as i64 * MILLISECONDS_IN_DAY), )), (Date64, Date32) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Date32Type>(|x| (x / MILLISECONDS_IN_DAY) as i32), )), (Time32(TimeUnit::Second), Time32(TimeUnit::Millisecond)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time32MillisecondType>(|x| x * MILLISECONDS as i32), )), (Time32(TimeUnit::Second), Time64(TimeUnit::Microsecond)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time64MicrosecondType>(|x| x as i64 * MICROSECONDS), )), (Time32(TimeUnit::Second), Time64(TimeUnit::Nanosecond)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time64NanosecondType>(|x| x as i64 * NANOSECONDS), )), (Time32(TimeUnit::Millisecond), Time32(TimeUnit::Second)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time32SecondType>(|x| x / MILLISECONDS as i32), )), (Time32(TimeUnit::Millisecond), Time64(TimeUnit::Microsecond)) => Ok(Arc::new( - array.as_primitive::() - .unary::<_, Time64MicrosecondType>(|x| { - x as i64 * (MICROSECONDS / MILLISECONDS) - }), + array + .as_primitive::() + .unary::<_, Time64MicrosecondType>(|x| x as i64 * (MICROSECONDS / MILLISECONDS)), )), (Time32(TimeUnit::Millisecond), Time64(TimeUnit::Nanosecond)) => Ok(Arc::new( - array.as_primitive::() - .unary::<_, Time64NanosecondType>(|x| { - x as i64 * (MICROSECONDS / NANOSECONDS) - }), + array + .as_primitive::() + .unary::<_, Time64NanosecondType>(|x| x as i64 * (MICROSECONDS / NANOSECONDS)), )), (Time64(TimeUnit::Microsecond), Time32(TimeUnit::Second)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time32SecondType>(|x| (x / MICROSECONDS) as i32), )), (Time64(TimeUnit::Microsecond), Time32(TimeUnit::Millisecond)) => Ok(Arc::new( - array.as_primitive::() - .unary::<_, Time32MillisecondType>(|x| { - (x / (MICROSECONDS / MILLISECONDS)) as i32 - }), + array + .as_primitive::() + .unary::<_, Time32MillisecondType>(|x| (x / (MICROSECONDS / MILLISECONDS)) as i32), )), (Time64(TimeUnit::Microsecond), Time64(TimeUnit::Nanosecond)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time64NanosecondType>(|x| x * (NANOSECONDS / MICROSECONDS)), )), (Time64(TimeUnit::Nanosecond), Time32(TimeUnit::Second)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time32SecondType>(|x| (x / NANOSECONDS) as i32), )), (Time64(TimeUnit::Nanosecond), Time32(TimeUnit::Millisecond)) => Ok(Arc::new( - array.as_primitive::() - .unary::<_, Time32MillisecondType>(|x| { - (x / (NANOSECONDS / MILLISECONDS)) as i32 - }), + array + .as_primitive::() + .unary::<_, Time32MillisecondType>(|x| (x / (NANOSECONDS / MILLISECONDS)) as i32), )), (Time64(TimeUnit::Nanosecond), 
Time64(TimeUnit::Microsecond)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Time64MicrosecondType>(|x| x / (NANOSECONDS / MICROSECONDS)), )), - (Timestamp(TimeUnit::Second, _), Int64) => { - cast_reinterpret_arrays::(array) + // Timestamp to integer/floating/decimals + (Timestamp(TimeUnit::Second, _), _) if to_type.is_numeric() => { + let array = cast_reinterpret_arrays::(array)?; + cast_with_options(&array, to_type, cast_options) } - (Timestamp(TimeUnit::Millisecond, _), Int64) => { - cast_reinterpret_arrays::(array) + (Timestamp(TimeUnit::Millisecond, _), _) if to_type.is_numeric() => { + let array = cast_reinterpret_arrays::(array)?; + cast_with_options(&array, to_type, cast_options) } - (Timestamp(TimeUnit::Microsecond, _), Int64) => { - cast_reinterpret_arrays::(array) + (Timestamp(TimeUnit::Microsecond, _), _) if to_type.is_numeric() => { + let array = cast_reinterpret_arrays::(array)?; + cast_with_options(&array, to_type, cast_options) } - (Timestamp(TimeUnit::Nanosecond, _), Int64) => { - cast_reinterpret_arrays::(array) + (Timestamp(TimeUnit::Nanosecond, _), _) if to_type.is_numeric() => { + let array = cast_reinterpret_arrays::(array)?; + cast_with_options(&array, to_type, cast_options) } - (Int64, Timestamp(unit, tz)) => Ok(make_timestamp_array( - array.as_primitive(), - unit.clone(), - tz.clone(), - )), + (_, Timestamp(unit, tz)) if from_type.is_numeric() => { + let array = cast_with_options(array, &Int64, cast_options)?; + Ok(make_timestamp_array( + array.as_primitive(), + unit.clone(), + tz.clone(), + )) + } (Timestamp(from_unit, from_tz), Timestamp(to_unit, to_tz)) => { let array = cast_with_options(array, &Int64, cast_options)?; @@ -1783,39 +1647,29 @@ pub fn cast_with_options( (None, Some(to_tz)) => { let to_tz: Tz = to_tz.parse()?; match to_unit { - TimeUnit::Second => { - adjust_timestamp_to_timezone::( - converted, - &to_tz, - cast_options, - )? - } - TimeUnit::Millisecond => { - adjust_timestamp_to_timezone::( - converted, - &to_tz, - cast_options, - )? - } - TimeUnit::Microsecond => { - adjust_timestamp_to_timezone::( - converted, - &to_tz, - cast_options, - )? - } - TimeUnit::Nanosecond => { - adjust_timestamp_to_timezone::( - converted, - &to_tz, - cast_options, - )? - } + TimeUnit::Second => adjust_timestamp_to_timezone::( + converted, + &to_tz, + cast_options, + )?, + TimeUnit::Millisecond => adjust_timestamp_to_timezone::< + TimestampMillisecondType, + >( + converted, &to_tz, cast_options + )?, + TimeUnit::Microsecond => adjust_timestamp_to_timezone::< + TimestampMicrosecondType, + >( + converted, &to_tz, cast_options + )?, + TimeUnit::Nanosecond => adjust_timestamp_to_timezone::< + TimestampNanosecondType, + >( + converted, &to_tz, cast_options + )?, } } - _ => { - converted - } + _ => converted, }; Ok(make_timestamp_array( &adjusted, @@ -1834,45 +1688,43 @@ pub fn cast_with_options( if time_array.is_null(i) { b.append_null(); } else { - b.append_value(num::integer::div_floor::(time_array.value(i), from_size) as i32); + b.append_value( + num::integer::div_floor::(time_array.value(i), from_size) as i32, + ); } } Ok(Arc::new(b.finish()) as ArrayRef) } - (Timestamp(TimeUnit::Second, _), Date64) => Ok(Arc::new( - match cast_options.safe { - true => { - // change error to None - array.as_primitive::() - .unary_opt::<_, Date64Type>(|x| { - x.checked_mul(MILLISECONDS) - }) - } - false => { - array.as_primitive::().try_unary::<_, Date64Type, _>( - |x| { - x.mul_checked(MILLISECONDS) - }, - )? 
- } - }, - )), + (Timestamp(TimeUnit::Second, _), Date64) => Ok(Arc::new(match cast_options.safe { + true => { + // change error to None + array + .as_primitive::() + .unary_opt::<_, Date64Type>(|x| x.checked_mul(MILLISECONDS)) + } + false => array + .as_primitive::() + .try_unary::<_, Date64Type, _>(|x| x.mul_checked(MILLISECONDS))?, + })), (Timestamp(TimeUnit::Millisecond, _), Date64) => { cast_reinterpret_arrays::(array) } (Timestamp(TimeUnit::Microsecond, _), Date64) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Date64Type>(|x| x / (MICROSECONDS / MILLISECONDS)), )), (Timestamp(TimeUnit::Nanosecond, _), Date64) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, Date64Type>(|x| x / (NANOSECONDS / MILLISECONDS)), )), (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Microsecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| { Ok(time_to_time64us(as_time_res_with_timezone::< TimestampSecondType, @@ -1883,7 +1735,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Second, tz), Time64(TimeUnit::Nanosecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64NanosecondType, ArrowError>(|x| { Ok(time_to_time64ns(as_time_res_with_timezone::< TimestampSecondType, @@ -1894,7 +1747,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Microsecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| { Ok(time_to_time64us(as_time_res_with_timezone::< TimestampMillisecondType, @@ -1905,7 +1759,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Millisecond, tz), Time64(TimeUnit::Nanosecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64NanosecondType, ArrowError>(|x| { Ok(time_to_time64ns(as_time_res_with_timezone::< TimestampMillisecondType, @@ -1916,7 +1771,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Microsecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| { Ok(time_to_time64us(as_time_res_with_timezone::< TimestampMicrosecondType, @@ -1927,7 +1783,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Microsecond, tz), Time64(TimeUnit::Nanosecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64NanosecondType, ArrowError>(|x| { Ok(time_to_time64ns(as_time_res_with_timezone::< TimestampMicrosecondType, @@ -1938,7 +1795,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Microsecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64MicrosecondType, ArrowError>(|x| { Ok(time_to_time64us(as_time_res_with_timezone::< TimestampNanosecondType, @@ -1949,7 +1807,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Nanosecond, tz), Time64(TimeUnit::Nanosecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - 
array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time64NanosecondType, ArrowError>(|x| { Ok(time_to_time64ns(as_time_res_with_timezone::< TimestampNanosecondType, @@ -1960,7 +1819,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Second)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32SecondType, ArrowError>(|x| { Ok(time_to_time32s(as_time_res_with_timezone::< TimestampSecondType, @@ -1971,7 +1831,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Second, tz), Time32(TimeUnit::Millisecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32MillisecondType, ArrowError>(|x| { Ok(time_to_time32ms(as_time_res_with_timezone::< TimestampSecondType, @@ -1982,7 +1843,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Second)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32SecondType, ArrowError>(|x| { Ok(time_to_time32s(as_time_res_with_timezone::< TimestampMillisecondType, @@ -1993,7 +1855,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Millisecond, tz), Time32(TimeUnit::Millisecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32MillisecondType, ArrowError>(|x| { Ok(time_to_time32ms(as_time_res_with_timezone::< TimestampMillisecondType, @@ -2004,7 +1867,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Second)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32SecondType, ArrowError>(|x| { Ok(time_to_time32s(as_time_res_with_timezone::< TimestampMicrosecondType, @@ -2015,7 +1879,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Microsecond, tz), Time32(TimeUnit::Millisecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32MillisecondType, ArrowError>(|x| { Ok(time_to_time32ms(as_time_res_with_timezone::< TimestampMicrosecondType, @@ -2026,7 +1891,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Second)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32SecondType, ArrowError>(|x| { Ok(time_to_time32s(as_time_res_with_timezone::< TimestampNanosecondType, @@ -2037,7 +1903,8 @@ pub fn cast_with_options( (Timestamp(TimeUnit::Nanosecond, tz), Time32(TimeUnit::Millisecond)) => { let tz = tz.as_ref().map(|tz| tz.parse()).transpose()?; Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .try_unary::<_, Time32MillisecondType, ArrowError>(|x| { Ok(time_to_time32ms(as_time_res_with_timezone::< TimestampNanosecondType, @@ -2047,38 +1914,41 @@ pub fn cast_with_options( } (Date64, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, TimestampSecondType>(|x| x / MILLISECONDS), )), (Date64, Timestamp(TimeUnit::Millisecond, None)) => { cast_reinterpret_arrays::(array) } (Date64, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new( - array.as_primitive::().unary::<_, 
TimestampMicrosecondType>( - |x| x * (MICROSECONDS / MILLISECONDS), - ), + array + .as_primitive::() + .unary::<_, TimestampMicrosecondType>(|x| x * (MICROSECONDS / MILLISECONDS)), )), (Date64, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new( - array.as_primitive::().unary::<_, TimestampNanosecondType>( - |x| x * (NANOSECONDS / MILLISECONDS), - ), + array + .as_primitive::() + .unary::<_, TimestampNanosecondType>(|x| x * (NANOSECONDS / MILLISECONDS)), )), (Date32, Timestamp(TimeUnit::Second, None)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, TimestampSecondType>(|x| (x as i64) * SECONDS_IN_DAY), )), (Date32, Timestamp(TimeUnit::Millisecond, None)) => Ok(Arc::new( - array.as_primitive::().unary::<_, TimestampMillisecondType>( - |x| (x as i64) * MILLISECONDS_IN_DAY, - ), + array + .as_primitive::() + .unary::<_, TimestampMillisecondType>(|x| (x as i64) * MILLISECONDS_IN_DAY), )), (Date32, Timestamp(TimeUnit::Microsecond, None)) => Ok(Arc::new( - array.as_primitive::().unary::<_, TimestampMicrosecondType>( - |x| (x as i64) * MICROSECONDS_IN_DAY, - ), + array + .as_primitive::() + .unary::<_, TimestampMicrosecondType>(|x| (x as i64) * MICROSECONDS_IN_DAY), )), (Date32, Timestamp(TimeUnit::Nanosecond, None)) => Ok(Arc::new( - array.as_primitive::() + array + .as_primitive::() .unary::<_, TimestampNanosecondType>(|x| (x as i64) * NANOSECONDS_IN_DAY), )), (Int64, Duration(TimeUnit::Second)) => { @@ -2396,9 +2266,7 @@ where // Natural cast between numeric types // If the value of T can't be casted to R, will throw error -fn try_numeric_cast( - from: &PrimitiveArray, -) -> Result, ArrowError> +fn try_numeric_cast(from: &PrimitiveArray) -> Result, ArrowError> where T: ArrowPrimitiveType, R: ArrowPrimitiveType, @@ -2448,6 +2316,19 @@ fn value_to_string( Ok(Arc::new(builder.finish())) } +fn cast_numeric_to_binary( + array: &dyn Array, +) -> Result { + let array = array.as_primitive::(); + let size = std::mem::size_of::(); + let offsets = OffsetBuffer::from_lengths(std::iter::repeat(size).take(array.len())); + Ok(Arc::new(GenericBinaryArray::::new( + offsets, + array.values().inner().clone(), + array.nulls().cloned(), + ))) +} + /// Parse UTF-8 fn parse_string( array: &dyn Array, @@ -2499,11 +2380,7 @@ fn cast_string_to_timestamp( Ok(Arc::new(out.with_timezone_opt(to_tz.clone()))) } -fn cast_string_to_timestamp_impl< - O: OffsetSizeTrait, - T: ArrowTimestampType, - Tz: TimeZone, ->( +fn cast_string_to_timestamp_impl( array: &GenericStringArray, tz: &Tz, cast_options: &CastOptions, @@ -2660,9 +2537,7 @@ fn adjust_timestamp_to_timezone( } else { array.try_unary::<_, Int64Type, _>(|o| { adjust(o).ok_or_else(|| { - ArrowError::CastError( - "Cannot cast timezone to different timezone".to_string(), - ) + ArrowError::CastError("Cannot cast timezone to different timezone".to_string()) }) })? 
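// `cast_numeric_to_binary` (added above) does not copy values: every element
// of a primitive array already occupies size_of::<T::Native>() bytes in the
// values buffer, so the binary offsets are just a running multiple of that
// fixed width and the buffer is shared as-is. The bytes therefore come out in
// the machine's native byte order; the tests added further below assume a
// little-endian target. A hedged sketch of the observable result
// (illustration only, not part of this patch):
use std::sync::Arc;
use arrow_array::{cast::AsArray, ArrayRef, Int16Array};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType};

fn numeric_to_binary_example() -> Result<(), ArrowError> {
    let ints = Arc::new(Int16Array::from(vec![Some(511), None])) as ArrayRef;
    let bin = cast(&ints, &DataType::Binary)?;
    let bin = bin.as_binary::<i32>();
    assert_eq!(bin.value(0), &511_i16.to_le_bytes()); // two bytes per value
    assert!(bin.is_null(1));
    Ok(())
}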
}; @@ -2686,11 +2561,10 @@ where .iter() .map(|value| match value { Some(value) => match value.to_ascii_lowercase().trim() { - "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => { - Ok(Some(true)) + "t" | "tr" | "tru" | "true" | "y" | "ye" | "yes" | "on" | "1" => Ok(Some(true)), + "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" | "0" => { + Ok(Some(false)) } - "f" | "fa" | "fal" | "fals" | "false" | "n" | "no" | "of" | "off" - | "0" => Ok(Some(false)), invalid_value => match cast_options.safe { true => Ok(None), false => Err(ArrowError::CastError(format!( @@ -2722,19 +2596,38 @@ where ))); } - let integers = parts[0].trim_start_matches('0'); + let (negative, first_part) = if parts[0].is_empty() { + (false, parts[0]) + } else { + match parts[0].as_bytes()[0] { + b'-' => (true, &parts[0][1..]), + b'+' => (false, &parts[0][1..]), + _ => (false, parts[0]), + } + }; + + let integers = first_part.trim_start_matches('0'); let decimals = if parts.len() == 2 { parts[1] } else { "" }; + if !integers.is_empty() && !integers.as_bytes()[0].is_ascii_digit() { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid decimal format: {value_str:?}" + ))); + } + + if !decimals.is_empty() && !decimals.as_bytes()[0].is_ascii_digit() { + return Err(ArrowError::InvalidArgumentError(format!( + "Invalid decimal format: {value_str:?}" + ))); + } + // Adjust decimal based on scale - let number_decimals = if decimals.len() > scale { + let mut number_decimals = if decimals.len() > scale { let decimal_number = i256::from_string(decimals).ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Cannot parse decimal format: {value_str}" - )) + ArrowError::InvalidArgumentError(format!("Cannot parse decimal format: {value_str}")) })?; - let div = - i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?; + let div = i256::from_i128(10_i128).pow_checked((decimals.len() - scale) as u32)?; let half = div.div_wrapping(i256::from_i128(2)); let half_neg = half.neg_wrapping(); @@ -2756,9 +2649,7 @@ where "Cannot parse decimal format: {value_str}" )) }) - .map(|v| { - v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)) - })? + .map(|v| v.mul_wrapping(i256::from_i128(10_i128).pow_wrapping(scale as u32)))? 
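// The sign handling added above strips a leading '-' or '+' before the digit
// checks and re-attaches the '-' just before the final i256 parse (see the
// `number_decimals.insert(0, '-')` below). The scale adjustment keeps `scale`
// fractional digits and rounds the first dropped digit half-up, which is what
// makes "1.23499999" -> 1.23 but "1.23599999" -> 1.24 at scale 2 in the new
// tests. A numerically equivalent standalone model of that rounding, using
// u128 on the unsigned fractional digits instead of the crate's checked i256
// arithmetic (illustration only, no overflow guards):
fn round_decimals_to_scale(decimals: &str, scale: usize) -> u128 {
    let parsed: u128 = decimals.parse().expect("digits only");
    if decimals.len() <= scale {
        // Too few digits: pad with zeros, nothing to round
        return parsed * 10_u128.pow((scale - decimals.len()) as u32);
    }
    let div = 10_u128.pow((decimals.len() - scale) as u32);
    let (d, r) = (parsed / div, parsed % div);
    // A remainder of at least half of `div` rounds away from zero
    if r * 2 >= div {
        d + 1
    } else {
        d
    }
}
// round_decimals_to_scale("23499999", 2) == 23
// round_decimals_to_scale("23599999", 2) == 24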
} else { i256::ZERO }; @@ -2771,6 +2662,10 @@ where format!("{integers}{decimals}") }; + if negative { + number_decimals.insert(0, '-'); + } + let value = i256::from_string(number_decimals.as_str()).ok_or_else(|| { ArrowError::InvalidArgumentError(format!( "Cannot convert {} to {}: Overflow", @@ -2780,11 +2675,7 @@ where })?; T::Native::from_decimal(value).ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "Cannot convert {} to {}", - value_str, - T::PREFIX - )) + ArrowError::InvalidArgumentError(format!("Cannot convert {} to {}", value_str, T::PREFIX)) }) } @@ -2801,6 +2692,11 @@ where if cast_options.safe { let iter = from.iter().map(|v| { v.and_then(|v| parse_string_to_decimal_native::(v, scale as usize).ok()) + .and_then(|v| { + T::validate_decimal_precision(v, precision) + .is_ok() + .then_some(v) + }) }); // Benefit: // 20% performance improvement @@ -2815,13 +2711,15 @@ where .iter() .map(|v| { v.map(|v| { - parse_string_to_decimal_native::(v, scale as usize).map_err(|_| { - ArrowError::CastError(format!( - "Cannot cast string '{}' to value of {:?} type", - v, - T::DATA_TYPE, - )) - }) + parse_string_to_decimal_native::(v, scale as usize) + .map_err(|_| { + ArrowError::CastError(format!( + "Cannot cast string '{}' to value of {:?} type", + v, + T::DATA_TYPE, + )) + }) + .and_then(|v| T::validate_decimal_precision(v, precision).map(|_| v)) }) .transpose() }) @@ -2878,8 +2776,7 @@ fn cast_numeric_to_bool(from: &dyn Array) -> Result where FROM: ArrowPrimitiveType, { - numeric_to_bool_cast::(from.as_primitive::()) - .map(|to| Arc::new(to) as ArrayRef) + numeric_to_bool_cast::(from.as_primitive::()).map(|to| Arc::new(to) as ArrayRef) } fn numeric_to_bool_cast(from: &PrimitiveArray) -> Result @@ -2918,10 +2815,7 @@ where ))) } -fn bool_to_numeric_cast( - from: &BooleanArray, - _cast_options: &CastOptions, -) -> PrimitiveArray +fn bool_to_numeric_cast(from: &BooleanArray, _cast_options: &CastOptions) -> PrimitiveArray where T: ArrowPrimitiveType, T::Native: num::NumCast, @@ -2969,8 +2863,7 @@ fn dictionary_cast( Arc::new(PrimitiveArray::::from(dict_array.keys().to_data())); let values_array = dict_array.values(); let cast_keys = cast_with_options(&keys_array, to_index_type, cast_options)?; - let cast_values = - cast_with_options(values_array, to_value_type, cast_options)?; + let cast_values = cast_with_options(values_array, to_value_type, cast_options)?; // Failure to cast keys (because they don't fit in the // target type) results in NULL values; @@ -3042,66 +2935,24 @@ fn cast_to_dictionary( use DataType::*; match *dict_value_type { - Int8 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Int16 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Int32 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Int64 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - UInt8 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - UInt16 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - UInt32 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - UInt64 => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Decimal128(_, _) => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Decimal256(_, _) => pack_numeric_to_dictionary::( - array, - dict_value_type, - cast_options, - ), - Utf8 => 
pack_byte_to_dictionary::>(array, cast_options), - LargeUtf8 => { - pack_byte_to_dictionary::>(array, cast_options) - } - Binary => { - pack_byte_to_dictionary::>(array, cast_options) - } - LargeBinary => { - pack_byte_to_dictionary::>(array, cast_options) + Int8 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + Int16 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + Int32 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + Int64 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + UInt8 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + UInt16 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + UInt32 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + UInt64 => pack_numeric_to_dictionary::(array, dict_value_type, cast_options), + Decimal128(_, _) => { + pack_numeric_to_dictionary::(array, dict_value_type, cast_options) + } + Decimal256(_, _) => { + pack_numeric_to_dictionary::(array, dict_value_type, cast_options) } + Utf8 => pack_byte_to_dictionary::>(array, cast_options), + LargeUtf8 => pack_byte_to_dictionary::>(array, cast_options), + Binary => pack_byte_to_dictionary::>(array, cast_options), + LargeBinary => pack_byte_to_dictionary::>(array, cast_options), _ => Err(ArrowError::CastError(format!( "Unsupported output type for dictionary packing: {dict_value_type:?}" ))), @@ -3123,8 +2974,7 @@ where let cast_values = cast_with_options(array, dict_value_type, cast_options)?; let values = cast_values.as_primitive::(); - let mut b = - PrimitiveDictionaryBuilder::::with_capacity(values.len(), values.len()); + let mut b = PrimitiveDictionaryBuilder::::with_capacity(values.len(), values.len()); // copy each element one at a time for i in 0..values.len() { @@ -3152,8 +3002,7 @@ where .as_any() .downcast_ref::>() .unwrap(); - let mut b = - GenericByteDictionaryBuilder::::with_capacity(values.len(), 1024, 1024); + let mut b = GenericByteDictionaryBuilder::::with_capacity(values.len(), 1024, 1024); // copy each element one at a time for i in 0..values.len() { @@ -3178,29 +3027,6 @@ fn cast_values_to_list( Ok(Arc::new(list)) } -/// Helper function that takes an Generic list container and casts the inner datatype. -fn cast_list_inner( - array: &dyn Array, - to: &Field, - to_type: &DataType, - cast_options: &CastOptions, -) -> Result { - let data = array.to_data(); - let underlying_array = make_array(data.child_data()[0].clone()); - let cast_array = - cast_with_options(underlying_array.as_ref(), to.data_type(), cast_options)?; - let builder = data - .into_builder() - .data_type(to_type.clone()) - .child_data(vec![cast_array.into_data()]); - - // Safety - // Data was valid before - let array_data = unsafe { builder.build_unchecked() }; - let list = GenericListArray::::from(array_data); - Ok(Arc::new(list) as ArrayRef) -} - /// A specified helper to cast from `GenericBinaryArray` to `GenericStringArray` when they have same /// offset size so re-encoding offset is unnecessary. 
fn cast_binary_to_string( @@ -3217,10 +3043,8 @@ fn cast_binary_to_string( Err(e) => match cast_options.safe { true => { // Fallback to slow method to convert invalid sequences to nulls - let mut builder = GenericStringBuilder::::with_capacity( - array.len(), - array.value_data().len(), - ); + let mut builder = + GenericStringBuilder::::with_capacity(array.len(), array.value_data().len()); let iter = array .iter() @@ -3315,8 +3139,8 @@ where offsets .iter() .try_for_each::<_, Result<_, ArrowError>>(|offset| { - let offset = <::Offset as NumCast>::from(*offset) - .ok_or_else(|| { + let offset = + <::Offset as NumCast>::from(*offset).ok_or_else(|| { ArrowError::ComputeError(format!( "{}{} array too large to cast to {}{} array", FROM::Offset::PREFIX, @@ -3345,9 +3169,7 @@ where Ok(Arc::new(GenericByteArray::::from(array_data))) } -fn cast_fixed_size_list_to_list( - array: &dyn Array, -) -> Result +fn cast_fixed_size_list_to_list(array: &dyn Array) -> Result where OffsetSize: OffsetSizeTrait, { @@ -3356,80 +3178,134 @@ where Ok(Arc::new(list)) } -/// Cast the container type of List/Largelist array but not the inner types. -/// This function can leave the value data intact and only has to cast the offset dtypes. -fn cast_list_container( - array: &dyn Array, - _cast_options: &CastOptions, +fn cast_list_to_fixed_size_list( + array: &GenericListArray, + field: &FieldRef, + size: i32, + cast_options: &CastOptions, ) -> Result where - OffsetSizeFrom: OffsetSizeTrait + ToPrimitive, - OffsetSizeTo: OffsetSizeTrait + NumCast, + OffsetSize: OffsetSizeTrait, { - let list = array.as_list::(); - // the value data stored by the list - let values = list.values(); + let cap = array.len() * size as usize; - let out_dtype = match array.data_type() { - DataType::List(value_type) => { - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - DataType::LargeList(value_type.clone()) + let mut nulls = (cast_options.safe || array.null_count() != 0).then(|| { + let mut buffer = BooleanBufferBuilder::new(array.len()); + match array.nulls() { + Some(n) => buffer.append_buffer(n.inner()), + None => buffer.append_n(array.len(), true), } - DataType::LargeList(value_type) => { - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - assert_eq!( - std::mem::size_of::(), - std::mem::size_of::() - ); - if values.len() > i32::MAX as usize { - return Err(ArrowError::ComputeError( - "LargeList too large to cast to List".into(), - )); + buffer + }); + + // Nulls in FixedSizeListArray take up space and so we must pad the values + let values = array.values().to_data(); + let mut mutable = MutableArrayData::new(vec![&values], cast_options.safe, cap); + // The end position in values of the last incorrectly-sized list slice + let mut last_pos = 0; + for (idx, w) in array.offsets().windows(2).enumerate() { + let start_pos = w[0].as_usize(); + let end_pos = w[1].as_usize(); + let len = end_pos - start_pos; + + if len != size as usize { + if cast_options.safe || array.is_null(idx) { + if last_pos != start_pos { + // Extend with valid slices + mutable.extend(0, last_pos, start_pos); + } + // Pad this slice with nulls + mutable.extend_nulls(size as _); + nulls.as_mut().unwrap().set_bit(idx, false); + // Set last_pos to the end of this slice's values + last_pos = end_pos + } else { + return Err(ArrowError::CastError(format!( + "Cannot cast to FixedSizeList({size}): value at index {idx} has length {len}", + ))); } - DataType::List(value_type.clone()) 
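// `cast_list_to_fixed_size_list` (above) has to materialize padding because a
// FixedSizeListArray reserves `size` value slots even for null entries, while
// a ListArray may encode nulls as zero-length (or any-length) slices. Slices
// of the wrong length become nulls under `safe: true` and an error otherwise,
// mirroring the tests added later in this patch. Usage sketch through the
// public API (illustration only, not part of this patch):
use std::sync::Arc;
use arrow_array::{types::Int32Type, ArrayRef, ListArray};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType, Field};

fn list_to_fsl_example() -> Result<(), ArrowError> {
    let list = ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        Some(vec![Some(1), Some(2), Some(3)]),
        Some(vec![Some(4), Some(5)]), // wrong length: becomes null when safe
    ]);
    let array = Arc::new(list) as ArrayRef;
    let field = Arc::new(Field::new("item", DataType::Int32, true));
    // `cast` uses the default options, i.e. safe casting
    let fsl = cast(&array, &DataType::FixedSizeList(field, 3))?;
    assert!(fsl.is_valid(0));
    assert!(fsl.is_null(1));
    Ok(())
}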
} - // implementation error - _ => unreachable!(), + } + + let values = match last_pos { + 0 => array.values().slice(0, cap), // All slices were the correct length + _ => { + if mutable.len() != cap { + // Remaining slices were all correct length + let remaining = cap - mutable.len(); + mutable.extend(0, last_pos, last_pos + remaining) + } + make_array(mutable.freeze()) + } }; - let iter = list.value_offsets().iter().map(|idx| { - let idx: OffsetSizeTo = NumCast::from(*idx).unwrap(); - idx - }); + // Cast the inner values if necessary + let values = cast_with_options(values.as_ref(), field.data_type(), cast_options)?; - // SAFETY - // A slice produces a trusted length iterator - let offset_buffer = unsafe { Buffer::from_trusted_len_iter(iter) }; + // Construct the FixedSizeListArray + let nulls = nulls.map(|mut x| x.finish().into()); + let array = FixedSizeListArray::new(field.clone(), size, values, nulls); + Ok(Arc::new(array)) +} - // wrap up - let builder = ArrayData::builder(out_dtype) - .len(list.len()) - .add_buffer(offset_buffer) - .add_child_data(values.to_data()) - .nulls(list.nulls().cloned()); +/// Helper function that takes an Generic list container and casts the inner datatype. +fn cast_list_values( + array: &dyn Array, + to: &FieldRef, + cast_options: &CastOptions, +) -> Result { + let list = array.as_list::(); + let values = cast_with_options(list.values(), to.data_type(), cast_options)?; + Ok(Arc::new(GenericListArray::::new( + to.clone(), + list.offsets().clone(), + values, + list.nulls().cloned(), + ))) +} - let array_data = unsafe { builder.build_unchecked() }; - Ok(Arc::new(GenericListArray::::from(array_data))) +/// Cast the container type of List/Largelist array along with the inner datatype +fn cast_list( + array: &dyn Array, + field: &FieldRef, + cast_options: &CastOptions, +) -> Result { + let list = array.as_list::(); + let values = list.values(); + let offsets = list.offsets(); + let nulls = list.nulls().cloned(); + + if !O::IS_LARGE && values.len() > i32::MAX as usize { + return Err(ArrowError::ComputeError( + "LargeList too large to cast to List".into(), + )); + } + + // Recursively cast values + let values = cast_with_options(values, field.data_type(), cast_options)?; + let offsets: Vec<_> = offsets.iter().map(|x| O::usize_as(x.as_usize())).collect(); + + // Safety: valid offsets and checked for overflow + let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) }; + + Ok(Arc::new(GenericListArray::::new( + field.clone(), + offsets, + values, + nulls, + ))) } #[cfg(test)] mod tests { + use arrow_buffer::{Buffer, NullBuffer}; + use super::*; macro_rules! 
generate_cast_test_case { ($INPUT_ARRAY: expr, $OUTPUT_TYPE_ARRAY: ident, $OUTPUT_TYPE: expr, $OUTPUT_VALUES: expr) => { - let output = $OUTPUT_TYPE_ARRAY::from($OUTPUT_VALUES) - .with_data_type($OUTPUT_TYPE.clone()); + let output = + $OUTPUT_TYPE_ARRAY::from($OUTPUT_VALUES).with_data_type($OUTPUT_TYPE.clone()); // assert cast type let input_array_type = $INPUT_ARRAY.data_type(); @@ -3442,8 +3318,7 @@ mod tests { safe: false, format_options: FormatOptions::default(), }; - let result = - cast_with_options($INPUT_ARRAY, $OUTPUT_TYPE, &cast_option).unwrap(); + let result = cast_with_options($INPUT_ARRAY, $OUTPUT_TYPE, &cast_option).unwrap(); assert_eq!($OUTPUT_TYPE, result.data_type()); assert_eq!(result.as_ref(), &output); }; @@ -3777,8 +3652,7 @@ mod tests { #[test] fn test_cast_decimal_to_numeric() { - let value_array: Vec> = - vec![Some(125), Some(225), Some(325), None, Some(525)]; + let value_array: Vec> = vec![Some(125), Some(225), Some(325), None, Some(525)]; let array = create_decimal_array(value_array, 38, 2).unwrap(); // u8 generate_cast_test_case!( @@ -4590,8 +4464,7 @@ mod tests { #[test] fn test_cast_i32_to_list_f64_nullable_sliced() { - let array = - Int32Array::from(vec![Some(5), None, Some(7), Some(8), None, Some(10)]); + let array = Int32Array::from(vec![Some(5), None, Some(7), Some(8), None, Some(10)]); let array = array.slice(2, 4); let b = cast( &array, @@ -4641,9 +4514,8 @@ mod tests { Ok(_) => panic!("expected error"), Err(e) => { assert!( - e.to_string().contains( - "Cast error: Cannot cast string 'seven' to value of Int32 type", - ), + e.to_string() + .contains("Cast error: Cannot cast string 'seven' to value of Int32 type",), "Error: {e}" ) } @@ -4654,8 +4526,7 @@ mod tests { fn test_cast_utf8_to_bool() { let strings = StringArray::from(vec!["true", "false", "invalid", " Y ", ""]); let casted = cast(&strings, &DataType::Boolean).unwrap(); - let expected = - BooleanArray::from(vec![Some(true), Some(false), None, Some(true), None]); + let expected = BooleanArray::from(vec![Some(true), Some(false), None, Some(true), None]); assert_eq!(*as_boolean_array(&casted), expected); } @@ -4673,9 +4544,9 @@ mod tests { match casted { Ok(_) => panic!("expected error"), Err(e) => { - assert!(e.to_string().contains( - "Cast error: Cannot cast value 'invalid' to value of Boolean type" - )) + assert!(e + .to_string() + .contains("Cast error: Cannot cast value 'invalid' to value of Boolean type")) } } } @@ -4721,25 +4592,157 @@ mod tests { } #[test] - #[should_panic( - expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported" - )] - fn test_cast_int32_to_timestamp() { + fn test_cast_integer_to_timestamp() { + let array = Int64Array::from(vec![Some(2), Some(10), None]); + let expected = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + let array = Int8Array::from(vec![Some(2), Some(10), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = Int16Array::from(vec![Some(2), Some(10), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + let array = Int32Array::from(vec![Some(2), Some(10), None]); - cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = UInt8Array::from(vec![Some(2), Some(10), None]); + let actual = 
cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = UInt16Array::from(vec![Some(2), Some(10), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = UInt32Array::from(vec![Some(2), Some(10), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = UInt64Array::from(vec![Some(2), Some(10), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + } + + #[test] + fn test_cast_timestamp_to_integer() { + let array = TimestampMillisecondArray::from(vec![Some(5), Some(1), None]) + .with_timezone("UTC".to_string()); + let expected = cast(&array, &DataType::Int64).unwrap(); + + let actual = cast(&cast(&array, &DataType::Int8).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::Int16).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::Int32).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::UInt8).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::UInt16).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::UInt32).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::UInt64).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + } + + #[test] + fn test_cast_floating_to_timestamp() { + let array = Int64Array::from(vec![Some(2), Some(10), None]); + let expected = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + let array = Float32Array::from(vec![Some(2.0), Some(10.6), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = Float64Array::from(vec![Some(2.1), Some(10.2), None]); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + } + + #[test] + fn test_cast_timestamp_to_floating() { + let array = TimestampMillisecondArray::from(vec![Some(5), Some(1), None]) + .with_timezone("UTC".to_string()); + let expected = cast(&array, &DataType::Int64).unwrap(); + + let actual = cast(&cast(&array, &DataType::Float32).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast(&cast(&array, &DataType::Float64).unwrap(), &DataType::Int64).unwrap(); + assert_eq!(&actual, &expected); + } + + #[test] + fn test_cast_decimal_to_timestamp() { + let array = Int64Array::from(vec![Some(2), Some(10), None]); + let expected = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + let array = Decimal128Array::from(vec![Some(200), Some(1000), None]) + .with_precision_and_scale(4, 2) + .unwrap(); + let actual = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + + let array = Decimal256Array::from(vec![ + Some(i256::from_i128(2000)), + Some(i256::from_i128(10000)), + None, + ]) + .with_precision_and_scale(5, 3) + .unwrap(); + let actual = cast(&array, 
&DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); + + assert_eq!(&actual, &expected); + } + + #[test] + fn test_cast_timestamp_to_decimal() { + let array = TimestampMillisecondArray::from(vec![Some(5), Some(1), None]) + .with_timezone("UTC".to_string()); + let expected = cast(&array, &DataType::Int64).unwrap(); + + let actual = cast( + &cast(&array, &DataType::Decimal128(5, 2)).unwrap(), + &DataType::Int64, + ) + .unwrap(); + assert_eq!(&actual, &expected); + + let actual = cast( + &cast(&array, &DataType::Decimal256(10, 5)).unwrap(), + &DataType::Int64, + ) + .unwrap(); + assert_eq!(&actual, &expected); } #[test] fn test_cast_list_i32_to_list_u16() { - let value_data = - Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 100000000]).into_data(); let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two // [[0,0,0], [-1, -2, -1], [2, 100000000]] - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -4783,19 +4786,14 @@ mod tests { } #[test] - #[should_panic( - expected = "Casting from Int32 to Timestamp(Microsecond, None) not supported" - )] fn test_cast_list_i32_to_list_timestamp() { // Construct a value array - let value_data = - Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data(); + let value_data = Int32Array::from(vec![0, 0, 0, -1, -2, -1, 2, 8, 100000000]).into_data(); let value_offsets = Buffer::from_slice_ref([0, 3, 6, 9]); // Construct a list array from the above two - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -4804,7 +4802,7 @@ mod tests { .unwrap(); let list_array = Arc::new(ListArray::from(list_data)) as ArrayRef; - cast( + let actual = cast( &list_array, &DataType::List(Arc::new(Field::new( "item", @@ -4813,6 +4811,22 @@ mod tests { ))), ) .unwrap(); + + let expected = cast( + &cast( + &list_array, + &DataType::List(Arc::new(Field::new("item", DataType::Int64, true))), + ) + .unwrap(), + &DataType::List(Arc::new(Field::new( + "item", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ))), + ) + .unwrap(); + + assert_eq!(&actual, &expected); } #[test] @@ -4940,10 +4954,37 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid date' to value of Date32 type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string 'Not a valid date' to value of Date32 type" + ); } } + #[test] + fn test_cast_string_format_yyyymmdd_to_date32() { + let a = Arc::new(StringArray::from(vec![ + Some("2020-12-25"), + Some("20201117"), + ])) as ArrayRef; + + let to_type = DataType::Date32; + let options = CastOptions { + safe: false, + format_options: FormatOptions::default(), + }; + let result = cast_with_options(&a, &to_type, &options).unwrap(); + let c = result.as_primitive::(); + assert_eq!( + chrono::NaiveDate::from_ymd_opt(2020, 12, 25), + c.value_as_date(0) + ); + assert_eq!( + chrono::NaiveDate::from_ymd_opt(2020, 11, 17), + 
c.value_as_date(1) + ); + } + #[test] fn test_cast_string_to_time32second() { let a1 = Arc::new(StringArray::from(vec![ @@ -5097,14 +5138,16 @@ mod tests { format_options: FormatOptions::default(), }; let err = cast_with_options(array, &to_type, &options).unwrap_err(); - assert_eq!(err.to_string(), "Cast error: Cannot cast string 'Not a valid date' to value of Date64 type"); + assert_eq!( + err.to_string(), + "Cast error: Cannot cast string 'Not a valid date' to value of Date64 type" + ); } } macro_rules! test_safe_string_to_interval { ($data_vec:expr, $interval_unit:expr, $array_ty:ty, $expect_vec:expr) => { - let source_string_array = - Arc::new(StringArray::from($data_vec.clone())) as ArrayRef; + let source_string_array = Arc::new(StringArray::from($data_vec.clone())) as ArrayRef; let options = CastOptions { safe: true, @@ -5358,23 +5401,61 @@ mod tests { } #[test] - fn test_fixed_size_binary_to_binary() { - let bytes_1 = "Hiiii".as_bytes(); - let bytes_2 = "Hello".as_bytes(); - - let binary_data = vec![Some(bytes_1), Some(bytes_2), None]; - let a1 = Arc::new(FixedSizeBinaryArray::from(binary_data.clone())) as ArrayRef; + fn test_fixed_size_binary_to_binary() { + let bytes_1 = "Hiiii".as_bytes(); + let bytes_2 = "Hello".as_bytes(); + + let binary_data = vec![Some(bytes_1), Some(bytes_2), None]; + let a1 = Arc::new(FixedSizeBinaryArray::from(binary_data.clone())) as ArrayRef; + + let array_ref = cast(&a1, &DataType::Binary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(bytes_1, down_cast.value(0)); + assert_eq!(bytes_2, down_cast.value(1)); + assert!(down_cast.is_null(2)); + + let array_ref = cast(&a1, &DataType::LargeBinary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(bytes_1, down_cast.value(0)); + assert_eq!(bytes_2, down_cast.value(1)); + assert!(down_cast.is_null(2)); + } + + #[test] + fn test_numeric_to_binary() { + let a = Int16Array::from(vec![Some(1), Some(511), None]); + + let array_ref = cast(&a, &DataType::Binary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&1_i16.to_le_bytes(), down_cast.value(0)); + assert_eq!(&511_i16.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + + let a = Int64Array::from(vec![Some(-1), Some(123456789), None]); + + let array_ref = cast(&a, &DataType::Binary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&(-1_i64).to_le_bytes(), down_cast.value(0)); + assert_eq!(&123456789_i64.to_le_bytes(), down_cast.value(1)); + assert!(down_cast.is_null(2)); + } + + #[test] + fn test_numeric_to_large_binary() { + let a = Int16Array::from(vec![Some(1), Some(511), None]); - let array_ref = cast(&a1, &DataType::Binary).unwrap(); - let down_cast = array_ref.as_binary::(); - assert_eq!(bytes_1, down_cast.value(0)); - assert_eq!(bytes_2, down_cast.value(1)); + let array_ref = cast(&a, &DataType::LargeBinary).unwrap(); + let down_cast = array_ref.as_binary::(); + assert_eq!(&1_i16.to_le_bytes(), down_cast.value(0)); + assert_eq!(&511_i16.to_le_bytes(), down_cast.value(1)); assert!(down_cast.is_null(2)); - let array_ref = cast(&a1, &DataType::LargeBinary).unwrap(); + let a = Int64Array::from(vec![Some(-1), Some(123456789), None]); + + let array_ref = cast(&a, &DataType::LargeBinary).unwrap(); let down_cast = array_ref.as_binary::(); - assert_eq!(bytes_1, down_cast.value(0)); - assert_eq!(bytes_2, down_cast.value(1)); + assert_eq!(&(-1_i64).to_le_bytes(), down_cast.value(0)); + assert_eq!(&123456789_i64.to_le_bytes(), down_cast.value(1)); assert!(down_cast.is_null(2)); } 
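// The assertions above pin down the layout produced by
// `cast_numeric_to_binary`: each Int16 becomes a fixed two-byte slice in the
// native byte order of the values buffer (the `to_le_bytes` expectations
// assume a little-endian target). A hedged sketch showing that the bytes can
// be decoded back by hand (illustration only; the decode step here is manual,
// not a cast):
use arrow_array::{cast::AsArray, Int16Array};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType};

fn decode_binary_example() -> Result<(), ArrowError> {
    let ints = Int16Array::from(vec![Some(511)]);
    let bin = cast(&ints, &DataType::Binary)?;
    let bytes: [u8; 2] = bin.as_binary::<i32>().value(0).try_into().unwrap();
    assert_eq!(i16::from_le_bytes(bytes), 511);
    Ok(())
}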
@@ -5398,12 +5479,9 @@ mod tests { #[test] fn test_cast_timestamp_to_date32() { - let array = TimestampMillisecondArray::from(vec![ - Some(864000000005), - Some(1545696000001), - None, - ]) - .with_timezone("UTC".to_string()); + let array = + TimestampMillisecondArray::from(vec![Some(864000000005), Some(1545696000001), None]) + .with_timezone("UTC".to_string()); let b = cast(&array, &DataType::Date32).unwrap(); let c = b.as_primitive::(); assert_eq!(10000, c.value(0)); @@ -5413,19 +5491,15 @@ mod tests { #[test] fn test_cast_timestamp_to_date64() { - let array = TimestampMillisecondArray::from(vec![ - Some(864000000005), - Some(1545696000001), - None, - ]); + let array = + TimestampMillisecondArray::from(vec![Some(864000000005), Some(1545696000001), None]); let b = cast(&array, &DataType::Date64).unwrap(); let c = b.as_primitive::(); assert_eq!(864000000005, c.value(0)); assert_eq!(1545696000001, c.value(1)); assert!(c.is_null(2)); - let array = - TimestampSecondArray::from(vec![Some(864000000005), Some(1545696000001)]); + let array = TimestampSecondArray::from(vec![Some(864000000005), Some(1545696000001)]); let b = cast(&array, &DataType::Date64).unwrap(); let c = b.as_primitive::(); assert_eq!(864000000005000, c.value(0)); @@ -5477,9 +5551,8 @@ mod tests { assert!(c.is_null(2)); // test timestamp microseconds - let a = - TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None]) - .with_timezone("+01:00".to_string()); + let a = TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None]) + .with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap(); let c = b.as_primitive::(); @@ -5493,12 +5566,8 @@ mod tests { assert!(c.is_null(2)); // test timestamp nanoseconds - let a = TimestampNanosecondArray::from(vec![ - Some(86405000000000), - Some(1000000000), - None, - ]) - .with_timezone("+01:00".to_string()); + let a = TimestampNanosecondArray::from(vec![Some(86405000000000), Some(1000000000), None]) + .with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)).unwrap(); let c = b.as_primitive::(); @@ -5512,8 +5581,8 @@ mod tests { assert!(c.is_null(2)); // test overflow - let a = TimestampSecondArray::from(vec![Some(i64::MAX)]) - .with_timezone("+01:00".to_string()); + let a = + TimestampSecondArray::from(vec![Some(i64::MAX)]).with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, &DataType::Time64(TimeUnit::Microsecond)); assert!(b.is_err()); @@ -5556,9 +5625,8 @@ mod tests { assert!(c.is_null(2)); // test timestamp microseconds - let a = - TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None]) - .with_timezone("+01:00".to_string()); + let a = TimestampMicrosecondArray::from(vec![Some(86405000000), Some(1000000), None]) + .with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, &DataType::Time32(TimeUnit::Second)).unwrap(); let c = b.as_primitive::(); @@ -5572,12 +5640,8 @@ mod tests { assert!(c.is_null(2)); // test timestamp nanoseconds - let a = TimestampNanosecondArray::from(vec![ - Some(86405000000000), - Some(1000000000), - None, - ]) - .with_timezone("+01:00".to_string()); + let a = TimestampNanosecondArray::from(vec![Some(86405000000000), Some(1000000000), None]) + .with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, 
&DataType::Time32(TimeUnit::Second)).unwrap(); let c = b.as_primitive::(); @@ -5591,8 +5655,8 @@ mod tests { assert!(c.is_null(2)); // test overflow - let a = TimestampSecondArray::from(vec![Some(i64::MAX)]) - .with_timezone("+01:00".to_string()); + let a = + TimestampSecondArray::from(vec![Some(i64::MAX)]).with_timezone("+01:00".to_string()); let array = Arc::new(a) as ArrayRef; let b = cast(&array, &DataType::Time32(TimeUnit::Second)); assert!(b.is_err()); @@ -5679,8 +5743,7 @@ mod tests { #[test] fn test_cast_date64_to_timestamp() { - let array = - Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap(); let c = b.as_primitive::(); assert_eq!(864000000, c.value(0)); @@ -5690,8 +5753,7 @@ mod tests { #[test] fn test_cast_date64_to_timestamp_ms() { - let array = - Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); let b = cast(&array, &DataType::Timestamp(TimeUnit::Millisecond, None)).unwrap(); let c = b .as_any() @@ -5704,8 +5766,7 @@ mod tests { #[test] fn test_cast_date64_to_timestamp_us() { - let array = - Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); let b = cast(&array, &DataType::Timestamp(TimeUnit::Microsecond, None)).unwrap(); let c = b .as_any() @@ -5718,8 +5779,7 @@ mod tests { #[test] fn test_cast_date64_to_timestamp_ns() { - let array = - Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); + let array = Date64Array::from(vec![Some(864000000005), Some(1545696000001), None]); let b = cast(&array, &DataType::Timestamp(TimeUnit::Nanosecond, None)).unwrap(); let c = b .as_any() @@ -5732,12 +5792,9 @@ mod tests { #[test] fn test_cast_timestamp_to_i64() { - let array = TimestampMillisecondArray::from(vec![ - Some(864000000005), - Some(1545696000001), - None, - ]) - .with_timezone("UTC".to_string()); + let array = + TimestampMillisecondArray::from(vec![Some(864000000005), Some(1545696000001), None]) + .with_timezone("UTC".to_string()); let b = cast(&array, &DataType::Int64).unwrap(); let c = b.as_primitive::(); assert_eq!(&DataType::Int64, c.data_type()); @@ -5769,11 +5826,8 @@ mod tests { #[test] fn test_cast_timestamp_to_strings() { // "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None - let array = TimestampMillisecondArray::from(vec![ - Some(864000003005), - Some(1545696002001), - None, - ]); + let array = + TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]); let out = cast(&array, &DataType::Utf8).unwrap(); let out = out .as_any() @@ -5817,13 +5871,9 @@ mod tests { .with_timestamp_tz_format(Some(ts_format)), }; // "2018-12-25T00:00:02.001", "1997-05-19T00:00:03.005", None - let array_without_tz = TimestampMillisecondArray::from(vec![ - Some(864000003005), - Some(1545696002001), - None, - ]); - let out = - cast_with_options(&array_without_tz, &DataType::Utf8, &cast_options).unwrap(); + let array_without_tz = + TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]); + let out = cast_with_options(&array_without_tz, &DataType::Utf8, &cast_options).unwrap(); let out = out .as_any() .downcast_ref::() @@ -5839,8 +5889,7 @@ mod tests { ] ); let out = - cast_with_options(&array_without_tz, 
&DataType::LargeUtf8, &cast_options) - .unwrap(); + cast_with_options(&array_without_tz, &DataType::LargeUtf8, &cast_options).unwrap(); let out = out .as_any() .downcast_ref::() @@ -5856,14 +5905,10 @@ mod tests { ] ); - let array_with_tz = TimestampMillisecondArray::from(vec![ - Some(864000003005), - Some(1545696002001), - None, - ]) - .with_timezone(tz.to_string()); - let out = - cast_with_options(&array_with_tz, &DataType::Utf8, &cast_options).unwrap(); + let array_with_tz = + TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]) + .with_timezone(tz.to_string()); + let out = cast_with_options(&array_with_tz, &DataType::Utf8, &cast_options).unwrap(); let out = out .as_any() .downcast_ref::() @@ -5878,8 +5923,7 @@ mod tests { None ] ); - let out = cast_with_options(&array_with_tz, &DataType::LargeUtf8, &cast_options) - .unwrap(); + let out = cast_with_options(&array_with_tz, &DataType::LargeUtf8, &cast_options).unwrap(); let out = out .as_any() .downcast_ref::() @@ -5898,11 +5942,8 @@ mod tests { #[test] fn test_cast_between_timestamps() { - let array = TimestampMillisecondArray::from(vec![ - Some(864000003005), - Some(1545696002001), - None, - ]); + let array = + TimestampMillisecondArray::from(vec![Some(864000003005), Some(1545696002001), None]); let b = cast(&array, &DataType::Timestamp(TimeUnit::Second, None)).unwrap(); let c = b.as_primitive::(); assert_eq!(864000003, c.value(0)); @@ -6306,8 +6347,7 @@ mod tests { ]; let u64_array: ArrayRef = Arc::new(UInt64Array::from(u64_values)); - let f64_expected = - vec![0.0, 255.0, 65535.0, 4294967295.0, 18446744073709552000.0]; + let f64_expected = vec![0.0, 255.0, 65535.0, 4294967295.0, 18446744073709552000.0]; assert_eq!( f64_expected, get_cast_values::(&u64_array, &DataType::Float64) @@ -6316,8 +6356,7 @@ mod tests { .collect::>() ); - let f32_expected = - vec![0.0, 255.0, 65535.0, 4294967300.0, 18446744000000000000.0]; + let f32_expected = vec![0.0, 255.0, 65535.0, 4294967300.0, 18446744000000000000.0]; assert_eq!( f32_expected, get_cast_values::(&u64_array, &DataType::Float32) @@ -6350,8 +6389,7 @@ mod tests { get_cast_values::(&u64_array, &DataType::Int8) ); - let u64_expected = - vec!["0", "255", "65535", "4294967295", "18446744073709551615"]; + let u64_expected = vec!["0", "255", "65535", "4294967295", "18446744073709551615"]; assert_eq!( u64_expected, get_cast_values::(&u64_array, &DataType::UInt64) @@ -6782,15 +6820,13 @@ mod tests { get_cast_values::(&i32_array, &DataType::Int8) ); - let u64_expected = - vec!["null", "null", "null", "0", "127", "32767", "2147483647"]; + let u64_expected = vec!["null", "null", "null", "0", "127", "32767", "2147483647"]; assert_eq!( u64_expected, get_cast_values::(&i32_array, &DataType::UInt64) ); - let u32_expected = - vec!["null", "null", "null", "0", "127", "32767", "2147483647"]; + let u32_expected = vec!["null", "null", "null", "0", "127", "32767", "2147483647"]; assert_eq!( u32_expected, get_cast_values::(&i32_array, &DataType::UInt32) @@ -6826,8 +6862,7 @@ mod tests { #[test] fn test_cast_from_int16() { - let i16_values: Vec = - vec![i16::MIN, i8::MIN as i16, 0, i8::MAX as i16, i16::MAX]; + let i16_values: Vec = vec![i16::MIN, i8::MIN as i16, 0, i8::MAX as i16, i16::MAX]; let i16_array: ArrayRef = Arc::new(Int16Array::from(i16_values)); let f64_expected = vec!["-32768.0", "-128.0", "0.0", "127.0", "32767.0"]; @@ -7168,8 +7203,7 @@ mod tests { fn test_cast_string_array_to_dict() { use DataType::*; - let array = Arc::new(StringArray::from(vec![Some("one"), None, 
Some("three")])) - as ArrayRef; + let array = Arc::new(StringArray::from(vec![Some("one"), None, Some("three")])) as ArrayRef; let expected = vec!["one", "null", "three"]; @@ -7268,16 +7302,12 @@ mod tests { cast_from_null_to_other(&data_type); // Cast null from and to list - let data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); cast_from_null_to_other(&data_type); - let data_type = - DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); + let data_type = DataType::LargeList(Arc::new(Field::new("item", DataType::Int32, true))); cast_from_null_to_other(&data_type); - let data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, true)), - 4, - ); + let data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); cast_from_null_to_other(&data_type); // Cast null from and to dictionary @@ -7288,8 +7318,7 @@ mod tests { cast_from_null_to_other(&data_type); // Cast null from and to struct - let data_type = - DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into()); + let data_type = DataType::Struct(vec![Field::new("data", DataType::Int64, false)].into()); cast_from_null_to_other(&data_type); } @@ -7469,6 +7498,183 @@ mod tests { assert_eq!(&expected.value(2), &actual.value(2)); } + #[test] + fn test_cast_list_to_fsl() { + // There four noteworthy cases we should handle: + // 1. No nulls + // 2. Nulls that are always empty + // 3. Nulls that have varying lengths + // 4. Nulls that are correctly sized (same as target list size) + + // Non-null case + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let values = vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5), Some(6)]), + ]; + let array = Arc::new(ListArray::from_iter_primitive::( + values.clone(), + )) as ArrayRef; + let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( + values, 3, + )) as ArrayRef; + let actual = cast(array.as_ref(), &DataType::FixedSizeList(field.clone(), 3)).unwrap(); + assert_eq!(expected.as_ref(), actual.as_ref()); + + // Null cases + // Array is [[1, 2, 3], null, [4, 5, 6], null] + let cases = [ + ( + // Zero-length nulls + vec![1, 2, 3, 4, 5, 6], + vec![3, 0, 3, 0], + ), + ( + // Varying-length nulls + vec![1, 2, 3, 0, 0, 4, 5, 6, 0], + vec![3, 2, 3, 1], + ), + ( + // Correctly-sized nulls + vec![1, 2, 3, 0, 0, 0, 4, 5, 6, 0, 0, 0], + vec![3, 3, 3, 3], + ), + ( + // Mixed nulls + vec![1, 2, 3, 4, 5, 6, 0, 0, 0], + vec![3, 0, 3, 3], + ), + ]; + let null_buffer = NullBuffer::from(vec![true, false, true, false]); + + let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![Some(4), Some(5), Some(6)]), + None, + ], + 3, + )) as ArrayRef; + + for (values, lengths) in cases.iter() { + let array = Arc::new(ListArray::new( + field.clone(), + OffsetBuffer::from_lengths(lengths.clone()), + Arc::new(Int32Array::from(values.clone())), + Some(null_buffer.clone()), + )) as ArrayRef; + let actual = cast(array.as_ref(), &DataType::FixedSizeList(field.clone(), 3)).unwrap(); + assert_eq!(expected.as_ref(), actual.as_ref()); + } + } + + #[test] + fn test_cast_list_to_fsl_safety() { + let values = vec![ + Some(vec![Some(1), Some(2), Some(3)]), + Some(vec![Some(4), Some(5)]), + Some(vec![Some(6), Some(7), Some(8), Some(9)]), + Some(vec![Some(3), Some(4), Some(5)]), + ]; + let array = 
Arc::new(ListArray::from_iter_primitive::( + values.clone(), + )) as ArrayRef; + + let res = cast_with_options( + array.as_ref(), + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + &CastOptions { + safe: false, + ..Default::default() + }, + ); + assert!(res.is_err()); + assert!(format!("{:?}", res) + .contains("Cannot cast to FixedSizeList(3): value at index 1 has length 2")); + + // When safe=true (default), the cast will fill nulls for lists that are + // too short and truncate lists that are too long. + let res = cast( + array.as_ref(), + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3), + ) + .unwrap(); + let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(1), Some(2), Some(3)]), + None, // Too short -> replaced with null + None, // Too long -> replaced with null + Some(vec![Some(3), Some(4), Some(5)]), + ], + 3, + )) as ArrayRef; + assert_eq!(expected.as_ref(), res.as_ref()); + } + + #[test] + fn test_cast_large_list_to_fsl() { + let values = vec![Some(vec![Some(1), Some(2)]), Some(vec![Some(3), Some(4)])]; + let array = Arc::new(LargeListArray::from_iter_primitive::( + values.clone(), + )) as ArrayRef; + let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( + values, 2, + )) as ArrayRef; + let actual = cast( + array.as_ref(), + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 2), + ) + .unwrap(); + assert_eq!(expected.as_ref(), actual.as_ref()); + } + + #[test] + fn test_cast_list_to_fsl_subcast() { + let array = Arc::new(LargeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(1), Some(2)]), + Some(vec![Some(3), Some(i32::MAX)]), + ], + )) as ArrayRef; + let expected = Arc::new(FixedSizeListArray::from_iter_primitive::( + vec![ + Some(vec![Some(1), Some(2)]), + Some(vec![Some(3), Some(i32::MAX as i64)]), + ], + 2, + )) as ArrayRef; + let actual = cast( + array.as_ref(), + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 2), + ) + .unwrap(); + assert_eq!(expected.as_ref(), actual.as_ref()); + + let res = cast_with_options( + array.as_ref(), + &DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int16, true)), 2), + &CastOptions { + safe: false, + ..Default::default() + }, + ); + assert!(res.is_err()); + assert!(format!("{:?}", res).contains("Can't cast value 2147483647 to type Int16")); + } + + #[test] + fn test_cast_list_to_fsl_empty() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let array = new_empty_array(&DataType::List(field.clone())); + + let target_type = DataType::FixedSizeList(field.clone(), 3); + let expected = new_empty_array(&target_type); + + let actual = cast(array.as_ref(), &target_type).unwrap(); + assert_eq!(expected.as_ref(), actual.as_ref()); + } + fn make_list_array() -> ListArray { // Construct a value array let value_data = ArrayData::builder(DataType::Int32) @@ -7482,8 +7688,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); // Construct a list array from the above two - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7525,10 +7730,8 @@ mod tests { .build() .unwrap(); - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, true)), - 4, - ); + let list_data_type 
= + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 4); let list_data = ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -7545,10 +7748,8 @@ mod tests { .build() .unwrap(); - let list_data_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int64, true)), - 4, - ); + let list_data_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int64, true)), 4); let list_data = ArrayData::builder(list_data_type) .len(2) .add_child_data(value_data) @@ -7589,8 +7790,7 @@ mod tests { let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); let value_data = str_array.into_data(); - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Utf8, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) @@ -7739,6 +7939,68 @@ mod tests { assert!(casted_array.is_err()); } + #[test] + fn test_cast_floating_point_to_decimal128_precision_overflow() { + let array = Float64Array::from(vec![1.1]); + let array = Arc::new(array) as ArrayRef; + let casted_array = cast_with_options( + &array, + &DataType::Decimal128(2, 2), + &CastOptions { + safe: true, + format_options: FormatOptions::default(), + }, + ); + assert!(casted_array.is_ok()); + assert!(casted_array.unwrap().is_null(0)); + + let casted_array = cast_with_options( + &array, + &DataType::Decimal128(2, 2), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + let err = casted_array.unwrap_err().to_string(); + let expected_error = "Invalid argument error: 110 is too large to store in a Decimal128 of precision 2. Max is 99"; + assert!( + err.contains(expected_error), + "did not find expected error '{expected_error}' in actual error '{err}'" + ); + } + + #[test] + fn test_cast_floating_point_to_decimal256_precision_overflow() { + let array = Float64Array::from(vec![1.1]); + let array = Arc::new(array) as ArrayRef; + let casted_array = cast_with_options( + &array, + &DataType::Decimal256(2, 2), + &CastOptions { + safe: true, + format_options: FormatOptions::default(), + }, + ); + assert!(casted_array.is_ok()); + assert!(casted_array.unwrap().is_null(0)); + + let casted_array = cast_with_options( + &array, + &DataType::Decimal256(2, 2), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + let err = casted_array.unwrap_err().to_string(); + let expected_error = "Invalid argument error: 110 is too large to store in a Decimal256 of precision 2. 
Max is 99"; + assert!( + err.contains(expected_error), + "did not find expected error '{expected_error}' in actual error '{err}'" + ); + } + #[test] fn test_cast_floating_point_to_decimal128_overflow() { let array = Float64Array::from(vec![f64::MAX]); @@ -7867,12 +8129,7 @@ mod tests { let array = vec![Some(123)]; let input_decimal_array = create_decimal_array(array, 10, -1).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; - generate_cast_test_case!( - &array, - Decimal128Array, - &output_type, - vec![Some(12_i128),] - ); + generate_cast_test_case!(&array, Decimal128Array, &output_type, vec![Some(12_i128),]); let casted_array = cast(&array, &output_type).unwrap(); let decimal_arr = casted_array.as_primitive::(); @@ -7882,12 +8139,7 @@ mod tests { let array = vec![Some(125)]; let input_decimal_array = create_decimal_array(array, 10, -1).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; - generate_cast_test_case!( - &array, - Decimal128Array, - &output_type, - vec![Some(13_i128),] - ); + generate_cast_test_case!(&array, Decimal128Array, &output_type, vec![Some(13_i128),]); let casted_array = cast(&array, &output_type).unwrap(); let decimal_arr = casted_array.as_primitive::(); @@ -8030,6 +8282,21 @@ mod tests { assert_eq!("0.00", decimal_arr.value_as_string(10)); assert_eq!("0.00", decimal_arr.value_as_string(11)); assert!(decimal_arr.is_null(12)); + assert_eq!("-1.23", decimal_arr.value_as_string(13)); + assert_eq!("-1.24", decimal_arr.value_as_string(14)); + assert_eq!("0.00", decimal_arr.value_as_string(15)); + assert_eq!("-123.00", decimal_arr.value_as_string(16)); + assert_eq!("-123.23", decimal_arr.value_as_string(17)); + assert_eq!("-0.12", decimal_arr.value_as_string(18)); + assert_eq!("1.23", decimal_arr.value_as_string(19)); + assert_eq!("1.24", decimal_arr.value_as_string(20)); + assert_eq!("0.00", decimal_arr.value_as_string(21)); + assert_eq!("123.00", decimal_arr.value_as_string(22)); + assert_eq!("123.23", decimal_arr.value_as_string(23)); + assert_eq!("0.12", decimal_arr.value_as_string(24)); + assert!(decimal_arr.is_null(25)); + assert!(decimal_arr.is_null(26)); + assert!(decimal_arr.is_null(27)); // Decimal256 let output_type = DataType::Decimal256(76, 3); @@ -8051,6 +8318,21 @@ mod tests { assert_eq!("0.000", decimal_arr.value_as_string(10)); assert_eq!("0.000", decimal_arr.value_as_string(11)); assert!(decimal_arr.is_null(12)); + assert_eq!("-1.235", decimal_arr.value_as_string(13)); + assert_eq!("-1.236", decimal_arr.value_as_string(14)); + assert_eq!("0.000", decimal_arr.value_as_string(15)); + assert_eq!("-123.000", decimal_arr.value_as_string(16)); + assert_eq!("-123.234", decimal_arr.value_as_string(17)); + assert_eq!("-0.123", decimal_arr.value_as_string(18)); + assert_eq!("1.235", decimal_arr.value_as_string(19)); + assert_eq!("1.236", decimal_arr.value_as_string(20)); + assert_eq!("0.000", decimal_arr.value_as_string(21)); + assert_eq!("123.000", decimal_arr.value_as_string(22)); + assert_eq!("123.234", decimal_arr.value_as_string(23)); + assert_eq!("0.123", decimal_arr.value_as_string(24)); + assert!(decimal_arr.is_null(25)); + assert!(decimal_arr.is_null(26)); + assert!(decimal_arr.is_null(27)); } #[test] @@ -8069,6 +8351,21 @@ mod tests { Some(""), Some(" "), None, + Some("-1.23499999"), + Some("-1.23599999"), + Some("-0.00001"), + Some("-123"), + Some("-123.234000"), + Some("-000.123"), + Some("+1.23499999"), + Some("+1.23599999"), + Some("+0.00001"), + Some("+123"), + Some("+123.234000"), + Some("+000.123"), + 
Some("1.-23499999"), + Some("-1.-23499999"), + Some("--1.23499999"), ]); let array = Arc::new(str_array) as ArrayRef; @@ -8091,6 +8388,21 @@ mod tests { Some(""), Some(" "), None, + Some("-1.23499999"), + Some("-1.23599999"), + Some("-0.00001"), + Some("-123"), + Some("-123.234000"), + Some("-000.123"), + Some("+1.23499999"), + Some("+1.23599999"), + Some("+0.00001"), + Some("+123"), + Some("+123.234000"), + Some("+000.123"), + Some("1.-23499999"), + Some("-1.-23499999"), + Some("--1.23499999"), ]); let array = Arc::new(str_array) as ArrayRef; @@ -8129,9 +8441,9 @@ mod tests { let str_array = StringArray::from(vec![". 0.123"]); let array = Arc::new(str_array) as ArrayRef; let casted_err = cast_with_options(&array, &output_type, &option).unwrap_err(); - assert!(casted_err.to_string().contains( - "Cannot cast string '. 0.123' to value of Decimal128(38, 10) type" - )); + assert!(casted_err + .to_string() + .contains("Cannot cast string '. 0.123' to value of Decimal128(38, 10) type")); } fn test_cast_string_to_decimal128_overflow(overflow_array: ArrayRef) { @@ -8152,6 +8464,32 @@ mod tests { ); } + #[test] + fn test_cast_string_to_decimal128_precision_overflow() { + let array = StringArray::from(vec!["1000".to_string()]); + let array = Arc::new(array) as ArrayRef; + let casted_array = cast_with_options( + &array, + &DataType::Decimal128(10, 8), + &CastOptions { + safe: true, + format_options: FormatOptions::default(), + }, + ); + assert!(casted_array.is_ok()); + assert!(casted_array.unwrap().is_null(0)); + + let err = cast_with_options( + &array, + &DataType::Decimal128(10, 8), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal128 of precision 10. Max is 9999999999", err.unwrap_err().to_string()); + } + #[test] fn test_cast_utf8_to_decimal128_overflow() { let overflow_str_array = StringArray::from(vec![ @@ -8209,6 +8547,32 @@ mod tests { assert!(decimal_arr.is_null(6)); } + #[test] + fn test_cast_string_to_decimal256_precision_overflow() { + let array = StringArray::from(vec!["1000".to_string()]); + let array = Arc::new(array) as ArrayRef; + let casted_array = cast_with_options( + &array, + &DataType::Decimal256(10, 8), + &CastOptions { + safe: true, + format_options: FormatOptions::default(), + }, + ); + assert!(casted_array.is_ok()); + assert!(casted_array.unwrap().is_null(0)); + + let err = cast_with_options( + &array, + &DataType::Decimal256(10, 8), + &CastOptions { + safe: false, + format_options: FormatOptions::default(), + }, + ); + assert_eq!("Invalid argument error: 100000000000 is too large to store in a Decimal256 of precision 10. 
Max is 9999999999", err.unwrap_err().to_string()); + } + #[test] fn test_cast_utf8_to_decimal256_overflow() { let overflow_str_array = StringArray::from(vec![ @@ -8356,9 +8720,8 @@ mod tests { let tz = tz.as_ref().parse().unwrap(); - let as_tz = |v: i64| { - as_datetime_with_timezone::(v, tz).unwrap() - }; + let as_tz = + |v: i64| as_datetime_with_timezone::(v, tz).unwrap(); let as_utc = |v: &i64| as_tz(*v).naive_utc().to_string(); let as_local = |v: &i64| as_tz(*v).naive_local().to_string(); @@ -8468,8 +8831,7 @@ mod tests { None, ]; - let array256: Vec> = - array128.iter().map(|v| v.map(i256::from_i128)).collect(); + let array256: Vec> = array128.iter().map(|v| v.map(i256::from_i128)).collect(); test_decimal_to_string::( DataType::Utf8, @@ -8558,11 +8920,9 @@ mod tests { fn test_cast_from_duration_to_interval() { // from duration second to interval month day nano let array = vec![1234567]; - let casted_array = cast_from_duration_to_interval::( - array, - &CastOptions::default(), - ) - .unwrap(); + let casted_array = + cast_from_duration_to_interval::(array, &CastOptions::default()) + .unwrap(); assert_eq!( casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) @@ -8681,10 +9041,7 @@ mod tests { .as_any() .downcast_ref::>() .ok_or_else(|| { - ArrowError::ComputeError(format!( - "Failed to downcast to {}", - T::DATA_TYPE - )) + ArrowError::ComputeError(format!("Failed to downcast to {}", T::DATA_TYPE)) }) .cloned() } @@ -8722,8 +9079,7 @@ mod tests { cast_from_interval_to_duration(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); - let res = - cast_from_interval_to_duration::(&array, &fallible); + let res = cast_from_interval_to_duration::(&array, &fallible); assert!(res.is_err()); // from interval month day nano to duration microsecond @@ -8734,8 +9090,7 @@ mod tests { let array = vec![i128::MAX].into(); let casted_array = - cast_from_interval_to_duration::(&array, &nullable) - .unwrap(); + cast_from_interval_to_duration::(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); let casted_array = @@ -8766,8 +9121,7 @@ mod tests { ] .into(); let casted_array = - cast_from_interval_to_duration::(&array, &nullable) - .unwrap(); + cast_from_interval_to_duration::(&array, &nullable).unwrap(); assert!(!casted_array.is_valid(0)); assert!(!casted_array.is_valid(1)); assert!(!casted_array.is_valid(2)); @@ -8836,11 +9190,9 @@ mod tests { fn test_cast_from_interval_day_time_to_interval_month_day_nano() { // from interval day time to interval month day nano let array = vec![123]; - let casted_array = cast_from_interval_day_time_to_interval_month_day_nano( - array, - &CastOptions::default(), - ) - .unwrap(); + let casted_array = + cast_from_interval_day_time_to_interval_month_day_nano(array, &CastOptions::default()) + .unwrap(); assert_eq!( casted_array.data_type(), &DataType::Interval(IntervalUnit::MonthDayNano) @@ -8874,8 +9226,7 @@ mod tests { .map(|ts| ts / 1_000_000) .collect::>(); - let array = - TimestampMillisecondArray::from(ts_array).with_timezone("UTC".to_string()); + let array = TimestampMillisecondArray::from(ts_array).with_timezone("UTC".to_string()); let casted_array = cast(&array, &DataType::Date32).unwrap(); let date_array = casted_array.as_primitive::(); let casted_array = cast(&date_array, &DataType::Utf8).unwrap(); @@ -8902,6 +9253,26 @@ mod tests { assert_eq!(formatted.value(1).to_string(), "[[4], [null], [6]]"); } + #[test] + fn test_nested_list_cast() { + let mut builder = ListBuilder::new(ListBuilder::new(Int32Builder::new())); + 
builder.append_value([Some([Some(1), Some(2), None]), None]); + builder.append_value([None, Some([]), None]); + builder.append_null(); + builder.append_value([Some([Some(2), Some(3)])]); + let start = builder.finish(); + + let mut builder = LargeListBuilder::new(LargeListBuilder::new(Int8Builder::new())); + builder.append_value([Some([Some(1), Some(2), None]), None]); + builder.append_value([None, Some([]), None]); + builder.append_null(); + builder.append_value([Some([Some(2), Some(3)])]); + let expected = builder.finish(); + + let actual = cast(&start, expected.data_type()).unwrap(); + assert_eq!(actual.as_ref(), &expected); + } + const CAST_OPTIONS: CastOptions<'static> = CastOptions { safe: true, format_options: FormatOptions::new(), diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs index d15d57cf3c05..28c29c94bbdb 100644 --- a/arrow-cast/src/display.rs +++ b/arrow-cast/src/display.rs @@ -129,10 +129,7 @@ impl<'a> FormatOptions<'a> { } /// Overrides the format used for [`DataType::Timestamp`] columns with a timezone - pub const fn with_timestamp_tz_format( - self, - timestamp_tz_format: Option<&'a str>, - ) -> Self { + pub const fn with_timestamp_tz_format(self, timestamp_tz_format: Option<&'a str>) -> Self { Self { timestamp_tz_format, ..self @@ -173,9 +170,7 @@ impl<'a> ValueFormatter<'a> { match self.formatter.format.write(self.idx, s) { Ok(_) => Ok(()), Err(FormatError::Arrow(e)) => Err(e), - Err(FormatError::Format(_)) => { - Err(ArrowError::CastError("Format error".to_string())) - } + Err(FormatError::Format(_)) => Err(ArrowError::CastError("Format error".to_string())), } } @@ -260,10 +255,7 @@ impl<'a> ArrayFormatter<'a> { /// Returns an [`ArrayFormatter`] that can be used to format `array` /// /// This returns an error if an array of the given data type cannot be formatted - pub fn try_new( - array: &'a dyn Array, - options: &FormatOptions<'a>, - ) -> Result { + pub fn try_new(array: &'a dyn Array, options: &FormatOptions<'a>) -> Result { Ok(Self { format: make_formatter(array, options)?, safe: options.safe, @@ -399,8 +391,15 @@ impl<'a> DisplayIndex for &'a BooleanArray { } } -impl<'a> DisplayIndex for &'a NullArray { - fn write(&self, _idx: usize, _f: &mut dyn Write) -> FormatResult { +impl<'a> DisplayIndexState<'a> for &'a NullArray { + type State = &'a str; + + fn prepare(&self, options: &FormatOptions<'a>) -> Result { + Ok(options.null) + } + + fn write(&self, state: &Self::State, _idx: usize, f: &mut dyn Write) -> FormatResult { + f.write_str(state)?; Ok(()) } } @@ -465,9 +464,7 @@ fn write_timestamp( let date = Utc.from_utc_datetime(&naive).with_timezone(&tz); match format { Some(s) => write!(f, "{}", date.format(s))?, - None => { - write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))? - } + None => write!(f, "{}", date.to_rfc3339_opts(SecondsFormat::AutoSi, true))?, } } None => match format { @@ -519,19 +516,11 @@ macro_rules! temporal_display { impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> { type State = TimeFormat<'a>; - fn prepare( - &self, - options: &FormatOptions<'a>, - ) -> Result { + fn prepare(&self, options: &FormatOptions<'a>) -> Result { Ok(options.$format) } - fn write( - &self, - fmt: &Self::State, - idx: usize, - f: &mut dyn Write, - ) -> FormatResult { + fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { let value = self.value(idx); let naive = $convert(value as _).ok_or_else(|| { ArrowError::CastError(format!( @@ -568,19 +557,11 @@ macro_rules! 
duration_display { impl<'a> DisplayIndexState<'a> for &'a PrimitiveArray<$t> { type State = DurationFormat; - fn prepare( - &self, - options: &FormatOptions<'a>, - ) -> Result { + fn prepare(&self, options: &FormatOptions<'a>) -> Result { Ok(options.duration_format) } - fn write( - &self, - fmt: &Self::State, - idx: usize, - f: &mut dyn Write, - ) -> FormatResult { + fn write(&self, fmt: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult { let v = self.value(idx); match fmt { DurationFormat::ISO8601 => write!(f, "{}", $convert(v))?, @@ -697,8 +678,7 @@ impl<'a> DisplayIndex for &'a PrimitiveArray { fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult { let value: u128 = self.value(idx) as u128; - let months_part: i32 = - ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32; + let months_part: i32 = ((value & 0xFFFFFFFF000000000000000000000000) >> 96) as i32; let days_part: i32 = ((value & 0xFFFFFFFF0000000000000000) >> 64) as i32; let nanoseconds_part: i64 = (value & 0xFFFFFFFFFFFFFFFF) as i64; @@ -930,10 +910,7 @@ impl<'a> DisplayIndexState<'a> for &'a UnionArray { /// suitable for converting large arrays or record batches. /// /// Please see [`ArrayFormatter`] for a more performant interface -pub fn array_value_to_string( - column: &dyn Array, - row: usize, -) -> Result { +pub fn array_value_to_string(column: &dyn Array, row: usize) -> Result { let options = FormatOptions::default().with_display_error(true); let formatter = ArrayFormatter::try_new(column, &options)?; Ok(formatter.value(row).to_string()) @@ -979,12 +956,9 @@ mod tests { // [[a, b, c], [d, e, f], [g, h]] let entry_offsets = [0, 3, 6, 8]; - let map_array = MapArray::new_from_strings( - keys.clone().into_iter(), - &values_data, - &entry_offsets, - ) - .unwrap(); + let map_array = + MapArray::new_from_strings(keys.clone().into_iter(), &values_data, &entry_offsets) + .unwrap(); assert_eq!( "{d: 30, e: 40, f: 50}", array_value_to_string(&map_array, 1).unwrap() @@ -999,8 +973,7 @@ mod tests { #[test] fn test_array_value_to_string_duration() { let iso_fmt = FormatOptions::new(); - let pretty_fmt = - FormatOptions::new().with_duration_format(DurationFormat::Pretty); + let pretty_fmt = FormatOptions::new().with_duration_format(DurationFormat::Pretty); let array = DurationNanosecondArray::from(vec![ 1, @@ -1098,4 +1071,12 @@ mod tests { assert_eq!(iso[5], "-P45DT50554S"); assert_eq!(pretty[5], "-45 days -14 hours -2 mins -34 secs"); } + + #[test] + fn test_null() { + let array = NullArray::new(2); + let options = FormatOptions::new().with_null("NULL"); + let formatted = format_array(&array, &options); + assert_eq!(formatted, &["NULL".to_string(), "NULL".to_string()]) + } } diff --git a/arrow-cast/src/lib.rs b/arrow-cast/src/lib.rs index d2677a0e0a53..71ebe6c0ed8b 100644 --- a/arrow-cast/src/lib.rs +++ b/arrow-cast/src/lib.rs @@ -21,6 +21,7 @@ pub mod cast; pub use cast::*; pub mod display; pub mod parse; - #[cfg(feature = "prettyprint")] pub mod pretty; + +pub mod base64; diff --git a/arrow-cast/src/parse.rs b/arrow-cast/src/parse.rs index ac3b89e0ba02..750f38006d33 100644 --- a/arrow-cast/src/parse.rs +++ b/arrow-cast/src/parse.rs @@ -64,10 +64,7 @@ impl TimestampParser { /// Parses a date of the form `1997-01-31` fn date(&self) -> Option { - if self.mask & 0b1111111111 != 0b1101101111 - || !self.test(4, b'-') - || !self.test(7, b'-') - { + if self.mask & 0b1111111111 != 0b1101101111 || !self.test(4, b'-') || !self.test(7, b'-') { return None; } @@ -173,13 +170,9 @@ impl TimestampParser { /// * 
"2023-01-01 04:05:06.789 PST", /// /// [IANA timezones]: https://www.iana.org/time-zones -pub fn string_to_datetime( - timezone: &T, - s: &str, -) -> Result, ArrowError> { - let err = |ctx: &str| { - ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}")) - }; +pub fn string_to_datetime(timezone: &T, s: &str) -> Result, ArrowError> { + let err = + |ctx: &str| ArrowError::ParseError(format!("Error parsing timestamp from '{s}': {ctx}")); let bytes = s.as_bytes(); if bytes.len() < 10 { @@ -277,16 +270,11 @@ pub fn string_to_timestamp_nanos(s: &str) -> Result { to_timestamp_nanos(string_to_datetime(&Utc, s)?.naive_utc()) } -/// Defensive check to prevent chrono-rs panics when nanosecond conversion happens on non-supported dates +/// Fallible conversion of [`NaiveDateTime`] to `i64` nanoseconds #[inline] fn to_timestamp_nanos(dt: NaiveDateTime) -> Result { - if dt.timestamp().checked_mul(1_000_000_000).is_none() { - return Err(ArrowError::ParseError( - ERR_NANOSECONDS_NOT_SUPPORTED.to_string(), - )); - } - - Ok(dt.timestamp_nanos()) + dt.timestamp_nanos_opt() + .ok_or_else(|| ArrowError::ParseError(ERR_NANOSECONDS_NOT_SUPPORTED.to_string())) } /// Accepts a string in ISO8601 standard format and some @@ -305,9 +293,8 @@ fn to_timestamp_nanos(dt: NaiveDateTime) -> Result { /// This function does not support parsing strings with a timezone /// or offset specified, as it considers only time since midnight. pub fn string_to_time_nanoseconds(s: &str) -> Result { - let nt = string_to_time(s).ok_or_else(|| { - ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")) - })?; + let nt = string_to_time(s) + .ok_or_else(|| ArrowError::ParseError(format!("Failed to parse \'{s}\' as time")))?; Ok(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64) } @@ -318,12 +305,8 @@ fn string_to_time(s: &str) -> Option { } let (am, bytes) = match bytes.get(bytes.len() - 3..) 
{ - Some(b" AM" | b" am" | b" Am" | b" aM") => { - (Some(true), &bytes[..bytes.len() - 3]) - } - Some(b" PM" | b" pm" | b" pM" | b" Pm") => { - (Some(false), &bytes[..bytes.len() - 3]) - } + Some(b" AM" | b" am" | b" Am" | b" aM") => (Some(true), &bytes[..bytes.len() - 3]), + Some(b" PM" | b" pm" | b" pM" | b" Pm") => (Some(false), &bytes[..bytes.len() - 3]), _ => (None, bytes), }; @@ -506,10 +489,7 @@ impl Parser for Time64NanosecondType { fn parse_formatted(string: &str, format: &str) -> Option { let nt = NaiveTime::parse_from_str(string, format).ok()?; - Some( - nt.num_seconds_from_midnight() as i64 * 1_000_000_000 - + nt.nanosecond() as i64, - ) + Some(nt.num_seconds_from_midnight() as i64 * 1_000_000_000 + nt.nanosecond() as i64) } } @@ -524,10 +504,7 @@ impl Parser for Time64MicrosecondType { fn parse_formatted(string: &str, format: &str) -> Option { let nt = NaiveTime::parse_from_str(string, format).ok()?; - Some( - nt.num_seconds_from_midnight() as i64 * 1_000_000 - + nt.nanosecond() as i64 / 1_000, - ) + Some(nt.num_seconds_from_midnight() as i64 * 1_000_000 + nt.nanosecond() as i64 / 1_000) } } @@ -542,10 +519,7 @@ impl Parser for Time32MillisecondType { fn parse_formatted(string: &str, format: &str) -> Option { let nt = NaiveTime::parse_from_str(string, format).ok()?; - Some( - nt.num_seconds_from_midnight() as i32 * 1_000 - + nt.nanosecond() as i32 / 1_000_000, - ) + Some(nt.num_seconds_from_midnight() as i32 * 1_000 + nt.nanosecond() as i32 / 1_000_000) } } @@ -560,10 +534,7 @@ impl Parser for Time32SecondType { fn parse_formatted(string: &str, format: &str) -> Option { let nt = NaiveTime::parse_from_str(string, format).ok()?; - Some( - nt.num_seconds_from_midnight() as i32 - + nt.nanosecond() as i32 / 1_000_000_000, - ) + Some(nt.num_seconds_from_midnight() as i32 + nt.nanosecond() as i32 / 1_000_000_000) } } @@ -588,8 +559,20 @@ fn parse_date(string: &str) -> Option { const HYPHEN: u8 = b'-'.wrapping_sub(b'0'); + // refer to https://www.rfc-editor.org/rfc/rfc3339#section-3 if digits[4] != HYPHEN { - return None; + let (year, month, day) = match (mask, string.len()) { + (0b11111111, 8) => ( + digits[0] as u16 * 1000 + + digits[1] as u16 * 100 + + digits[2] as u16 * 10 + + digits[3] as u16, + digits[4] * 10 + digits[5], + digits[6] * 10 + digits[7], + ), + _ => return None, + }; + return NaiveDate::from_ymd_opt(year as _, month as _, day as _); } let (month, day) = match mask { @@ -620,10 +603,8 @@ fn parse_date(string: &str) -> Option { _ => return None, }; - let year = digits[0] as u16 * 1000 - + digits[1] as u16 * 100 - + digits[2] as u16 * 10 - + digits[3] as u16; + let year = + digits[0] as u16 * 1000 + digits[1] as u16 * 100 + digits[2] as u16 * 10 + digits[3] as u16; NaiveDate::from_ymd_opt(year as _, month as _, day as _) } @@ -733,8 +714,7 @@ pub fn parse_decimal( fractionals += 1; digits += 1; result = result.mul_wrapping(base); - result = - result.add_wrapping(T::Native::usize_as((b - b'0') as usize)); + result = result.add_wrapping(T::Native::usize_as((b - b'0') as usize)); } // Fail on "." @@ -776,9 +756,11 @@ pub fn parse_interval_year_month( let config = IntervalParseConfig::new(IntervalUnit::Year); let interval = Interval::parse(value, &config)?; - let months = interval.to_year_months().map_err(|_| ArrowError::CastError(format!( + let months = interval.to_year_months().map_err(|_| { + ArrowError::CastError(format!( "Cannot cast {value} to IntervalYearMonth. Only year and month fields are allowed." 
- )))?; + )) + })?; Ok(IntervalYearMonthType::make_value(0, months)) } @@ -893,21 +875,16 @@ impl FromStr for IntervalAmount { Ok(0) } else { integer.parse::().map_err(|_| { - ArrowError::ParseError(format!( - "Failed to parse {s} as interval amount" - )) + ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) }) }?; let frac_unscaled = frac.parse::().map_err(|_| { - ArrowError::ParseError(format!( - "Failed to parse {s} as interval amount" - )) + ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) })?; // scale fractional part by interval precision - let frac = - frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32); + let frac = frac_unscaled * 10_i64.pow(INTERVAL_PRECISION - frac.len() as u32); // propagate the sign of the integer part to the fractional part let frac = if integer < 0 || explicit_neg { @@ -920,9 +897,9 @@ impl FromStr for IntervalAmount { Ok(result) } - Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError( - format!("Failed to parse {s} as interval amount"), - )), + Some((_, frac)) if frac.starts_with('-') => Err(ArrowError::ParseError(format!( + "Failed to parse {s} as interval amount" + ))), Some((_, frac)) if frac.len() > INTERVAL_PRECISION as usize => { Err(ArrowError::ParseError(format!( "{s} exceeds the precision available for interval amount" @@ -930,9 +907,7 @@ impl FromStr for IntervalAmount { } Some(_) | None => { let integer = s.parse::().map_err(|_| { - ArrowError::ParseError(format!( - "Failed to parse {s} as interval amount" - )) + ArrowError::ParseError(format!("Failed to parse {s} as interval amount")) })?; let result = Self { integer, frac: 0 }; @@ -1010,25 +985,20 @@ impl Interval { /// e.g. INTERVAL '0.5 MONTH' = 15 days, INTERVAL '1.5 MONTH' = 1 month 15 days /// e.g. INTERVAL '0.5 DAY' = 12 hours, INTERVAL '1.5 DAY' = 1 day 12 hours /// [Postgres reference](https://www.postgresql.org/docs/15/datatype-datetime.html#DATATYPE-INTERVAL-INPUT:~:text=Field%20values%20can,fractional%20on%20output.) - fn add( - &self, - amount: IntervalAmount, - unit: IntervalUnit, - ) -> Result { + fn add(&self, amount: IntervalAmount, unit: IntervalUnit) -> Result { let result = match unit { IntervalUnit::Century => { let months_int = amount.integer.mul_checked(100)?.mul_checked(12)?; let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 2); - let months = - months_int - .add_checked(month_frac)? - .try_into() - .map_err(|_| { - ArrowError::ParseError(format!( - "Unable to represent {} centuries as months in a signed 32-bit integer", - &amount.integer - )) - })?; + let months = months_int + .add_checked(month_frac)? + .try_into() + .map_err(|_| { + ArrowError::ParseError(format!( + "Unable to represent {} centuries as months in a signed 32-bit integer", + &amount.integer + )) + })?; Self::new(self.months.add_checked(months)?, self.days, self.nanos) } @@ -1036,32 +1006,30 @@ impl Interval { let months_int = amount.integer.mul_checked(10)?.mul_checked(12)?; let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION - 1); - let months = - months_int - .add_checked(month_frac)? - .try_into() - .map_err(|_| { - ArrowError::ParseError(format!( - "Unable to represent {} decades as months in a signed 32-bit integer", - &amount.integer - )) - })?; + let months = months_int + .add_checked(month_frac)? 
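+                    // Narrow the i64 month total to i32; overflow becomes a
+                    // ParseError instead of silently wrapping.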
+ .try_into() + .map_err(|_| { + ArrowError::ParseError(format!( + "Unable to represent {} decades as months in a signed 32-bit integer", + &amount.integer + )) + })?; Self::new(self.months.add_checked(months)?, self.days, self.nanos) } IntervalUnit::Year => { let months_int = amount.integer.mul_checked(12)?; let month_frac = amount.frac * 12 / 10_i64.pow(INTERVAL_PRECISION); - let months = - months_int - .add_checked(month_frac)? - .try_into() - .map_err(|_| { - ArrowError::ParseError(format!( - "Unable to represent {} years as months in a signed 32-bit integer", - &amount.integer - )) - })?; + let months = months_int + .add_checked(month_frac)? + .try_into() + .map_err(|_| { + ArrowError::ParseError(format!( + "Unable to represent {} years as months in a signed 32-bit integer", + &amount.integer + )) + })?; Self::new(self.months.add_checked(months)?, self.days, self.nanos) } @@ -1095,8 +1063,7 @@ impl Interval { )) })?; - let nanos = - amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); + let nanos = amount.frac * 7 * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); Self::new( self.months, @@ -1112,8 +1079,7 @@ impl Interval { )) })?; - let nanos = - amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); + let nanos = amount.frac * 24 * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); Self::new( self.months, @@ -1123,8 +1089,7 @@ impl Interval { } IntervalUnit::Hour => { let nanos_int = amount.integer.mul_checked(NANOS_PER_HOUR)?; - let nanos_frac = - amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); + let nanos_frac = amount.frac * 6 * 6 / 10_i64.pow(INTERVAL_PRECISION - 11); let nanos = nanos_int.add_checked(nanos_frac)?; Interval::new(self.months, self.days, self.nanos.add_checked(nanos)?) @@ -1313,12 +1278,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29.190855").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29.190855").unwrap() ); @@ -1331,12 +1296,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime_whole_secs.timestamp_nanos(), + naive_datetime_whole_secs.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29").unwrap() ); assert_eq!( - naive_datetime_whole_secs.timestamp_nanos(), + naive_datetime_whole_secs.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29").unwrap() ); @@ -1349,7 +1314,7 @@ mod tests { ); assert_eq!( - naive_datetime_no_time.timestamp_nanos(), + naive_datetime_no_time.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08").unwrap() ) } @@ -1403,8 +1368,7 @@ mod tests { "2030-12-04T17:11:10.123456", ]; for case in cases { - let chrono = - NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap(); + let chrono = NaiveDateTime::parse_from_str(case, "%Y-%m-%dT%H:%M:%S%.f").unwrap(); let custom = string_to_datetime(&Utc, case).unwrap(); assert_eq!(chrono, custom.naive_utc()) } @@ -1436,8 +1400,7 @@ mod tests { ]; for (s, ctx) in cases { - let expected = - format!("Parser error: Error parsing timestamp from '{s}': {ctx}"); + let expected = format!("Parser error: Error parsing timestamp from '{s}': {ctx}"); let actual = string_to_datetime(&Utc, s).unwrap_err().to_string(); assert_eq!(actual, expected) } @@ -1463,12 +1426,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - 
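        // chrono has deprecated the panicking `timestamp_nanos` in favor of
        // the fallible `timestamp_nanos_opt`; the unwraps here are fine since
        // every test value fits in 64-bit signed nanoseconds.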
naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29.190855").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29.190855").unwrap() ); @@ -1479,12 +1442,12 @@ mod tests { // Ensure both T and ' ' variants work assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08T13:42:29").unwrap() ); assert_eq!( - naive_datetime.timestamp_nanos(), + naive_datetime.timestamp_nanos_opt().unwrap(), parse_timestamp("2020-09-08 13:42:29").unwrap() ); @@ -1502,8 +1465,7 @@ mod tests { assert_eq!(local, "2020-09-08 15:42:29"); let dt = - NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ") - .unwrap(); + NaiveDateTime::parse_from_str("2020-09-08T13:42:29Z", "%Y-%m-%dT%H:%M:%SZ").unwrap(); let local: Tz = "+08:00".parse().unwrap(); // Parsed as offset from UTC @@ -1634,10 +1596,7 @@ mod tests { // custom format assert_eq!( - Time64NanosecondType::parse_formatted( - "02 - 10 - 01 - .1234567", - "%H - %M - %S - %.f" - ), + Time64NanosecondType::parse_formatted("02 - 10 - 01 - .1234567", "%H - %M - %S - %.f"), Some(7_801_123_456_700) ); } @@ -1714,10 +1673,7 @@ mod tests { // custom format assert_eq!( - Time64MicrosecondType::parse_formatted( - "02 - 10 - 01 - .1234", - "%H - %M - %S - %.f" - ), + Time64MicrosecondType::parse_formatted("02 - 10 - 01 - .1234", "%H - %M - %S - %.f"), Some(7_801_123_400) ); } @@ -1764,10 +1720,7 @@ mod tests { // custom format assert_eq!( - Time32MillisecondType::parse_formatted( - "02 - 10 - 01 - .1", - "%H - %M - %S - %.f" - ), + Time32MillisecondType::parse_formatted("02 - 10 - 01 - .1", "%H - %M - %S - %.f"), Some(7_801_100) ); } @@ -2010,8 +1963,19 @@ mod tests { ); assert_eq!( - Interval::new(-13i32, -8i32, -NANOS_PER_HOUR - NANOS_PER_MINUTE - NANOS_PER_SECOND - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64), - Interval::parse("-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond", &config).unwrap(), + Interval::new( + -13i32, + -8i32, + -NANOS_PER_HOUR + - NANOS_PER_MINUTE + - NANOS_PER_SECOND + - (1.11_f64 * NANOS_PER_MILLIS as f64) as i64 + ), + Interval::parse( + "-1 year -1 month -1 week -1 day -1 hour -1 minute -1 second -1.11 millisecond", + &config + ) + .unwrap(), ); } @@ -2285,22 +2249,34 @@ mod tests { let edge_tests_256 = [ ( "9999999999999999999999999999999999999999999999999999999999999999999999999999", -i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(), + i256::from_string( + "9999999999999999999999999999999999999999999999999999999999999999999999999999", + ) + .unwrap(), 0, ), ( "999999999999999999999999999999999999999999999999999999999999999999999999.9999", - i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(), + i256::from_string( + "9999999999999999999999999999999999999999999999999999999999999999999999999999", + ) + .unwrap(), 4, ), ( "99999999999999999999999999999999999999999999999999.99999999999999999999999999", - i256::from_string("9999999999999999999999999999999999999999999999999999999999999999999999999999").unwrap(), + i256::from_string( + "9999999999999999999999999999999999999999999999999999999999999999999999999999", + ) + .unwrap(), 26, ), ( "99999999999999999999999999999999999999999999999999", - 
i256::from_string("9999999999999999999999999999999999999999999999999900000000000000000000000000").unwrap(), + i256::from_string( + "9999999999999999999999999999999999999999999999999900000000000000000000000000", + ) + .unwrap(), 26, ), ]; diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs index 59a9f9d605e2..550afa9f739d 100644 --- a/arrow-cast/src/pretty.rs +++ b/arrow-cast/src/pretty.rs @@ -25,9 +25,7 @@ use comfy_table::{Cell, Table}; use std::fmt::Display; /// Create a visual representation of record batches -pub fn pretty_format_batches( - results: &[RecordBatch], -) -> Result { +pub fn pretty_format_batches(results: &[RecordBatch]) -> Result { let options = FormatOptions::default().with_display_error(true); pretty_format_batches_with_options(results, &options) } @@ -70,10 +68,7 @@ pub fn print_columns(col_name: &str, results: &[ArrayRef]) -> Result<(), ArrowEr } /// Convert a series of record batches into a table -fn create_table( - results: &[RecordBatch], - options: &FormatOptions, -) -> Result { +fn create_table(results: &[RecordBatch], options: &FormatOptions) -> Result { let mut table = Table::new(); table.load_preset("||--+-++| ++++++"); @@ -209,8 +204,8 @@ mod tests { let table = pretty_format_columns("a", &columns).unwrap().to_string(); let expected = vec![ - "+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", - "| |", "| g |", "+---+", + "+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", "| |", + "| g |", "+---+", ]; let actual: Vec<&str> = table.lines().collect(); @@ -289,10 +284,8 @@ mod tests { #[test] fn test_pretty_format_fixed_size_list() { // define a schema. - let field_type = DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Int32, true)), - 3, - ); + let field_type = + DataType::FixedSizeList(Arc::new(Field::new("item", DataType::Int32, true)), 3); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); let keys_builder = Int32Array::builder(3); @@ -383,10 +376,7 @@ mod tests { }; } - fn timestamp_batch( - timezone: &str, - value: T::Native, - ) -> RecordBatch { + fn timestamp_batch(timezone: &str, value: T::Native) -> RecordBatch { let mut builder = PrimitiveBuilder::::with_capacity(10); builder.append_value(value); builder.append_null(); @@ -621,8 +611,8 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); let expected = vec![ - "+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", - "| 3040 |", "+------+", + "+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |", + "+------+", ]; let actual: Vec<&str> = table.lines().collect(); @@ -660,16 +650,14 @@ mod tests { )), Arc::new(StructArray::from(vec![( Arc::new(Field::new("c121", DataType::Utf8, false)), - Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")])) - as ArrayRef, + Arc::new(StringArray::from(vec![Some("e"), Some("f"), Some("g")])) as ArrayRef, )])) as ArrayRef, ), ]); let c2 = StringArray::from(vec![Some("a"), Some("b"), Some("c")]); let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]) - .unwrap(); + RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); let expected = vec![ @@ -705,8 +693,7 @@ mod tests { UnionMode::Dense, )]); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = 
pretty_format_batches(&[batch]).unwrap().to_string(); let actual: Vec<&str> = table.lines().collect(); let expected = vec![ @@ -742,8 +729,7 @@ mod tests { UnionMode::Sparse, )]); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); let actual: Vec<&str> = table.lines().collect(); let expected = vec![ @@ -799,8 +785,7 @@ mod tests { UnionMode::Sparse, )]); - let batch = - RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap(); + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); let actual: Vec<&str> = table.lines().collect(); let expected = vec![ @@ -882,8 +867,7 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); let expected = vec![ - "+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", - "+------+", + "+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", "+------+", ]; let actual: Vec<&str> = table.lines().collect(); @@ -986,9 +970,7 @@ mod tests { fn test_format_options() { let options = FormatOptions::default().with_null("null"); let array = Int32Array::from(vec![Some(1), Some(2), None, Some(3), Some(4)]); - let batch = - RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)]) - .unwrap(); + let batch = RecordBatch::try_from_iter([("my_column_name", Arc::new(array) as _)]).unwrap(); let column = pretty_format_columns_with_options( "my_column_name", diff --git a/arrow-csv/Cargo.toml b/arrow-csv/Cargo.toml index 1f1a762d5065..66a6d7dbcaa5 100644 --- a/arrow-csv/Cargo.toml +++ b/arrow-csv/Cargo.toml @@ -39,7 +39,7 @@ arrow-buffer = { workspace = true } arrow-cast = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -chrono = { version = "0.4.23", default-features = false, features = ["clock"] } +chrono = { workspace = true } csv = { version = "1.1", default-features = false } csv-core = { version = "0.1" } lazy_static = { version = "1.4", default-features = false } diff --git a/arrow-csv/examples/README.md b/arrow-csv/examples/README.md new file mode 100644 index 000000000000..340413e76d94 --- /dev/null +++ b/arrow-csv/examples/README.md @@ -0,0 +1,21 @@ + + +# Examples +- [`csv_calculation.rs`](csv_calculation.rs): performs a simple calculation using the CSV reader \ No newline at end of file diff --git a/arrow-csv/examples/csv_calculation.rs b/arrow-csv/examples/csv_calculation.rs new file mode 100644 index 000000000000..6ce963e2b012 --- /dev/null +++ b/arrow-csv/examples/csv_calculation.rs @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow_array::cast::AsArray; +use arrow_array::types::Int16Type; +use arrow_csv::ReaderBuilder; + +use arrow_schema::{DataType, Field, Schema}; +use std::fs::File; +use std::sync::Arc; + +fn main() { + // read csv from file + let file = File::open("arrow-csv/test/data/example.csv").unwrap(); + let csv_schema = Schema::new(vec![ + Field::new("c1", DataType::Int16, true), + Field::new("c2", DataType::Float32, true), + Field::new("c3", DataType::Utf8, true), + Field::new("c4", DataType::Boolean, true), + ]); + let mut reader = ReaderBuilder::new(Arc::new(csv_schema)) + .with_header(true) + .build(file) + .unwrap(); + + match reader.next() { + Some(r) => match r { + Ok(r) => { + // get the column(0) max value + let col = r.column(0).as_primitive::(); + let max = col.iter().max().flatten(); + println!("max value column(0): {max:?}") + } + Err(e) => { + println!("{e:?}"); + } + }, + None => { + println!("csv is empty"); + } + } +} diff --git a/arrow-csv/src/reader/mod.rs b/arrow-csv/src/reader/mod.rs index 328c2cd41f3b..83c8965fdf8a 100644 --- a/arrow-csv/src/reader/mod.rs +++ b/arrow-csv/src/reader/mod.rs @@ -133,8 +133,8 @@ use arrow_schema::*; use chrono::{TimeZone, Utc}; use csv::StringRecord; use lazy_static::lazy_static; -use regex::RegexSet; -use std::fmt; +use regex::{Regex, RegexSet}; +use std::fmt::{self, Debug}; use std::fs::File; use std::io::{BufRead, BufReader as StdBufReader, Read, Seek, SeekFrom}; use std::sync::Arc; @@ -157,6 +157,22 @@ lazy_static! { ]).unwrap(); } +/// A wrapper over `Option` to check if the value is `NULL`. +#[derive(Debug, Clone, Default)] +struct NullRegex(Option); + +impl NullRegex { + /// Returns true if the value should be considered as `NULL` according to + /// the provided regular expression. 
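+    //
+    // Illustrative sketch of the rule (not doctested):
+    //
+    //     NullRegex(None).is_null("")      -> true   (default: empty string)
+    //     NullRegex(None).is_null("nil")   -> false
+    //     NullRegex(Some(Regex::new("^nil$").unwrap())).is_null("nil") -> true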
+ #[inline] + fn is_null(&self, s: &str) -> bool { + match &self.0 { + Some(r) => r.is_match(s), + None => s.is_empty(), + } + } +} + #[derive(Default, Copy, Clone)] struct InferredDataType { /// Packed booleans indicating type @@ -177,6 +193,7 @@ impl InferredDataType { /// Returns the inferred data type fn get(&self) -> DataType { match self.packed { + 0 => DataType::Null, 1 => DataType::Boolean, 2 => DataType::Int64, 4 | 6 => DataType::Float64, // Promote Int64 to Float64 @@ -208,16 +225,17 @@ impl InferredDataType { /// The format specification for the CSV file #[derive(Debug, Clone, Default)] pub struct Format { - has_header: bool, + header: bool, delimiter: Option, escape: Option, quote: Option, terminator: Option, + null_regex: NullRegex, } impl Format { pub fn with_header(mut self, has_header: bool) -> Self { - self.has_header = has_header; + self.header = has_header; self } @@ -241,6 +259,12 @@ impl Format { self } + /// Provide a regex to match null values, defaults to `^$` + pub fn with_null_regex(mut self, null_regex: Regex) -> Self { + self.null_regex = NullRegex(Some(null_regex)); + self + } + /// Infer schema of CSV records from the provided `reader` /// /// If `max_records` is `None`, all records will be read, otherwise up to `max_records` @@ -256,7 +280,7 @@ impl Format { // get or create header names // when has_header is false, creates default column names with column_ prefix - let headers: Vec = if self.has_header { + let headers: Vec = if self.header { let headers = &csv_reader.headers().map_err(map_csv_error)?.clone(); headers.iter().map(|s| s.to_string()).collect() } else { @@ -268,8 +292,7 @@ impl Format { let header_length = headers.len(); // keep track of inferred field types - let mut column_types: Vec = - vec![Default::default(); header_length]; + let mut column_types: Vec = vec![Default::default(); header_length]; let mut records_count = 0; @@ -283,11 +306,9 @@ impl Format { // Note since we may be looking at a sample of the data, we make the safe assumption that // they could be nullable - for (i, column_type) in - column_types.iter_mut().enumerate().take(header_length) - { + for (i, column_type) in column_types.iter_mut().enumerate().take(header_length) { if let Some(string) = record.get(i) { - if !string.is_empty() { + if !self.null_regex.is_null(string) { column_type.update(string) } } @@ -307,7 +328,7 @@ impl Format { /// Build a [`csv::Reader`] for this [`Format`] fn build_reader(&self, reader: R) -> csv::Reader { let mut builder = csv::ReaderBuilder::new(); - builder.has_headers(self.has_header); + builder.has_headers(self.header); if let Some(c) = self.delimiter { builder.delimiter(c); @@ -379,7 +400,7 @@ pub fn infer_reader_schema( ) -> Result<(Schema, usize), ArrowError> { let format = Format { delimiter: Some(delimiter), - has_header, + header: has_header, ..Default::default() }; format.infer_schema(reader, max_read_records) @@ -401,7 +422,7 @@ pub fn infer_schema_from_files( let mut records_to_read = max_read_records.unwrap_or(usize::MAX); let format = Format { delimiter: Some(delimiter), - has_header, + header: has_header, ..Default::default() }; @@ -557,6 +578,9 @@ pub struct Decoder { /// A decoder for [`StringRecords`] record_decoder: RecordDecoder, + + /// Check if the string matches this pattern for `NULL`. 
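+    /// Copied from [`Format`] when the decoder is built, so decoding can
+    /// apply the same null rule that schema inference used.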
+ null_regex: NullRegex, } impl Decoder { @@ -579,8 +603,7 @@ impl Decoder { return Ok(bytes); } - let to_read = - self.batch_size.min(self.end - self.line_number) - self.record_decoder.len(); + let to_read = self.batch_size.min(self.end - self.line_number) - self.record_decoder.len(); let (_, bytes) = self.record_decoder.decode(buf, to_read)?; Ok(bytes) } @@ -603,6 +626,7 @@ impl Decoder { Some(self.schema.metadata.clone()), self.projection.as_ref(), self.line_number, + &self.null_regex, )?; self.line_number += rows.len(); Ok(Some(batch)) @@ -621,6 +645,7 @@ fn parse( metadata: Option>, projection: Option<&Vec>, line_number: usize, + null_regex: &NullRegex, ) -> Result { let projection: Vec = match projection { Some(v) => v.clone(), @@ -633,70 +658,70 @@ fn parse( let i = *i; let field = &fields[i]; match field.data_type() { - DataType::Boolean => build_boolean_array(line_number, rows, i), - DataType::Decimal128(precision, scale) => { - build_decimal_array::( - line_number, - rows, - i, - *precision, - *scale, - ) - } - DataType::Decimal256(precision, scale) => { - build_decimal_array::( - line_number, - rows, - i, - *precision, - *scale, - ) + DataType::Boolean => build_boolean_array(line_number, rows, i, null_regex), + DataType::Decimal128(precision, scale) => build_decimal_array::( + line_number, + rows, + i, + *precision, + *scale, + null_regex, + ), + DataType::Decimal256(precision, scale) => build_decimal_array::( + line_number, + rows, + i, + *precision, + *scale, + null_regex, + ), + DataType::Int8 => { + build_primitive_array::(line_number, rows, i, null_regex) } - DataType::Int8 => build_primitive_array::(line_number, rows, i), DataType::Int16 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Int32 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Int64 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::UInt8 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::UInt16 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::UInt32 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::UInt64 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Float32 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Float64 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Date32 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Date64 => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Time32(TimeUnit::Second) => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Time32(TimeUnit::Millisecond) => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Time64(TimeUnit::Microsecond) => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, 
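                // every build_* helper now takes the null matcher instead of
                // hard-coding "empty string means NULL"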
rows, i, null_regex) } DataType::Time64(TimeUnit::Nanosecond) => { - build_primitive_array::(line_number, rows, i) + build_primitive_array::(line_number, rows, i, null_regex) } DataType::Timestamp(TimeUnit::Second, tz) => { build_timestamp_array::( @@ -704,6 +729,7 @@ fn parse( rows, i, tz.as_deref(), + null_regex, ) } DataType::Timestamp(TimeUnit::Millisecond, tz) => { @@ -712,6 +738,7 @@ fn parse( rows, i, tz.as_deref(), + null_regex, ) } DataType::Timestamp(TimeUnit::Microsecond, tz) => { @@ -720,6 +747,7 @@ fn parse( rows, i, tz.as_deref(), + null_regex, ) } DataType::Timestamp(TimeUnit::Nanosecond, tz) => { @@ -728,11 +756,16 @@ fn parse( rows, i, tz.as_deref(), + null_regex, ) } + DataType::Null => Ok(Arc::new(NullArray::builder(rows.len()).finish()) as ArrayRef), DataType::Utf8 => Ok(Arc::new( rows.iter() - .map(|row| Some(row.get(i))) + .map(|row| { + let s = row.get(i); + (!null_regex.is_null(s)).then_some(s) + }) .collect::(), ) as ArrayRef), DataType::Dictionary(key_type, value_type) @@ -791,8 +824,7 @@ fn parse( }) .collect(); - let projected_fields: Fields = - projection.iter().map(|i| fields[*i].clone()).collect(); + let projected_fields: Fields = projection.iter().map(|i| fields[*i].clone()).collect(); let projected_schema = Arc::new(match metadata { None => Schema::new(projected_fields), @@ -827,16 +859,16 @@ fn build_decimal_array( col_idx: usize, precision: u8, scale: i8, + null_regex: &NullRegex, ) -> Result { let mut decimal_builder = PrimitiveBuilder::::with_capacity(rows.len()); for row in rows.iter() { let s = row.get(col_idx); - if s.is_empty() { + if null_regex.is_null(s) { // append null decimal_builder.append_null(); } else { - let decimal_value: Result = - parse_decimal::(s, precision, scale); + let decimal_value: Result = parse_decimal::(s, precision, scale); match decimal_value { Ok(v) => { decimal_builder.append_value(v); @@ -859,12 +891,13 @@ fn build_primitive_array( line_number: usize, rows: &StringRecords<'_>, col_idx: usize, + null_regex: &NullRegex, ) -> Result { rows.iter() .enumerate() .map(|(row_index, row)| { let s = row.get(col_idx); - if s.is_empty() { + if null_regex.is_null(s) { return Ok(None); } @@ -888,14 +921,15 @@ fn build_timestamp_array( rows: &StringRecords<'_>, col_idx: usize, timezone: Option<&str>, + null_regex: &NullRegex, ) -> Result { Ok(Arc::new(match timezone { Some(timezone) => { let tz: Tz = timezone.parse()?; - build_timestamp_array_impl::(line_number, rows, col_idx, &tz)? + build_timestamp_array_impl::(line_number, rows, col_idx, &tz, null_regex)? 
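                // then re-attach the caller-supplied timezone string to the
                // resulting array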
.with_timezone(timezone) } - None => build_timestamp_array_impl::(line_number, rows, col_idx, &Utc)?, + None => build_timestamp_array_impl::(line_number, rows, col_idx, &Utc, null_regex)?, })) } @@ -904,29 +938,36 @@ fn build_timestamp_array_impl( rows: &StringRecords<'_>, col_idx: usize, timezone: &Tz, + null_regex: &NullRegex, ) -> Result, ArrowError> { rows.iter() .enumerate() .map(|(row_index, row)| { let s = row.get(col_idx); - if s.is_empty() { + if null_regex.is_null(s) { return Ok(None); } - let date = string_to_datetime(timezone, s).map_err(|e| { - ArrowError::ParseError(format!( - "Error parsing column {col_idx} at line {}: {}", - line_number + row_index, - e - )) - })?; - - Ok(Some(match T::UNIT { - TimeUnit::Second => date.timestamp(), - TimeUnit::Millisecond => date.timestamp_millis(), - TimeUnit::Microsecond => date.timestamp_micros(), - TimeUnit::Nanosecond => date.timestamp_nanos(), - })) + let date = string_to_datetime(timezone, s) + .and_then(|date| match T::UNIT { + TimeUnit::Second => Ok(date.timestamp()), + TimeUnit::Millisecond => Ok(date.timestamp_millis()), + TimeUnit::Microsecond => Ok(date.timestamp_micros()), + TimeUnit::Nanosecond => date.timestamp_nanos_opt().ok_or_else(|| { + ArrowError::ParseError(format!( + "{} would overflow 64-bit signed nanoseconds", + date.to_rfc3339(), + )) + }), + }) + .map_err(|e| { + ArrowError::ParseError(format!( + "Error parsing column {col_idx} at line {}: {}", + line_number + row_index, + e + )) + })?; + Ok(Some(date)) }) .collect() } @@ -936,12 +977,13 @@ fn build_boolean_array( line_number: usize, rows: &StringRecords<'_>, col_idx: usize, + null_regex: &NullRegex, ) -> Result { rows.iter() .enumerate() .map(|(row_index, row)| { let s = row.get(col_idx); - if s.is_empty() { + if null_regex.is_null(s) { return Ok(None); } let parsed = parse_bool(s); @@ -1010,8 +1052,16 @@ impl ReaderBuilder { } /// Set whether the CSV file has headers + #[deprecated(note = "Use with_header")] + #[doc(hidden)] pub fn has_header(mut self, has_header: bool) -> Self { - self.format.has_header = has_header; + self.format.header = has_header; + self + } + + /// Set whether the CSV file has a header + pub fn with_header(mut self, has_header: bool) -> Self { + self.format.header = has_header; self } @@ -1042,6 +1092,12 @@ impl ReaderBuilder { self } + /// Provide a regex to match null values, defaults to `^$` + pub fn with_null_regex(mut self, null_regex: Regex) -> Self { + self.format.null_regex = NullRegex(Some(null_regex)); + self + } + /// Set the batch size (number of records to load at one time) pub fn with_batch_size(mut self, batch_size: usize) -> Self { self.batch_size = batch_size; @@ -1070,10 +1126,7 @@ impl ReaderBuilder { } /// Create a new `BufReader` from a buffered reader - pub fn build_buffered( - self, - reader: R, - ) -> Result, ArrowError> { + pub fn build_buffered(self, reader: R) -> Result, ArrowError> { Ok(BufReader { reader, decoder: self.build_decoder(), @@ -1085,7 +1138,7 @@ impl ReaderBuilder { let delimiter = self.format.build_parser(); let record_decoder = RecordDecoder::new(delimiter, self.schema.fields().len()); - let header = self.format.has_header as usize; + let header = self.format.header as usize; let (start, end) = match self.bounds { Some((start, end)) => (start + header, end + header), @@ -1100,6 +1153,7 @@ impl ReaderBuilder { end, projection: self.projection, batch_size: self.batch_size, + null_regex: self.format.null_regex, } } } @@ -1218,14 +1272,13 @@ mod tests { Field::new("lng", DataType::Float64, false), 
]); - let file_with_headers = - File::open("test/data/uk_cities_with_headers.csv").unwrap(); + let file_with_headers = File::open("test/data/uk_cities_with_headers.csv").unwrap(); let file_without_headers = File::open("test/data/uk_cities.csv").unwrap(); let both_files = file_with_headers .chain(Cursor::new("\n".to_string())) .chain(file_without_headers); let mut csv = ReaderBuilder::new(Arc::new(schema)) - .has_header(true) + .with_header(true) .build(both_files) .unwrap(); let batch = csv.next().unwrap().unwrap(); @@ -1243,7 +1296,7 @@ mod tests { .unwrap(); file.rewind().unwrap(); - let builder = ReaderBuilder::new(Arc::new(schema)).has_header(true); + let builder = ReaderBuilder::new(Arc::new(schema)).with_header(true); let mut csv = builder.build(file).unwrap(); let expected_schema = Schema::new(vec![ @@ -1406,14 +1459,14 @@ mod tests { let schema = Arc::new(Schema::new(vec![ Field::new("c_int", DataType::UInt64, false), Field::new("c_float", DataType::Float32, true), - Field::new("c_string", DataType::Utf8, false), + Field::new("c_string", DataType::Utf8, true), Field::new("c_bool", DataType::Boolean, false), ])); let file = File::open("test/data/null_test.csv").unwrap(); let mut csv = ReaderBuilder::new(schema) - .has_header(true) + .with_header(true) .build(file) .unwrap(); @@ -1426,6 +1479,91 @@ mod tests { assert!(!batch.column(1).is_null(4)); } + #[test] + fn test_init_nulls() { + let schema = Arc::new(Schema::new(vec![ + Field::new("c_int", DataType::UInt64, true), + Field::new("c_float", DataType::Float32, true), + Field::new("c_string", DataType::Utf8, true), + Field::new("c_bool", DataType::Boolean, true), + Field::new("c_null", DataType::Null, true), + ])); + let file = File::open("test/data/init_null_test.csv").unwrap(); + + let mut csv = ReaderBuilder::new(schema) + .with_header(true) + .build(file) + .unwrap(); + + let batch = csv.next().unwrap().unwrap(); + + assert!(batch.column(1).is_null(0)); + assert!(!batch.column(1).is_null(1)); + assert!(batch.column(1).is_null(2)); + assert!(!batch.column(1).is_null(3)); + assert!(!batch.column(1).is_null(4)); + } + + #[test] + fn test_init_nulls_with_inference() { + let format = Format::default().with_header(true).with_delimiter(b','); + + let mut file = File::open("test/data/init_null_test.csv").unwrap(); + let (schema, _) = format.infer_schema(&mut file, None).unwrap(); + file.rewind().unwrap(); + + let expected_schema = Schema::new(vec![ + Field::new("c_int", DataType::Int64, true), + Field::new("c_float", DataType::Float64, true), + Field::new("c_string", DataType::Utf8, true), + Field::new("c_bool", DataType::Boolean, true), + Field::new("c_null", DataType::Null, true), + ]); + assert_eq!(schema, expected_schema); + + let mut csv = ReaderBuilder::new(Arc::new(schema)) + .with_format(format) + .build(file) + .unwrap(); + + let batch = csv.next().unwrap().unwrap(); + + assert!(batch.column(1).is_null(0)); + assert!(!batch.column(1).is_null(1)); + assert!(batch.column(1).is_null(2)); + assert!(!batch.column(1).is_null(3)); + assert!(!batch.column(1).is_null(4)); + } + + #[test] + fn test_custom_nulls() { + let schema = Arc::new(Schema::new(vec![ + Field::new("c_int", DataType::UInt64, true), + Field::new("c_float", DataType::Float32, true), + Field::new("c_string", DataType::Utf8, true), + Field::new("c_bool", DataType::Boolean, true), + ])); + + let file = File::open("test/data/custom_null_test.csv").unwrap(); + + let null_regex = Regex::new("^nil$").unwrap(); + + let mut csv = ReaderBuilder::new(schema) + .with_header(true) 
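+            // The regex replaces the default "empty string is NULL" rule, so
+            // only cells that are exactly "nil" decode as NULL below.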
+ .with_null_regex(null_regex) + .build(file) + .unwrap(); + + let batch = csv.next().unwrap().unwrap(); + + // "nil"s should be NULL + assert!(batch.column(0).is_null(1)); + assert!(batch.column(1).is_null(2)); + assert!(batch.column(3).is_null(4)); + assert!(batch.column(2).is_null(3)); + assert!(!batch.column(2).is_null(4)); + } + #[test] fn test_nulls_with_inference() { let mut file = File::open("test/data/various_types.csv").unwrap(); @@ -1457,8 +1595,7 @@ mod tests { schema.field(5).data_type() ); - let names: Vec<&str> = - schema.fields().iter().map(|x| x.name().as_str()).collect(); + let names: Vec<&str> = schema.fields().iter().map(|x| x.name().as_str()).collect(); assert_eq!( names, vec![ @@ -1485,6 +1622,42 @@ mod tests { assert!(!batch.column(1).is_null(4)); } + #[test] + fn test_custom_nulls_with_inference() { + let mut file = File::open("test/data/custom_null_test.csv").unwrap(); + + let null_regex = Regex::new("^nil$").unwrap(); + + let format = Format::default() + .with_header(true) + .with_null_regex(null_regex); + + let (schema, _) = format.infer_schema(&mut file, None).unwrap(); + file.rewind().unwrap(); + + let expected_schema = Schema::new(vec![ + Field::new("c_int", DataType::Int64, true), + Field::new("c_float", DataType::Float64, true), + Field::new("c_string", DataType::Utf8, true), + Field::new("c_bool", DataType::Boolean, true), + ]); + + assert_eq!(schema, expected_schema); + + let builder = ReaderBuilder::new(Arc::new(schema)) + .with_format(format) + .with_batch_size(512) + .with_projection(vec![0, 1, 2, 3]); + + let mut csv = builder.build(file).unwrap(); + let batch = csv.next().unwrap().unwrap(); + + assert_eq!(5, batch.num_rows()); + assert_eq!(4, batch.num_columns()); + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + #[test] fn test_parse_invalid_csv() { let file = File::open("test/data/various_types_invalid.csv").unwrap(); @@ -1497,7 +1670,7 @@ mod tests { ]); let builder = ReaderBuilder::new(Arc::new(schema)) - .has_header(true) + .with_header(true) .with_delimiter(b'|') .with_batch_size(512) .with_projection(vec![0, 1, 2, 3]); @@ -1598,16 +1771,11 @@ mod tests { -2203932304000 ); assert_eq!( - Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S") - .unwrap(), + Date64Type::parse_formatted("1900-02-28 12:34:56", "%Y-%m-%d %H:%M:%S").unwrap(), -2203932304000 ); assert_eq!( - Date64Type::parse_formatted( - "1900-02-28 12:34:56+0030", - "%Y-%m-%d %H:%M:%S%z" - ) - .unwrap(), + Date64Type::parse_formatted("1900-02-28 12:34:56+0030", "%Y-%m-%d %H:%M:%S%z").unwrap(), -2203932304000 - (30 * 60 * 1000) ); } @@ -1644,10 +1812,7 @@ mod tests { #[test] fn test_parse_timestamp() { - test_parse_timestamp_impl::( - None, - &[0, 0, -7_200_000_000_000], - ); + test_parse_timestamp_impl::(None, &[0, 0, -7_200_000_000_000]); test_parse_timestamp_impl::( Some("+00:00".into()), &[0, 0, -7_200_000_000_000], @@ -1664,10 +1829,7 @@ mod tests { Some("-03".into()), &[10_800_000, 0, -7_200_000], ); - test_parse_timestamp_impl::( - Some("-03".into()), - &[10_800, 0, -7_200], - ); + test_parse_timestamp_impl::(Some("-03".into()), &[10_800, 0, -7_200]); } #[test] @@ -1824,7 +1986,7 @@ mod tests { Field::new("text2", DataType::Utf8, false), ]); let builder = ReaderBuilder::new(Arc::new(schema)) - .has_header(false) + .with_header(false) .with_quote(b'~'); // default is ", change to ~ let mut csv_text = Vec::new(); @@ -1856,7 +2018,7 @@ mod tests { Field::new("text2", DataType::Utf8, false), ]); let builder = ReaderBuilder::new(Arc::new(schema)) - 
.has_header(false) + .with_header(false) .with_escape(b'\\'); // default is None, change to \ let mut csv_text = Vec::new(); @@ -1888,7 +2050,7 @@ mod tests { Field::new("text2", DataType::Utf8, false), ]); let builder = ReaderBuilder::new(Arc::new(schema)) - .has_header(false) + .with_header(false) .with_terminator(b'\n'); // default is CRLF, change to LF let mut csv_text = Vec::new(); @@ -1930,7 +2092,7 @@ mod tests { ])); for (idx, (bounds, has_header, expected)) in tests.into_iter().enumerate() { - let mut reader = ReaderBuilder::new(schema.clone()).has_header(has_header); + let mut reader = ReaderBuilder::new(schema.clone()).with_header(has_header); if let Some((start, end)) = bounds { reader = reader.with_bounds(start, end); } @@ -1995,7 +2157,7 @@ mod tests { for capacity in [1, 3, 7, 100] { let reader = ReaderBuilder::new(schema.clone()) .with_batch_size(batch_size) - .has_header(has_header) + .with_header(has_header) .build(File::open(path).unwrap()) .unwrap(); @@ -2006,14 +2168,12 @@ mod tests { expected_rows ); - let buffered = std::io::BufReader::with_capacity( - capacity, - File::open(path).unwrap(), - ); + let buffered = + std::io::BufReader::with_capacity(capacity, File::open(path).unwrap()); let reader = ReaderBuilder::new(schema.clone()) .with_batch_size(batch_size) - .has_header(has_header) + .with_header(has_header) .build_buffered(buffered) .unwrap(); @@ -2026,8 +2186,8 @@ mod tests { fn err_test(csv: &[u8], expected: &str) { let schema = Arc::new(Schema::new(vec![ - Field::new("text1", DataType::Utf8, false), - Field::new("text2", DataType::Utf8, false), + Field::new("text1", DataType::Utf8, true), + Field::new("text2", DataType::Utf8, true), ])); let buffer = std::io::BufReader::with_capacity(2, Cursor::new(csv)); let b = ReaderBuilder::new(schema) @@ -2132,7 +2292,7 @@ mod tests { #[test] fn test_inference() { let cases: &[(&[&str], DataType)] = &[ - (&[], DataType::Utf8), + (&[], DataType::Null), (&["false", "12"], DataType::Utf8), (&["12", "cupcakes"], DataType::Utf8), (&["12", "12.4"], DataType::Float64), diff --git a/arrow-csv/src/reader/records.rs b/arrow-csv/src/reader/records.rs index a59d02e0e2d8..877cfb3ee653 100644 --- a/arrow-csv/src/reader/records.rs +++ b/arrow-csv/src/reader/records.rs @@ -76,11 +76,7 @@ impl RecordDecoder { /// Decodes records from `input` returning the number of records and bytes read /// /// Note: this expects to be called with an empty `input` to signal EOF - pub fn decode( - &mut self, - input: &[u8], - to_read: usize, - ) -> Result<(usize, usize), ArrowError> { + pub fn decode(&mut self, input: &[u8], to_read: usize) -> Result<(usize, usize), ArrowError> { if to_read == 0 { return Ok((0, 0)); } @@ -124,11 +120,17 @@ impl RecordDecoder { // Need to allocate more capacity ReadRecordResult::OutputFull => break, ReadRecordResult::OutputEndsFull => { - return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got more than {}", self.line_number, self.num_columns, self.current_field))); + return Err(ArrowError::CsvError(format!( + "incorrect number of fields for line {}, expected {} got more than {}", + self.line_number, self.num_columns, self.current_field + ))); } ReadRecordResult::Record => { if self.current_field != self.num_columns { - return Err(ArrowError::CsvError(format!("incorrect number of fields for line {}, expected {} got {}", self.line_number, self.num_columns, self.current_field))); + return Err(ArrowError::CsvError(format!( + "incorrect number of fields for line {}, expected {} got 
{}",
+                        self.line_number, self.num_columns, self.current_field
+                    )));
                 }
                 read += 1;
                 self.current_field = 0;
@@ -334,8 +336,7 @@ mod tests {
         let mut decoder = RecordDecoder::new(Reader::new(), 2);

         let err = decoder.decode(csv.as_bytes(), 4).unwrap_err().to_string();
-        let expected =
-            "Csv error: incorrect number of fields for line 3, expected 2 got 1";
+        let expected = "Csv error: incorrect number of fields for line 3, expected 2 got 1";
         assert_eq!(err, expected);
diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs
index 840e8e8a93cc..0bb76e536e67 100644
--- a/arrow-csv/src/writer.rs
+++ b/arrow-csv/src/writer.rs
@@ -70,11 +70,6 @@ use csv::ByteRecord;
 use std::io::Write;

 use crate::map_csv_error;
-
-const DEFAULT_DATE_FORMAT: &str = "%F";
-const DEFAULT_TIME_FORMAT: &str = "%T";
-const DEFAULT_TIMESTAMP_FORMAT: &str = "%FT%H:%M:%S.%9f";
-const DEFAULT_TIMESTAMP_TZ_FORMAT: &str = "%FT%H:%M:%S.%9f%:z";
 const DEFAULT_NULL_VALUE: &str = "";

 /// A CSV writer
@@ -82,41 +77,29 @@ const DEFAULT_NULL_VALUE: &str = "";
 pub struct Writer<W: Write> {
     /// The object to write to
     writer: csv::Writer<W>,
-    /// Whether file should be written with headers. Defaults to `true`
+    /// Whether file should be written with headers, defaults to `true`
     has_headers: bool,
-    /// The date format for date arrays
+    /// The date format for date arrays, defaults to RFC3339
     date_format: Option<String>,
-    /// The datetime format for datetime arrays
+    /// The datetime format for datetime arrays, defaults to RFC3339
     datetime_format: Option<String>,
-    /// The timestamp format for timestamp arrays
+    /// The timestamp format for timestamp arrays, defaults to RFC3339
     timestamp_format: Option<String>,
-    /// The timestamp format for timestamp (with timezone) arrays
+    /// The timestamp format for timestamp (with timezone) arrays, defaults to RFC3339
     timestamp_tz_format: Option<String>,
-    /// The time format for time arrays
+    /// The time format for time arrays, defaults to RFC3339
     time_format: Option<String>,
     /// Is the beginning-of-writer
     beginning: bool,
-    /// The value to represent null entries
-    null_value: String,
+    /// The value to represent null entries, defaults to [`DEFAULT_NULL_VALUE`]
+    null_value: Option<String>,
 }

 impl<W: Write> Writer<W> {
     /// Create a new CsvWriter from a writable object, with default options
     pub fn new(writer: W) -> Self {
         let delimiter = b',';
-        let mut builder = csv::WriterBuilder::new();
-        let writer = builder.delimiter(delimiter).from_writer(writer);
-        Writer {
-            writer,
-            has_headers: true,
-            date_format: Some(DEFAULT_DATE_FORMAT.to_string()),
-            datetime_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            time_format: Some(DEFAULT_TIME_FORMAT.to_string()),
-            timestamp_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            timestamp_tz_format: Some(DEFAULT_TIMESTAMP_TZ_FORMAT.to_string()),
-            beginning: true,
-            null_value: DEFAULT_NULL_VALUE.to_string(),
-        }
+        WriterBuilder::new().with_delimiter(delimiter).build(writer)
     }

     /// Write a vector of record batches to a writable object
@@ -138,7 +121,7 @@ impl<W: Write> Writer<W> {
         }

         let options = FormatOptions::default()
-            .with_null(&self.null_value)
+            .with_null(self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE))
             .with_date_format(self.date_format.as_deref())
             .with_datetime_format(self.datetime_format.as_deref())
             .with_timestamp_format(self.timestamp_format.as_deref())
@@ -207,9 +190,9 @@ impl<W: Write> RecordBatchWriter for Writer<W> {
 #[derive(Clone, Debug)]
 pub struct WriterBuilder {
     /// Optional column delimiter. Defaults to `b','`
-    delimiter: Option<u8>,
+    delimiter: u8,
     /// Whether to write column names as file headers. Defaults to `true`
-    has_headers: bool,
+    has_header: bool,
     /// Optional date format for date arrays
     date_format: Option<String>,
     /// Optional datetime format for datetime arrays
@@ -227,14 +210,14 @@ pub struct WriterBuilder {
 impl Default for WriterBuilder {
     fn default() -> Self {
         Self {
-            has_headers: true,
-            delimiter: None,
-            date_format: Some(DEFAULT_DATE_FORMAT.to_string()),
-            datetime_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            time_format: Some(DEFAULT_TIME_FORMAT.to_string()),
-            timestamp_format: Some(DEFAULT_TIMESTAMP_FORMAT.to_string()),
-            timestamp_tz_format: Some(DEFAULT_TIMESTAMP_TZ_FORMAT.to_string()),
-            null_value: Some(DEFAULT_NULL_VALUE.to_string()),
+            has_header: true,
+            delimiter: b',',
+            date_format: None,
+            datetime_format: None,
+            time_format: None,
+            timestamp_format: None,
+            timestamp_tz_format: None,
+            null_value: None,
         }
     }
 }
@@ -254,7 +237,7 @@ impl WriterBuilder {
     /// let file = File::create("target/out.csv").unwrap();
     ///
     /// // create a builder that doesn't write headers
-    /// let builder = WriterBuilder::new().has_headers(false);
+    /// let builder = WriterBuilder::new().with_header(false);
     /// let writer = builder.build(file);
     ///
     /// writer
@@ -265,48 +248,92 @@ impl WriterBuilder {
     }

     /// Set whether to write headers
+    #[deprecated(note = "Use Self::with_header")]
+    #[doc(hidden)]
     pub fn has_headers(mut self, has_headers: bool) -> Self {
-        self.has_headers = has_headers;
+        self.has_header = has_headers;
         self
     }

+    /// Set whether to write the CSV file with a header
+    pub fn with_header(mut self, header: bool) -> Self {
+        self.has_header = header;
+        self
+    }
+
+    /// Returns `true` if this writer is configured to write a header
+    pub fn header(&self) -> bool {
+        self.has_header
+    }
+
     /// Set the CSV file's column delimiter as a byte character
     pub fn with_delimiter(mut self, delimiter: u8) -> Self {
-        self.delimiter = Some(delimiter);
+        self.delimiter = delimiter;
         self
     }

+    /// Get the CSV file's column delimiter as a byte character
+    pub fn delimiter(&self) -> u8 {
+        self.delimiter
+    }
+
     /// Set the CSV file's date format
     pub fn with_date_format(mut self, format: String) -> Self {
         self.date_format = Some(format);
         self
     }

+    /// Get the CSV file's date format if set, defaults to RFC3339
+    pub fn date_format(&self) -> Option<&str> {
+        self.date_format.as_deref()
+    }
+
     /// Set the CSV file's datetime format
     pub fn with_datetime_format(mut self, format: String) -> Self {
         self.datetime_format = Some(format);
         self
     }

+    /// Get the CSV file's datetime format if set, defaults to RFC3339
+    pub fn datetime_format(&self) -> Option<&str> {
+        self.datetime_format.as_deref()
+    }
+
     /// Set the CSV file's time format
     pub fn with_time_format(mut self, format: String) -> Self {
         self.time_format = Some(format);
         self
     }

+    /// Get the CSV file's time format if set, defaults to RFC3339
+    pub fn time_format(&self) -> Option<&str> {
+        self.time_format.as_deref()
+    }
+
     /// Set the CSV file's timestamp format
     pub fn with_timestamp_format(mut self, format: String) -> Self {
         self.timestamp_format = Some(format);
         self
     }

+    /// Get the CSV file's timestamp format if set, defaults to RFC3339
+    pub fn timestamp_format(&self) -> Option<&str> {
+        self.timestamp_format.as_deref()
+    }
+
     /// Set the value to represent null in output
     pub fn with_null(mut self, null_value: String) -> Self {
         self.null_value = Some(null_value);
         self
     }

-    /// Use RFC3339 format for date/time/timestamps
+    /// Get the value to represent null in output
+    pub fn null(&self) -> &str {
+        self.null_value.as_deref().unwrap_or(DEFAULT_NULL_VALUE)
+    }
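
The builder now pairs each `with_*` setter with a read-back getter, and the format fields default to `None`, which means RFC3339 output. A short sketch of configuring and inspecting a builder under those assumptions (the assertions only restate what the getters above return):

    use arrow_csv::WriterBuilder;

    fn configure_builder() {
        let builder = WriterBuilder::new()
            .with_header(false) // replaces the deprecated has_headers(false)
            .with_delimiter(b'|')
            .with_null("NULL".to_string());

        // Read-back getters introduced by this change
        assert!(!builder.header());
        assert_eq!(builder.delimiter(), b'|');
        assert_eq!(builder.null(), "NULL");
        assert_eq!(builder.date_format(), None); // None => RFC3339 default
    }
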
+
+    /// Use RFC3339 format for date/time/timestamps (default)
+    #[deprecated(note = "Use WriterBuilder::default()")]
     pub fn with_rfc3339(mut self) -> Self {
         self.date_format = None;
         self.datetime_format = None;
@@ -318,21 +345,18 @@ impl WriterBuilder {

     /// Create a new `Writer`
     pub fn build<W: Write>(self, writer: W) -> Writer<W> {
-        let delimiter = self.delimiter.unwrap_or(b',');
         let mut builder = csv::WriterBuilder::new();
-        let writer = builder.delimiter(delimiter).from_writer(writer);
+        let writer = builder.delimiter(self.delimiter).from_writer(writer);
         Writer {
             writer,
-            has_headers: self.has_headers,
+            beginning: true,
+            has_headers: self.has_header,
             date_format: self.date_format,
             datetime_format: self.datetime_format,
             time_format: self.time_format,
             timestamp_format: self.timestamp_format,
             timestamp_tz_format: self.timestamp_tz_format,
-            beginning: true,
-            null_value: self
-                .null_value
-                .unwrap_or_else(|| DEFAULT_NULL_VALUE.to_string()),
+            null_value: self.null_value,
         }
     }
 }
@@ -365,18 +389,12 @@ mod tests {
             "consectetur adipiscing elit",
             "sed do eiusmod tempor",
         ]);
-        let c2 = PrimitiveArray::<Float64Type>::from(vec![
-            Some(123.564532),
-            None,
-            Some(-556132.25),
-        ]);
+        let c2 =
+            PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
         let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
         let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
-        let c5 = TimestampMillisecondArray::from(vec![
-            None,
-            Some(1555584887378),
-            Some(1555555555555),
-        ]);
+        let c5 =
+            TimestampMillisecondArray::from(vec![None, Some(1555584887378), Some(1555555555555)]);
         let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
         let c7: DictionaryArray<Int32Type> =
             vec!["cupcakes", "cupcakes", "foo"].into_iter().collect();
@@ -411,11 +429,11 @@ mod tests {

         let expected = r#"c1,c2,c3,c4,c5,c6,c7
 Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
 Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,cupcakes
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo
 "#;
         assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap());
     }
@@ -427,13 +445,11 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
             Field::new("c2", DataType::Decimal256(76, 6), true),
         ]);

-        let mut c1_builder =
-            Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
+        let mut c1_builder = Decimal128Builder::new().with_data_type(DataType::Decimal128(38, 6));
         c1_builder.extend(vec![Some(-3335724), Some(2179404), None, Some(290472)]);
         let c1 = c1_builder.finish();

-        let mut c2_builder =
-            Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
+        let mut c2_builder = Decimal256Builder::new().with_data_type(DataType::Decimal256(76, 6));
         c2_builder.extend(vec![
             Some(i256::from_i128(-3335724)),
             Some(i256::from_i128(2179404)),
             None,
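
With the per-type format constants removed, a default `Writer` now renders temporal columns in RFC3339 form, which is what `with_rfc3339()` previously opted into; the expected strings in the test above change accordingly (`2019-04-18T10:54:47.378` instead of `...47.378000000`). A hedged sketch of that default behaviour, using the same millisecond timestamp value as the test:

    use std::sync::Arc;

    use arrow_array::{RecordBatch, TimestampMillisecondArray};
    use arrow_csv::Writer;
    use arrow_schema::{DataType, Field, Schema, TimeUnit};

    fn write_rfc3339_by_default() -> String {
        let schema = Schema::new(vec![Field::new(
            "ts",
            DataType::Timestamp(TimeUnit::Millisecond, None),
            false,
        )]);
        let ts = TimestampMillisecondArray::from(vec![1555584887378]);
        let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(ts)]).unwrap();

        let mut buf = Vec::new();
        let mut writer = Writer::new(&mut buf); // defaults: header + RFC3339 formats
        writer.write(&batch).unwrap();
        drop(writer); // release the borrow of buf

        // Expect "ts\n2019-04-18T10:54:47.378\n" rather than the old %FT%H:%M:%S.%9f output
        String::from_utf8(buf).unwrap()
    }
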
@@ -443,8 +459,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         let c2 = c2_builder.finish();

         let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)])
-                .unwrap();
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap();

         let mut file = tempfile::tempfile().unwrap();
@@ -488,11 +503,8 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
             "consectetur adipiscing elit",
             "sed do eiusmod tempor",
         ]);
-        let c2 = PrimitiveArray::<Float64Type>::from(vec![
-            Some(123.564532),
-            None,
-            Some(-556132.25),
-        ]);
+        let c2 =
+            PrimitiveArray::<Float64Type>::from(vec![Some(123.564532), None, Some(-556132.25)]);
         let c3 = PrimitiveArray::<UInt32Type>::from(vec![3, 2, 1]);
         let c4 = BooleanArray::from(vec![Some(true), Some(false), None]);
         let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
@@ -512,7 +524,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         let mut file = tempfile::tempfile().unwrap();

         let builder = WriterBuilder::new()
-            .has_headers(false)
+            .with_header(false)
             .with_delimiter(b'|')
             .with_null("NULL".to_string())
             .with_time_format("%r".to_string());
@@ -560,7 +572,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         )
         .unwrap();

-        let builder = WriterBuilder::new().has_headers(false);
+        let builder = WriterBuilder::new().with_header(false);
         let mut buf: Cursor<Vec<u8>> = Default::default();
         // drop the writer early to release the borrow.
@@ -605,8 +617,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         let c0 = UInt32Array::from(vec![Some(123), Some(234)]);
         let c1 = Date64Array::from(vec![Some(1926632005177), Some(1926632005177685347)]);
         let batch =
-            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)])
-                .unwrap();
+            RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c0), Arc::new(c1)]).unwrap();

         let mut file = tempfile::tempfile().unwrap();
         let mut writer = Writer::new(&mut file);
@@ -632,15 +643,9 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo
         Field::new("c4", DataType::Time32(TimeUnit::Second), false),
     ]);

-        let c1 = TimestampMillisecondArray::from(vec![
-            Some(1555584887378),
-            Some(1635577147000),
-        ])
-        .with_timezone("+00:00".to_string());
-        let c2 = TimestampMillisecondArray::from(vec![
-            Some(1555584887378),
-            Some(1635577147000),
-        ]);
+        let c1 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)])
+            .with_timezone("+00:00".to_string());
+        let c2 = TimestampMillisecondArray::from(vec![Some(1555584887378), Some(1635577147000)]);
         let c3 = Date32Array::from(vec![3, 2]);
         let c4 = Time32SecondArray::from(vec![1234, 24680]);
@@ -652,7 +657,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,foo

         let mut file = tempfile::tempfile().unwrap();

-        let builder = WriterBuilder::new().with_rfc3339();
+        let builder = WriterBuilder::new();
         let mut writer = builder.build(&mut file);
         let batches = vec![&batch];
         for batch in batches {
diff --git a/arrow-csv/test/data/custom_null_test.csv b/arrow-csv/test/data/custom_null_test.csv
new file mode 100644
index 000000000000..39f9fc4b3eff
--- /dev/null
+++ b/arrow-csv/test/data/custom_null_test.csv
@@ -0,0 +1,6 @@
+c_int,c_float,c_string,c_bool
+1,1.1,"1.11",True
+nil,2.2,"2.22",TRUE
+3,nil,"3.33",true
+4,4.4,nil,False
+5,6.6,"",nil
diff --git a/arrow-csv/test/data/example.csv b/arrow-csv/test/data/example.csv
new file mode 100644
index 000000000000..0c03cee84528
--- /dev/null
+++ b/arrow-csv/test/data/example.csv
@@ -0,0 +1,4 @@
+c1,c2,c3,c4
+1,1.1,"hong kong",true
+3,323.12,"XiAn",false +10,131323.12,"cheng du",false \ No newline at end of file diff --git a/arrow-csv/test/data/init_null_test.csv b/arrow-csv/test/data/init_null_test.csv new file mode 100644 index 000000000000..f7d8a299645d --- /dev/null +++ b/arrow-csv/test/data/init_null_test.csv @@ -0,0 +1,6 @@ +c_int,c_float,c_string,c_bool,c_null +,,,, +2,2.2,"a",TRUE, +3,,"b",true, +4,4.4,,False, +5,6.6,"",FALSE, \ No newline at end of file diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 7e07194012bf..10c53c549e2b 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -42,9 +42,7 @@ pub(crate) fn contains_nulls( ) -> bool { match null_bit_buffer { Some(buffer) => { - match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len) - .next() - { + match BitSliceIterator::new(buffer.validity(), buffer.offset() + offset, len).next() { Some((start, end)) => start != 0 || end != len, None => len != 0, // No non-null values } @@ -130,9 +128,9 @@ pub(crate) fn new_buffers(data_type: &DataType, capacity: usize) -> [MutableBuff MutableBuffer::new(capacity * k.primitive_width().unwrap()), empty_buffer, ], - DataType::FixedSizeList(_, _) - | DataType::Struct(_) - | DataType::RunEndEncoded(_, _) => [empty_buffer, MutableBuffer::new(0)], + DataType::FixedSizeList(_, _) | DataType::Struct(_) | DataType::RunEndEncoded(_, _) => { + [empty_buffer, MutableBuffer::new(0)] + } DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => [ MutableBuffer::new(capacity * mem::size_of::()), empty_buffer, @@ -159,10 +157,9 @@ pub(crate) fn into_buffers( ) -> Vec { match data_type { DataType::Null | DataType::Struct(_) | DataType::FixedSizeList(_, _) => vec![], - DataType::Utf8 - | DataType::Binary - | DataType::LargeUtf8 - | DataType::LargeBinary => vec![buffer1.into(), buffer2.into()], + DataType::Utf8 | DataType::Binary | DataType::LargeUtf8 | DataType::LargeBinary => { + vec![buffer1.into(), buffer2.into()] + } DataType::Union(_, mode) => { match mode { // Based on Union's DataTypeLayout @@ -174,7 +171,7 @@ pub(crate) fn into_buffers( } } -/// An generic representation of Arrow array data which encapsulates common attributes and +/// A generic representation of Arrow array data which encapsulates common attributes and /// operations for Arrow array. Specific operations for different arrays types (e.g., /// primitive, list, struct) are implemented in `Array`. /// @@ -452,12 +449,11 @@ impl ArrayData { for spec in layout.buffers.iter() { match spec { BufferSpec::FixedWidth { byte_width, .. 
} => { - let buffer_size = - self.len.checked_mul(*byte_width).ok_or_else(|| { - ArrowError::ComputeError( - "Integer overflow computing buffer size".to_string(), - ) - })?; + let buffer_size = self.len.checked_mul(*byte_width).ok_or_else(|| { + ArrowError::ComputeError( + "Integer overflow computing buffer size".to_string(), + ) + })?; result += buffer_size; } BufferSpec::VariableWidth => { @@ -590,9 +586,7 @@ impl ArrayData { DataType::LargeBinary | DataType::LargeUtf8 => { (vec![zeroed((len + 1) * 8), zeroed(0)], vec![], true) } - DataType::FixedSizeBinary(i) => { - (vec![zeroed(*i as usize * len)], vec![], true) - } + DataType::FixedSizeBinary(i) => (vec![zeroed(*i as usize * len)], vec![], true), DataType::List(f) | DataType::Map(f, _) => ( vec![zeroed((len + 1) * 4)], vec![ArrayData::new_empty(f.data_type())], @@ -705,7 +699,7 @@ impl ArrayData { /// /// This can be useful for when interacting with data sent over IPC or FFI, that may /// not meet the minimum alignment requirements - fn align_buffers(&mut self) { + pub fn align_buffers(&mut self) { let layout = layout(&self.data_type); for (buffer, spec) in self.buffers.iter_mut().zip(&layout.buffers) { if let BufferSpec::FixedWidth { alignment, .. } = spec { @@ -749,9 +743,7 @@ impl ArrayData { ))); } - for (i, (buffer, spec)) in - self.buffers.iter().zip(layout.buffers.iter()).enumerate() - { + for (i, (buffer, spec)) in self.buffers.iter().zip(layout.buffers.iter()).enumerate() { match spec { BufferSpec::FixedWidth { byte_width, @@ -999,10 +991,8 @@ impl ArrayData { } DataType::RunEndEncoded(run_ends_field, values_field) => { self.validate_num_child_data(2)?; - let run_ends_data = - self.get_valid_child_data(0, run_ends_field.data_type())?; - let values_data = - self.get_valid_child_data(1, values_field.data_type())?; + let run_ends_data = self.get_valid_child_data(0, run_ends_field.data_type())?; + let values_data = self.get_valid_child_data(1, values_field.data_type())?; if run_ends_data.len != values_data.len { return Err(ArrowError::InvalidArgumentError(format!( "The run_ends array length should be the same as values array length. Run_ends array length is {}, values array length is {}", @@ -1022,9 +1012,7 @@ impl ArrayData { for (i, (_, field)) in fields.iter().enumerate() { let field_data = self.get_valid_child_data(i, field.data_type())?; - if mode == &UnionMode::Sparse - && field_data.len < (self.len + self.offset) - { + if mode == &UnionMode::Sparse && field_data.len < (self.len + self.offset) { return Err(ArrowError::InvalidArgumentError(format!( "Sparse union child array #{} has length smaller than expected for union array ({} < {})", i, field_data.len, self.len + self.offset @@ -1083,14 +1071,14 @@ impl ArrayData { i: usize, expected_type: &DataType, ) -> Result<&ArrayData, ArrowError> { - let values_data = self.child_data - .get(i) - .ok_or_else(|| { - ArrowError::InvalidArgumentError(format!( - "{} did not have enough child arrays. Expected at least {} but had only {}", - self.data_type, i+1, self.child_data.len() - )) - })?; + let values_data = self.child_data.get(i).ok_or_else(|| { + ArrowError::InvalidArgumentError(format!( + "{} did not have enough child arrays. 
Expected at least {} but had only {}",
+                self.data_type,
+                i + 1,
+                self.child_data.len()
+            ))
+        })?;

         if expected_type != &values_data.data_type {
             return Err(ArrowError::InvalidArgumentError(format!(
@@ -1160,7 +1148,8 @@ impl ArrayData {
             if actual != nulls.null_count() {
                 return Err(ArrowError::InvalidArgumentError(format!(
                     "null_count value ({}) doesn't match actual number of nulls in array ({})",
-                    nulls.null_count(), actual
+                    nulls.null_count(),
+                    actual
                 )));
             }
         }
@@ -1209,23 +1198,22 @@ impl ArrayData {
     ) -> Result<(), ArrowError> {
         let mask = match mask {
             Some(mask) => mask,
-            None => return match child.null_count() {
-                0 => Ok(()),
-                _ => Err(ArrowError::InvalidArgumentError(format!(
-                    "non-nullable child of type {} contains nulls not present in parent {}",
-                    child.data_type,
-                    self.data_type
-                ))),
-            },
+            None => {
+                return match child.null_count() {
+                    0 => Ok(()),
+                    _ => Err(ArrowError::InvalidArgumentError(format!(
+                        "non-nullable child of type {} contains nulls not present in parent {}",
+                        child.data_type, self.data_type
+                    ))),
+                }
+            }
         };

         match child.nulls() {
-            Some(nulls) if !mask.contains(nulls) => {
-                Err(ArrowError::InvalidArgumentError(format!(
-                    "non-nullable child of type {} contains nulls not present in parent",
-                    child.data_type
-                )))
-            }
+            Some(nulls) if !mask.contains(nulls) => Err(ArrowError::InvalidArgumentError(format!(
+                "non-nullable child of type {} contains nulls not present in parent",
+                child.data_type
+            ))),
             _ => Ok(()),
         }
     }
@@ -1240,9 +1228,7 @@ impl ArrayData {
             DataType::Utf8 => self.validate_utf8::<i32>(),
             DataType::LargeUtf8 => self.validate_utf8::<i64>(),
             DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
-            DataType::LargeBinary => {
-                self.validate_offsets_full::<i64>(self.buffers[1].len())
-            }
+            DataType::LargeBinary => self.validate_offsets_full::<i64>(self.buffers[1].len()),
             DataType::List(_) | DataType::Map(_, _) => {
                 let child = &self.child_data[0];
                 self.validate_offsets_full::<i32>(child.len)
@@ -1300,11 +1286,7 @@ impl ArrayData {
     ///
     /// For example, the offsets buffer contained `[1, 2, 4]`, this
     /// function would call `validate([1,2])`, and `validate([2,4])`
-    fn validate_each_offset<T, V>(
-        &self,
-        offset_limit: usize,
-        validate: V,
-    ) -> Result<(), ArrowError>
+    fn validate_each_offset<T, V>(&self, offset_limit: usize, validate: V) -> Result<(), ArrowError>
     where
         T: ArrowNativeType + TryInto<usize> + num::Num + std::fmt::Display,
         V: Fn(usize, Range<usize>) -> Result<(), ArrowError>,
@@ -1358,32 +1340,26 @@ impl ArrayData {
         let values_buffer = &self.buffers[1].as_slice();
         if let Ok(values_str) = std::str::from_utf8(values_buffer) {
             // Validate Offsets are correct
-            self.validate_each_offset::<T, _>(
-                values_buffer.len(),
-                |string_index, range| {
-                    if !values_str.is_char_boundary(range.start)
-                        || !values_str.is_char_boundary(range.end)
-                    {
-                        return Err(ArrowError::InvalidArgumentError(format!(
-                            "incomplete utf-8 byte sequence from index {string_index}"
-                        )));
-                    }
-                    Ok(())
-                },
-            )
+            self.validate_each_offset::<T, _>(values_buffer.len(), |string_index, range| {
+                if !values_str.is_char_boundary(range.start)
+                    || !values_str.is_char_boundary(range.end)
+                {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "incomplete utf-8 byte sequence from index {string_index}"
+                    )));
+                }
+                Ok(())
+            })
         } else {
             // find specific offset that failed utf8 validation
-            self.validate_each_offset::<T, _>(
-                values_buffer.len(),
-                |string_index, range| {
-                    std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| {
-                        ArrowError::InvalidArgumentError(format!(
-                            "Invalid UTF8 sequence at string 
index {string_index} ({range:?}): {e}" - )) - })?; - Ok(()) - }, - ) + self.validate_each_offset::(values_buffer.len(), |string_index, range| { + std::str::from_utf8(&values_buffer[range.clone()]).map_err(|e| { + ArrowError::InvalidArgumentError(format!( + "Invalid UTF8 sequence at string index {string_index} ({range:?}): {e}" + )) + })?; + Ok(()) + }) } } @@ -1414,8 +1390,7 @@ impl ArrayData { assert!(buffer.len() / mem::size_of::() >= required_len); // Justification: buffer size was validated above - let indexes: &[T] = - &buffer.typed_data::()[self.offset..self.offset + self.len]; + let indexes: &[T] = &buffer.typed_data::()[self.offset..self.offset + self.len]; indexes.iter().enumerate().try_for_each(|(i, &dict_index)| { // Do not check the value is null (value can be arbitrary) diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs index f74ab880d478..74279bfb9af1 100644 --- a/arrow-data/src/decimal.rs +++ b/arrow-data/src/decimal.rs @@ -19,8 +19,8 @@ use arrow_buffer::i256; use arrow_schema::ArrowError; pub use arrow_schema::{ - DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, - DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, + DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, + DECIMAL_DEFAULT_SCALE, }; // MAX decimal256 value of little-endian format for each precision. @@ -28,308 +28,308 @@ pub use arrow_schema::{ // is encoded to the 32-byte width format of little-endian. pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 76] = [ i256::from_le_bytes([ - 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, + 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ]), i256::from_le_bytes([ - 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, + 99, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ]), i256::from_le_bytes([ - 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, + 231, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ]), i256::from_le_bytes([ - 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, + 15, 39, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, ]), i256::from_le_bytes([ - 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, + 159, 134, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, ]), i256::from_le_bytes([ - 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, + 63, 66, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, ]), i256::from_le_bytes([ - 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, + 127, 150, 152, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, + 255, 224, 245, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, + 255, 
201, 154, 59, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, + 255, 227, 11, 84, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 255, 231, 118, 72, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 255, 15, 165, 212, 232, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 255, 159, 114, 78, 24, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, + 255, 63, 122, 16, 243, 90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 127, 198, 164, 126, 141, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 192, 111, 242, 134, 35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 137, 93, 120, 69, 99, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 99, 167, 179, 182, 224, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 231, 137, 4, 35, 199, 138, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 15, 99, 45, 94, 199, 107, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 159, 222, 197, 173, 201, 53, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 63, 178, 186, 201, 224, 25, 30, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 127, 246, 74, 225, 199, 2, 45, 21, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 160, 237, 204, 206, 27, 194, 211, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 73, 72, 1, 20, 22, 149, 69, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 227, 210, 12, 200, 220, 210, 183, 82, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 231, 60, 128, 208, 159, 60, 46, 59, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 15, 97, 2, 37, 62, 94, 206, 79, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 159, 202, 23, 114, 109, 174, 15, 30, 67, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 63, 234, 237, 116, 70, 208, 156, 44, 159, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 127, 38, 75, 145, 192, 34, 32, 190, 55, 126, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 128, 239, 172, 133, 91, 65, 109, 45, 238, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 9, 91, 193, 56, 147, 141, 68, 198, 77, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 99, 142, 141, 55, 192, 135, 173, 190, 9, 237, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 231, 143, 135, 43, 130, 77, 199, 114, 97, 66, 19, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 15, 159, 75, 179, 21, 7, 201, 123, 206, 151, 192, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 159, 
54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 159, 54, 244, 0, 217, 70, 218, 213, 16, 238, 133, 7, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 63, 34, 138, 9, 122, 196, 134, 90, 168, 76, 59, 75, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 127, 86, 101, 95, 196, 172, 67, 137, 147, 254, 80, 240, 2, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 96, 245, 185, 171, 191, 164, 92, 195, 241, 41, 99, 29, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, - 37, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 201, 149, 67, 181, 124, 111, 158, 161, 113, 163, 223, 37, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 227, 217, 163, 20, 223, 90, 48, 80, 112, 98, 188, 122, 11, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, - 114, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 231, 130, 102, 206, 182, 140, 227, 33, 99, 216, 91, 203, 114, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, - 123, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 15, 29, 1, 16, 36, 127, 227, 82, 223, 115, 150, 241, 123, 4, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, - 215, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 159, 34, 11, 160, 104, 247, 226, 60, 185, 134, 224, 111, 215, 44, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, - 192, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 63, 90, 111, 64, 22, 170, 221, 96, 60, 67, 197, 94, 106, 192, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, - 179, 39, 132, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 127, 134, 89, 132, 222, 164, 168, 200, 91, 160, 180, 179, 39, 132, + 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, - 141, 41, 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 64, 127, 43, 177, 112, 150, 214, 149, 67, 14, 5, 141, 41, + 175, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, - 50, 130, 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
255, 255, 255, 255, 255, 255, 137, 248, 178, 235, 102, 224, 97, 218, 163, 142, 50, 130, + 159, 215, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, - 21, 59, 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 99, 181, 253, 52, 5, 196, 210, 135, 102, 146, 249, 21, 59, + 108, 68, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, - 219, 78, 58, 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 231, 21, 233, 17, 52, 168, 59, 78, 1, 184, 191, 219, 78, 58, + 172, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, - 20, 71, 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 15, 219, 26, 179, 8, 146, 84, 14, 13, 48, 125, 149, 20, 71, + 186, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, - 214, 205, 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 159, 142, 12, 255, 86, 180, 77, 143, 130, 224, 227, 214, 205, + 198, 70, 11, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, - 100, 10, 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 63, 146, 125, 246, 101, 11, 9, 153, 25, 197, 230, 100, 10, + 196, 195, 112, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, - 241, 103, 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 127, 182, 231, 160, 251, 113, 90, 250, 255, 178, 3, 241, 103, + 168, 165, 103, 104, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, - 106, 15, 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 32, 13, 73, 212, 115, 136, 199, 255, 253, 36, 106, 15, + 148, 120, 12, 20, 4, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, - 37, 154, 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 73, 131, 218, 74, 134, 84, 203, 253, 235, 113, 37, 154, + 200, 181, 124, 200, 40, 0, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, - 118, 5, 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 227, 32, 137, 236, 62, 77, 241, 233, 55, 115, 118, 5, + 214, 25, 223, 212, 151, 1, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, - 160, 54, 92, 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 231, 72, 91, 61, 117, 4, 109, 35, 47, 128, 160, 54, 92, + 2, 183, 80, 238, 15, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, - 69, 34, 154, 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 15, 217, 144, 101, 148, 44, 66, 98, 215, 1, 69, 34, 154, + 23, 38, 39, 79, 159, 0, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 
149, 214, 105, - 18, 178, 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 159, 122, 168, 247, 203, 189, 149, 214, 105, 18, 178, + 86, 5, 236, 124, 135, 23, 57, 6, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, - 244, 98, 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 63, 202, 148, 172, 247, 105, 217, 97, 34, 184, 244, 98, + 53, 56, 225, 74, 235, 58, 62, 0, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, - 143, 221, 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 127, 230, 207, 189, 172, 35, 126, 210, 87, 49, 143, 221, + 21, 50, 204, 236, 48, 77, 110, 2, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, - 151, 167, 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 0, 31, 106, 191, 100, 237, 56, 110, 237, 151, 167, + 218, 244, 249, 63, 233, 3, 79, 24, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, - 139, 138, 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 9, 54, 37, 122, 239, 69, 57, 78, 70, 239, 139, 138, + 144, 195, 127, 28, 39, 22, 243, 0, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, - 88, 119, 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 99, 28, 116, 197, 90, 187, 60, 14, 191, 88, 119, + 105, 165, 163, 253, 28, 135, 221, 126, 9, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, - 119, 169, 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 231, 27, 137, 182, 139, 81, 95, 142, 118, 119, 169, + 30, 118, 100, 232, 33, 71, 167, 244, 94, 0, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, - 170, 158, 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 15, 23, 91, 33, 117, 47, 185, 143, 161, 170, 158, + 50, 157, 236, 19, 83, 199, 136, 142, 181, 3, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, - 170, 50, 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 159, 230, 142, 77, 147, 218, 59, 157, 79, 170, 50, + 250, 35, 62, 199, 62, 201, 87, 145, 23, 37, 0, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, - 250, 197, 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 63, 2, 149, 7, 193, 137, 86, 36, 28, 167, 250, 197, + 103, 109, 200, 115, 220, 109, 173, 235, 114, 1, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, - 135, 202, 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 127, 22, 210, 75, 138, 97, 97, 107, 25, 135, 202, + 187, 13, 70, 212, 133, 156, 74, 198, 52, 125, 14, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, - 254, 70, 233, 85, 137, 188, 
74, 58, 29, 234, 190, 15, 228, 144, 0, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 224, 52, 246, 102, 207, 205, 49, 254, 70, 233, + 85, 137, 188, 74, 58, 29, 234, 190, 15, 228, 144, 0, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, - 197, 28, 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 201, 16, 158, 5, 26, 10, 242, 237, 197, 28, + 91, 93, 93, 235, 70, 36, 37, 117, 157, 232, 168, 5, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, - 187, 31, 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 227, 167, 44, 56, 4, 101, 116, 75, 187, 31, + 143, 165, 165, 49, 197, 106, 115, 147, 38, 22, 153, 56, 0, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, - 242, 80, 61, 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 231, 142, 190, 49, 42, 242, 139, 242, 80, 61, + 151, 119, 120, 240, 179, 43, 130, 194, 129, 221, 250, 53, 2, ]), i256::from_le_bytes([ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, - 121, 41, 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 15, 149, 113, 241, 165, 117, 119, 121, 41, + 101, 232, 171, 180, 100, 7, 181, 21, 153, 17, 167, 204, 27, 22, ]), ]; @@ -338,308 +338,308 @@ pub(crate) const MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 76] = [ // is encoded to the 76-byte width format of little-endian. pub(crate) const MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION: [i256; 76] = [ i256::from_le_bytes([ - 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 247, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 157, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 25, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 241, 216, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 97, 121, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ 
- 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 193, 189, 240, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 129, 105, 103, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 31, 10, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 54, 101, 196, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 28, 244, 171, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 24, 137, 183, 232, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 240, 90, 43, 23, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 96, 141, 177, 231, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 192, 133, 239, 12, 165, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 128, 57, 91, 129, 114, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 
255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 63, 144, 13, 121, 220, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 118, 162, 135, 186, 156, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 156, 88, 76, 73, 31, 242, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 24, 118, 251, 220, 56, 117, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 240, 156, 210, 161, 56, 148, 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 96, 33, 58, 82, 54, 202, 201, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 192, 77, 69, 54, 31, 230, 225, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 128, 9, 181, 30, 56, 253, 210, 234, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 95, 18, 51, 49, 228, 61, 44, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 182, 183, 254, 235, 233, 106, 186, 247, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 28, 45, 243, 55, 35, 45, 72, 173, 255, 255, 255, 255, 255, 255, 
255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 24, 195, 127, 47, 96, 195, 209, 196, 252, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 240, 158, 253, 218, 193, 161, 49, 176, 223, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 96, 53, 232, 141, 146, 81, 240, 225, 188, 254, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 192, 21, 18, 139, 185, 47, 99, 211, 96, 243, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 128, 217, 180, 110, 63, 221, 223, 65, 200, 129, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 127, 16, 83, 122, 164, 190, 146, 210, 17, 251, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 246, 164, 62, 199, 108, 114, 187, 57, 178, 206, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 156, 113, 114, 200, 63, 120, 82, 65, 246, 18, 254, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 24, 112, 120, 212, 125, 178, 56, 141, 158, 189, 236, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 240, 96, 180, 76, 234, 248, 54, 132, 49, 104, 63, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, - 255, 255, 255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 96, 201, 11, 255, 38, 185, 37, 42, 239, 17, 122, 248, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 192, 221, 117, 246, 133, 59, 121, 165, 87, 179, 196, 180, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 128, 169, 154, 160, 59, 83, 188, 118, 108, 1, 175, 15, 253, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 159, 10, 70, 84, 64, 91, 163, 60, 14, 214, 156, 226, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 54, 106, 188, 74, 131, 144, 97, 94, 142, 92, 32, 218, 254, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 28, 38, 92, 235, 32, 165, 207, 175, 143, 157, 67, 133, 244, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 24, 125, 153, 49, 73, 115, 28, 222, 156, 39, 164, 52, 141, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 240, 226, 254, 239, 219, 128, 28, 173, 32, 140, 105, 14, 132, 251, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 96, 221, 244, 95, 151, 8, 29, 195, 70, 121, 31, 144, 40, 211, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, - 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 192, 165, 144, 191, 233, 85, 34, 159, 195, 188, 58, 161, 149, 63, 254, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, - 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 128, 121, 166, 123, 33, 91, 87, 55, 164, 95, 75, 76, 216, 123, 238, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, - 80, 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 191, 128, 212, 78, 143, 105, 41, 106, 188, 241, 250, 114, 214, 80, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, - 249, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 118, 7, 77, 20, 153, 31, 158, 37, 92, 113, 205, 125, 96, 40, 249, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, - 187, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 156, 74, 2, 203, 250, 59, 45, 120, 153, 109, 6, 234, 196, 147, 187, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, - 83, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 24, 234, 22, 238, 203, 87, 196, 177, 254, 71, 64, 36, 177, 197, 83, 253, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, - 184, 69, 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 240, 36, 229, 76, 247, 109, 171, 241, 242, 207, 130, 106, 235, 184, 69, + 229, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, - 185, 244, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 96, 113, 243, 0, 169, 75, 178, 112, 125, 31, 28, 41, 50, 57, 185, 244, + 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, - 59, 60, 143, 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 192, 109, 130, 9, 154, 244, 246, 102, 230, 58, 25, 155, 245, 59, 60, 143, + 245, 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, - 152, 151, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 128, 73, 24, 95, 4, 142, 165, 5, 0, 77, 252, 14, 152, 87, 90, 152, 151, + 255, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, - 135, 243, 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 223, 242, 182, 43, 140, 119, 56, 0, 2, 219, 149, 240, 107, 135, 243, + 235, 251, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, - 74, 131, 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 182, 124, 37, 181, 121, 171, 52, 2, 20, 142, 218, 101, 55, 74, 131, + 55, 215, 255, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, - 230, 32, 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 28, 223, 118, 19, 193, 178, 14, 22, 200, 140, 137, 250, 41, 230, 32, + 43, 104, 254, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 194, 138, 251, 146, 220, 208, 127, 95, 201, - 163, 253, 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 24, 183, 164, 
194, 138, 251, 146, 220, 208, 127, 95, 201, 163, 253, + 72, 175, 17, 240, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, - 101, 232, 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 240, 38, 111, 154, 107, 211, 189, 157, 40, 254, 186, 221, 101, 232, + 217, 216, 176, 96, 255, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, - 131, 120, 232, 198, 249, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 96, 133, 87, 8, 52, 66, 106, 41, 150, 237, 77, 169, 250, 19, 131, 120, + 232, 198, 249, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, - 199, 30, 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 192, 53, 107, 83, 8, 150, 38, 158, 221, 71, 11, 157, 202, 199, 30, + 181, 20, 197, 193, 255, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, - 205, 51, 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 128, 25, 48, 66, 83, 220, 129, 45, 168, 206, 112, 34, 234, 205, 51, + 19, 207, 178, 145, 253, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, - 11, 6, 192, 22, 252, 176, 231, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 255, 224, 149, 64, 155, 18, 199, 145, 18, 104, 88, 37, 11, 6, 192, + 22, 252, 176, 231, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, - 111, 60, 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 246, 201, 218, 133, 16, 186, 198, 177, 185, 16, 116, 117, 111, 60, + 128, 227, 216, 233, 12, 255, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, - 90, 92, 2, 227, 120, 34, 129, 246, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 156, 227, 139, 58, 165, 68, 195, 241, 64, 167, 136, 150, 90, 92, 2, + 227, 120, 34, 129, 246, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, - 137, 155, 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 24, 228, 118, 73, 116, 174, 160, 113, 137, 136, 86, 225, 137, 155, + 23, 222, 184, 88, 11, 161, 255, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, - 98, 19, 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 240, 232, 164, 222, 138, 208, 70, 112, 94, 85, 97, 205, 98, 19, + 236, 172, 56, 119, 113, 74, 252, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, - 193, 56, 193, 54, 168, 110, 232, 218, 255, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 96, 25, 113, 178, 108, 37, 196, 98, 176, 85, 205, 5, 220, 193, 56, + 193, 54, 168, 110, 232, 218, 255, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, - 152, 146, 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 192, 253, 106, 248, 62, 118, 169, 219, 227, 88, 5, 58, 152, 146, + 55, 140, 35, 146, 82, 20, 141, 254, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 
128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, - 242, 185, 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 128, 233, 45, 180, 117, 158, 158, 148, 230, 120, 53, 68, 242, 185, + 43, 122, 99, 181, 57, 203, 130, 241, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, - 67, 181, 197, 226, 21, 65, 240, 27, 111, 255, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 31, 203, 9, 153, 48, 50, 206, 1, 185, 22, 170, 118, 67, 181, + 197, 226, 21, 65, 240, 27, 111, 255, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, - 162, 20, 185, 219, 218, 138, 98, 23, 87, 250, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 54, 239, 97, 250, 229, 245, 13, 18, 58, 227, 164, 162, 162, 20, + 185, 219, 218, 138, 98, 23, 87, 250, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, - 90, 90, 206, 58, 149, 140, 108, 217, 233, 102, 199, 255, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 28, 88, 211, 199, 251, 154, 139, 180, 68, 224, 112, 90, 90, 206, + 58, 149, 140, 108, 217, 233, 102, 199, 255, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, - 136, 135, 15, 76, 212, 125, 61, 126, 34, 5, 202, 253, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 24, 113, 65, 206, 213, 13, 116, 13, 175, 194, 104, 136, 135, 15, + 76, 212, 125, 61, 126, 34, 5, 202, 253, ]), i256::from_le_bytes([ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, - 84, 75, 155, 248, 74, 234, 102, 238, 88, 51, 228, 233, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 240, 106, 142, 14, 90, 138, 136, 134, 214, 154, 23, 84, 75, 155, + 248, 74, 234, 102, 238, 88, 51, 228, 233, ]), ]; @@ -758,10 +758,7 @@ pub fn validate_decimal_precision(value: i128, precision: u8) -> Result<(), Arro /// Validates that the specified `i256` of value can be properly /// interpreted as a Decimal256 number with precision `precision` #[inline] -pub fn validate_decimal256_precision( - value: i256, - precision: u8, -) -> Result<(), ArrowError> { +pub fn validate_decimal256_precision(value: i256, precision: u8) -> Result<(), ArrowError> { if precision > DECIMAL256_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal256 is {DECIMAL256_MAX_PRECISION}, but got {precision}", diff --git a/arrow-data/src/equal/boolean.rs b/arrow-data/src/equal/boolean.rs index a20ca5ac0bd7..addae936f118 100644 --- a/arrow-data/src/equal/boolean.rs +++ b/arrow-data/src/equal/boolean.rs @@ -78,11 +78,10 @@ pub(super) fn boolean_equal( // get a ref of the null buffer bytes, to use in testing for nullness let lhs_nulls = lhs.nulls().unwrap(); - BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len) - .all(|i| { - let lhs_pos = lhs_start + lhs.offset() + i; - let rhs_pos = rhs_start + rhs.offset() + i; - get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos) - }) + BitIndexIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len).all(|i| { + let lhs_pos = lhs_start + lhs.offset() + i; + let rhs_pos = rhs_start + rhs.offset() + i; + get_bit(lhs_values, lhs_pos) == get_bit(rhs_values, rhs_pos) + }) } } diff --git a/arrow-data/src/equal/fixed_binary.rs b/arrow-data/src/equal/fixed_binary.rs index 40dacdddd3a0..0778d77e2fdd 100644 --- a/arrow-data/src/equal/fixed_binary.rs +++ b/arrow-data/src/equal/fixed_binary.rs @@ -75,20 +75,15 @@ pub(super) fn fixed_binary_equal( }) } else { let lhs_nulls = 
lhs.nulls().unwrap(); - let lhs_slices_iter = BitSliceIterator::new( - lhs_nulls.validity(), - lhs_start + lhs_nulls.offset(), - len, - ); + let lhs_slices_iter = + BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len); let rhs_nulls = rhs.nulls().unwrap(); - let rhs_slices_iter = BitSliceIterator::new( - rhs_nulls.validity(), - rhs_start + rhs_nulls.offset(), - len, - ); + let rhs_slices_iter = + BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len); - lhs_slices_iter.zip(rhs_slices_iter).all( - |((l_start, l_end), (r_start, r_end))| { + lhs_slices_iter + .zip(rhs_slices_iter) + .all(|((l_start, l_end), (r_start, r_end))| { l_start == r_start && l_end == r_end && equal_len( @@ -98,8 +93,7 @@ pub(super) fn fixed_binary_equal( (rhs_start + r_start) * size, (l_end - l_start) * size, ) - }, - ) + }) } } } diff --git a/arrow-data/src/equal/mod.rs b/arrow-data/src/equal/mod.rs index fbc868d3f5c4..b279546474a0 100644 --- a/arrow-data/src/equal/mod.rs +++ b/arrow-data/src/equal/mod.rs @@ -76,24 +76,16 @@ fn equal_values( DataType::Int64 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Float32 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Float64 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), - DataType::Decimal128(_, _) => { - primitive_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::Decimal256(_, _) => { - primitive_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { + DataType::Decimal128(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Decimal256(_, _) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { primitive_equal::(lhs, rhs, lhs_start, rhs_start, len) } DataType::Date64 | DataType::Interval(IntervalUnit::DayTime) | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) => { - primitive_equal::(lhs, rhs, lhs_start, rhs_start, len) - } + | DataType::Duration(_) => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::Interval(IntervalUnit::MonthDayNano) => { primitive_equal::(lhs, rhs, lhs_start, rhs_start, len) } @@ -103,39 +95,21 @@ fn equal_values( DataType::LargeUtf8 | DataType::LargeBinary => { variable_sized_equal::(lhs, rhs, lhs_start, rhs_start, len) } - DataType::FixedSizeBinary(_) => { - fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len) - } + DataType::FixedSizeBinary(_) => fixed_binary_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::List(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), DataType::LargeList(_) => list_equal::(lhs, rhs, lhs_start, rhs_start, len), - DataType::FixedSizeList(_, _) => { - fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len) - } + DataType::FixedSizeList(_, _) => fixed_list_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::Struct(_) => struct_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::Union(_, _) => union_equal(lhs, rhs, lhs_start, rhs_start, len), DataType::Dictionary(data_type, _) => match data_type.as_ref() { DataType::Int8 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), - DataType::Int16 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::Int32 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::Int64 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - 
DataType::UInt8 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::UInt16 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::UInt32 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } - DataType::UInt64 => { - dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len) - } + DataType::Int16 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Int32 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::Int64 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::UInt8 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::UInt16 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::UInt32 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), + DataType::UInt64 => dictionary_equal::(lhs, rhs, lhs_start, rhs_start, len), _ => unreachable!(), }, DataType::Float16 => primitive_equal::(lhs, rhs, lhs_start, rhs_start, len), diff --git a/arrow-data/src/equal/primitive.rs b/arrow-data/src/equal/primitive.rs index 7b3cbc9eb949..e92fdd2ba23b 100644 --- a/arrow-data/src/equal/primitive.rs +++ b/arrow-data/src/equal/primitive.rs @@ -73,20 +73,15 @@ pub(super) fn primitive_equal( }) } else { let lhs_nulls = lhs.nulls().unwrap(); - let lhs_slices_iter = BitSliceIterator::new( - lhs_nulls.validity(), - lhs_start + lhs_nulls.offset(), - len, - ); + let lhs_slices_iter = + BitSliceIterator::new(lhs_nulls.validity(), lhs_start + lhs_nulls.offset(), len); let rhs_nulls = rhs.nulls().unwrap(); - let rhs_slices_iter = BitSliceIterator::new( - rhs_nulls.validity(), - rhs_start + rhs_nulls.offset(), - len, - ); + let rhs_slices_iter = + BitSliceIterator::new(rhs_nulls.validity(), rhs_start + rhs_nulls.offset(), len); - lhs_slices_iter.zip(rhs_slices_iter).all( - |((l_start, l_end), (r_start, r_end))| { + lhs_slices_iter + .zip(rhs_slices_iter) + .all(|((l_start, l_end), (r_start, r_end))| { l_start == r_start && l_end == r_end && equal_len( @@ -96,8 +91,7 @@ pub(super) fn primitive_equal( (rhs_start + r_start) * byte_width, (l_end - l_start) * byte_width, ) - }, - ) + }) } } } diff --git a/arrow-data/src/equal/union.rs b/arrow-data/src/equal/union.rs index 5869afc30dbe..62de276e507f 100644 --- a/arrow-data/src/equal/union.rs +++ b/arrow-data/src/equal/union.rs @@ -116,10 +116,7 @@ pub(super) fn union_equal( rhs_fields, ) } - ( - DataType::Union(_, UnionMode::Sparse), - DataType::Union(_, UnionMode::Sparse), - ) => { + (DataType::Union(_, UnionMode::Sparse), DataType::Union(_, UnionMode::Sparse)) => { lhs_type_id_range == rhs_type_id_range && equal_sparse(lhs, rhs, lhs_start, rhs_start, len) } diff --git a/arrow-data/src/equal/utils.rs b/arrow-data/src/equal/utils.rs index fa6211542550..cc81943756d2 100644 --- a/arrow-data/src/equal/utils.rs +++ b/arrow-data/src/equal/utils.rs @@ -73,11 +73,9 @@ pub(super) fn base_equal(lhs: &ArrayData, rhs: &ArrayData) -> bool { let r_value_field = r_fields.get(1).unwrap(); // We don't enforce the equality of field names - let data_type_equal = l_key_field.data_type() - == r_key_field.data_type() + let data_type_equal = l_key_field.data_type() == r_key_field.data_type() && l_value_field.data_type() == r_value_field.data_type(); - let nullability_equal = l_key_field.is_nullable() - == r_key_field.is_nullable() + let nullability_equal = l_key_field.is_nullable() == r_key_field.is_nullable() && l_value_field.is_nullable() == r_value_field.is_nullable(); let metadata_equal = l_key_field.metadata() == r_key_field.metadata() 
&& l_value_field.metadata() == r_value_field.metadata(); diff --git a/arrow-data/src/ffi.rs b/arrow-data/src/ffi.rs index 7623ced043cc..589f7dac6d19 100644 --- a/arrow-data/src/ffi.rs +++ b/arrow-data/src/ffi.rs @@ -168,6 +168,12 @@ impl FFI_ArrowArray { .collect::>(); let n_children = children.len() as i64; + // As in the IPC format, emit null_count = length for Null type + let null_count = match data.data_type() { + DataType::Null => data.len(), + _ => data.null_count(), + }; + // create the private data owning everything. // any other data must be added here, e.g. via a struct, to track lifetime. let mut private_data = Box::new(ArrayPrivateData { @@ -179,7 +185,7 @@ impl FFI_ArrowArray { Self { length: data.len() as i64, - null_count: data.null_count() as i64, + null_count: null_count as i64, offset: data.offset() as i64, n_buffers, n_children, @@ -191,6 +197,22 @@ impl FFI_ArrowArray { } } + /// Takes ownership of the pointed to [`FFI_ArrowArray`] + /// + /// This acts to [move] the data out of `array`, setting the release callback to NULL + /// + /// # Safety + /// + /// * `array` must be [valid] for reads and writes + /// * `array` must be properly aligned + /// * `array` must point to a properly initialized value of [`FFI_ArrowArray`] + /// + /// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array + /// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety + pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self { + std::ptr::replace(array, Self::empty()) + } + /// create an empty `FFI_ArrowArray`, which can be used to import data into pub fn empty() -> Self { Self { diff --git a/arrow-data/src/transform/list.rs b/arrow-data/src/transform/list.rs index 9d5d8330cb1e..d9a1c62a8e8e 100644 --- a/arrow-data/src/transform/list.rs +++ b/arrow-data/src/transform/list.rs @@ -23,9 +23,7 @@ use crate::ArrayData; use arrow_buffer::ArrowNativeType; use num::{CheckedAdd, Integer}; -pub(super) fn build_extend( - array: &ArrayData, -) -> Extend { +pub(super) fn build_extend(array: &ArrayData) -> Extend { let offsets = array.buffer::(0); Box::new( move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| { @@ -35,11 +33,7 @@ pub(super) fn build_extend( let last_offset: T = unsafe { get_last_offset(offset_buffer) }; // offsets - extend_offsets::( - offset_buffer, - last_offset, - &offsets[start..start + len + 1], - ); + extend_offsets::(offset_buffer, last_offset, &offsets[start..start + len + 1]); mutable.child_data[0].extend( index, @@ -50,10 +44,7 @@ pub(super) fn build_extend( ) } -pub(super) fn extend_nulls( - mutable: &mut _MutableArrayData, - len: usize, -) { +pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { let offset_buffer = &mut mutable.buffer1; // this is safe due to how offset is built. See details on `get_last_offset` diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index f4b2b46d1723..268cf10f2326 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -173,11 +173,7 @@ impl<'a> std::fmt::Debug for MutableArrayData<'a> { /// Builds an extend that adds `offset` to the source primitive /// Additionally validates that `max` fits into the /// the underlying primitive returning None if not -fn build_extend_dictionary( - array: &ArrayData, - offset: usize, - max: usize, -) -> Option { +fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Option { macro_rules! 
validate_and_build { ($dt: ty) => {{ let _: $dt = max.try_into().ok()?; @@ -215,27 +211,19 @@ fn build_extend(array: &ArrayData) -> Extend { DataType::Int64 => primitive::build_extend::(array), DataType::Float32 => primitive::build_extend::(array), DataType::Float64 => primitive::build_extend::(array), - DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { + DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { primitive::build_extend::(array) } DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) | DataType::Duration(_) - | DataType::Interval(IntervalUnit::DayTime) => { - primitive::build_extend::(array) - } - DataType::Interval(IntervalUnit::MonthDayNano) => { - primitive::build_extend::(array) - } + | DataType::Interval(IntervalUnit::DayTime) => primitive::build_extend::(array), + DataType::Interval(IntervalUnit::MonthDayNano) => primitive::build_extend::(array), DataType::Decimal128(_, _) => primitive::build_extend::(array), DataType::Decimal256(_, _) => primitive::build_extend::(array), DataType::Utf8 | DataType::Binary => variable_size::build_extend::(array), - DataType::LargeUtf8 | DataType::LargeBinary => { - variable_size::build_extend::(array) - } + DataType::LargeUtf8 | DataType::LargeBinary => variable_size::build_extend::(array), DataType::Map(_, _) | DataType::List(_) => list::build_extend::(array), DataType::LargeList(_) => list::build_extend::(array), DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"), @@ -265,9 +253,9 @@ fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { DataType::Int64 => primitive::extend_nulls::, DataType::Float32 => primitive::extend_nulls::, DataType::Float64 => primitive::extend_nulls::, - DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => primitive::extend_nulls::, + DataType::Date32 | DataType::Time32(_) | DataType::Interval(IntervalUnit::YearMonth) => { + primitive::extend_nulls:: + } DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) @@ -366,6 +354,14 @@ impl<'a> MutableArrayData<'a> { ) -> Self { let data_type = arrays[0].data_type(); + for a in arrays.iter().skip(1) { + assert_eq!( + data_type, + a.data_type(), + "Arrays with inconsistent types passed to MutableArrayData" + ) + } + // if any of the arrays has nulls, insertions from any array requires setting bits // as there is at least one array with nulls. 
let use_nulls = use_nulls | arrays.iter().any(|array| array.null_count() > 0); @@ -380,10 +376,7 @@ impl<'a> MutableArrayData<'a> { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) } - ( - DataType::Utf8 | DataType::Binary, - Capacities::Binary(capacity, Some(value_cap)), - ) => { + (DataType::Utf8 | DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => { array_capacity = *capacity; preallocate_offset_and_binary_buffer::(*capacity, *value_cap) } @@ -391,10 +384,7 @@ impl<'a> MutableArrayData<'a> { array_capacity = *capacity; new_buffers(data_type, *capacity) } - ( - DataType::List(_) | DataType::LargeList(_), - Capacities::List(capacity, _), - ) => { + (DataType::List(_) | DataType::LargeList(_), Capacities::List(capacity, _)) => { array_capacity = *capacity; new_buffers(data_type, *capacity) } @@ -435,16 +425,15 @@ impl<'a> MutableArrayData<'a> { .map(|array| &array.child_data()[0]) .collect::>(); - let capacities = if let Capacities::List(capacity, ref child_capacities) = - capacities - { - child_capacities - .clone() - .map(|c| *c) - .unwrap_or(Capacities::Array(capacity)) - } else { - Capacities::Array(array_capacity) - }; + let capacities = + if let Capacities::List(capacity, ref child_capacities) = capacities { + child_capacities + .clone() + .map(|c| *c) + .unwrap_or(Capacities::Array(capacity)) + } else { + Capacities::Array(array_capacity) + }; vec![MutableArrayData::with_capacities( children, use_nulls, capacities, @@ -546,8 +535,7 @@ impl<'a> MutableArrayData<'a> { .collect(); let capacity = lengths.iter().sum(); - let mut mutable = - MutableArrayData::new(dictionaries, false, capacity); + let mut mutable = MutableArrayData::new(dictionaries, false, capacity); for (i, len) in lengths.iter().enumerate() { mutable.extend(i, 0, *len) diff --git a/arrow-data/src/transform/primitive.rs b/arrow-data/src/transform/primitive.rs index b5c826438bfc..627dc00de1df 100644 --- a/arrow-data/src/transform/primitive.rs +++ b/arrow-data/src/transform/primitive.rs @@ -47,9 +47,6 @@ where ) } -pub(super) fn extend_nulls( - mutable: &mut _MutableArrayData, - len: usize, -) { +pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { mutable.buffer1.extend_zeros(len * size_of::()); } diff --git a/arrow-data/src/transform/utils.rs b/arrow-data/src/transform/utils.rs index 17bb87e88a5c..5407f68e0d0c 100644 --- a/arrow-data/src/transform/utils.rs +++ b/arrow-data/src/transform/utils.rs @@ -45,9 +45,7 @@ pub(super) fn extend_offsets( } #[inline] -pub(super) unsafe fn get_last_offset( - offset_buffer: &MutableBuffer, -) -> T { +pub(super) unsafe fn get_last_offset(offset_buffer: &MutableBuffer) -> T { // JUSTIFICATION // Benefit // 20% performance improvement extend of variable sized arrays (see bench `mutable_array`) diff --git a/arrow-data/src/transform/variable_size.rs b/arrow-data/src/transform/variable_size.rs index 597a8b2b6645..fa1592d973ed 100644 --- a/arrow-data/src/transform/variable_size.rs +++ b/arrow-data/src/transform/variable_size.rs @@ -39,9 +39,7 @@ fn extend_offset_values>( buffer.extend_from_slice(new_values); } -pub(super) fn build_extend< - T: ArrowNativeType + Integer + CheckedAdd + AsPrimitive, ->( +pub(super) fn build_extend>( array: &ArrayData, ) -> Extend { let offsets = array.buffer::(0); @@ -54,21 +52,14 @@ pub(super) fn build_extend< // this is safe due to how offset is built. 
See details on `get_last_offset` let last_offset = unsafe { get_last_offset(offset_buffer) }; - extend_offsets::<T>( - offset_buffer, - last_offset, - &offsets[start..start + len + 1], - ); + extend_offsets::<T>(offset_buffer, last_offset, &offsets[start..start + len + 1]); // values extend_offset_values::(values_buffer, offsets, values, start, len); }, ) } -pub(super) fn extend_nulls( - mutable: &mut _MutableArrayData, - len: usize, -) { +pub(super) fn extend_nulls(mutable: &mut _MutableArrayData, len: usize) { let offset_buffer = &mut mutable.buffer1; // this is safe due to how offset is built. See details on `get_last_offset` diff --git a/arrow-flight/Cargo.toml b/arrow-flight/Cargo.toml index 1a53dbddb13d..1bea347c3037 100644 --- a/arrow-flight/Cargo.toml +++ b/arrow-flight/Cargo.toml @@ -44,14 +44,15 @@ bytes = { version = "1", default-features = false } futures = { version = "0.3", default-features = false, features = ["alloc"] } once_cell = { version = "1", optional = true } paste = { version = "1.0" } -prost = { version = "0.11", default-features = false, features = ["prost-derive"] } +prost = { version = "0.12.1", default-features = false, features = ["prost-derive"] } tokio = { version = "1.0", default-features = false, features = ["macros", "rt", "rt-multi-thread"] } -tonic = { version = "0.9", default-features = false, features = ["transport", "codegen", "prost"] } +tonic = { version = "0.10.0", default-features = false, features = ["transport", "codegen", "prost"] } # CLI-related dependencies -clap = { version = "4.1", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage"], optional = true } -tracing-log = { version = "0.1", optional = true } -tracing-subscriber = { version = "0.3.1", default-features = false, features = ["ansi", "fmt"], optional = true } +anyhow = { version = "1.0", optional = true } +clap = { version = "4.4.6", default-features = false, features = ["std", "derive", "env", "help", "error-context", "usage", "wrap_help", "color", "suggestions"], optional = true } +tracing-log = { version = "0.2", optional = true } +tracing-subscriber = { version = "0.3.1", default-features = false, features = ["ansi", "env-filter", "fmt"], optional = true } [package.metadata.docs.rs] all-features = true @@ -62,7 +63,7 @@ flight-sql-experimental = ["arrow-arith", "arrow-data", "arrow-ord", "arrow-row" tls = ["tonic/tls"] # Enable CLI tools -cli = ["arrow-cast/prettyprint", "clap", "tracing-log", "tracing-subscriber", "tonic/tls-webpki-roots"] +cli = ["anyhow", "arrow-cast/prettyprint", "clap", "tracing-log", "tracing-subscriber", "tonic/tls-webpki-roots"] [dev-dependencies] arrow-cast = { workspace = true, features = ["prettyprint"] } diff --git a/arrow-flight/README.md b/arrow-flight/README.md index 9194b209fe72..b80772ac927e 100644 --- a/arrow-flight/README.md +++ b/arrow-flight/README.md @@ -44,5 +44,33 @@ that demonstrate how to build a Flight server implemented with [tonic](https://d ## Feature Flags - `flight-sql-experimental`: Enables experimental support for - [Apache Arrow FlightSQL](https://arrow.apache.org/docs/format/FlightSql.html), - a protocol for interacting with SQL databases. + [Apache Arrow FlightSQL], a protocol for interacting with SQL databases. + +## CLI + +This crate offers a basic [Apache Arrow FlightSQL] command line interface. + +The client can be installed from the repository: + +```console +$ cargo install --features=cli,flight-sql-experimental,tls --bin=flight_sql_client --path=.
--locked +``` + +The client comes with extensive help text: + +```console +$ flight_sql_client help +``` + +A query can be executed using: + +```console +$ flight_sql_client --host example.com statement-query "SELECT 1;" ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ +``` + +[apache arrow flightsql]: https://arrow.apache.org/docs/format/FlightSql.html diff --git a/arrow-flight/examples/flight_sql_server.rs b/arrow-flight/examples/flight_sql_server.rs index 1e99957390d8..bd94d3c499ca 100644 --- a/arrow-flight/examples/flight_sql_server.rs +++ b/arrow-flight/examples/flight_sql_server.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use arrow_flight::sql::server::PeekableFlightDataStream; use base64::prelude::BASE64_STANDARD; use base64::Engine; use futures::{stream, Stream, TryStreamExt}; @@ -31,28 +32,26 @@ use arrow_array::builder::StringBuilder; use arrow_array::{ArrayRef, RecordBatch}; use arrow_flight::encode::FlightDataEncoderBuilder; use arrow_flight::sql::metadata::{ - SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData, - XdbcTypeInfoDataBuilder, + SqlInfoData, SqlInfoDataBuilder, XdbcTypeInfo, XdbcTypeInfoData, XdbcTypeInfoDataBuilder, }; use arrow_flight::sql::{ server::FlightSqlService, ActionBeginSavepointRequest, ActionBeginSavepointResult, - ActionBeginTransactionRequest, ActionBeginTransactionResult, - ActionCancelQueryRequest, ActionCancelQueryResult, - ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, - ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest, - ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs, - CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, - CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, - CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, + ActionBeginTransactionRequest, ActionBeginTransactionResult, ActionCancelQueryRequest, + ActionCancelQueryResult, ActionClosePreparedStatementRequest, + ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, + ActionCreatePreparedSubstraitPlanRequest, ActionEndSavepointRequest, + ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference, + CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys, + CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery, - CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt, - Searchable, SqlInfo, TicketStatementQuery, XdbcDataType, + CommandStatementSubstraitPlan, CommandStatementUpdate, Nullable, ProstMessageExt, Searchable, + SqlInfo, TicketStatementQuery, XdbcDataType, }; use arrow_flight::utils::batches_to_flight_data; use arrow_flight::{ - flight_service_server::FlightService, flight_service_server::FlightServiceServer, - Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, - HandshakeResponse, IpcMessage, Location, SchemaAsIpc, Ticket, + flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action, + FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse, + IpcMessage, Location, SchemaAsIpc, Ticket, }; use arrow_ipc::writer::IpcWriteOptions; use arrow_schema::{ArrowError, DataType, Field, Schema}; @@ -166,8 +165,7 @@ impl FlightSqlService for 
FlightSqlServiceImpl { let bytes = BASE64_STANDARD .decode(base64) .map_err(|e| status!("authorization not decodable", e))?; - let str = String::from_utf8(bytes) - .map_err(|e| status!("authorization not parsable", e))?; + let str = String::from_utf8(bytes).map_err(|e| status!("authorization not parsable", e))?; let parts: Vec<_> = str.split(':').collect(); let (user, pass) = match parts.as_slice() { [user, pass] => (user, pass), @@ -194,8 +192,7 @@ impl FlightSqlService for FlightSqlServiceImpl { _message: Any, ) -> Result::DoGetStream>, Status> { self.check_token(&request)?; - let batch = - Self::fake_result().map_err(|e| status!("Could not fake a result", e))?; + let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?; let schema = batch.schema(); let batches = vec![batch]; let flight_data = batches_to_flight_data(schema.as_ref(), batches) @@ -237,8 +234,7 @@ impl FlightSqlService for FlightSqlServiceImpl { self.check_token(&request)?; let handle = std::str::from_utf8(&cmd.prepared_statement_handle) .map_err(|e| status!("Unable to parse handle", e))?; - let batch = - Self::fake_result().map_err(|e| status!("Could not fake a result", e))?; + let batch = Self::fake_result().map_err(|e| status!("Could not fake a result", e))?; let schema = (*batch.schema()).clone(); let num_rows = batch.num_rows(); let num_bytes = batch.get_array_memory_size(); @@ -602,7 +598,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_statement_update( &self, _ticket: CommandStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Ok(FAKE_UPDATE_RESULT) } @@ -610,7 +606,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_substrait_plan( &self, _ticket: CommandStatementSubstraitPlan, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_substrait_plan not implemented", @@ -620,7 +616,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_prepared_statement_query( &self, _query: CommandPreparedStatementQuery, - _request: Request>, + _request: Request, ) -> Result::DoPutStream>, Status> { Err(Status::unimplemented( "do_put_prepared_statement_query not implemented", @@ -630,7 +626,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_prepared_statement_update( &self, _query: CommandPreparedStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_prepared_statement_update not implemented", @@ -735,8 +731,7 @@ async fn main() -> Result<(), Box> { if std::env::var("USE_TLS").ok().is_some() { let cert = std::fs::read_to_string("arrow-flight/examples/data/server.pem")?; let key = std::fs::read_to_string("arrow-flight/examples/data/server.key")?; - let client_ca = - std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?; + let client_ca = std::fs::read_to_string("arrow-flight/examples/data/client_ca.pem")?; let tls_config = ServerTlsConfig::new() .identity(Identity::from_pem(&cert, &key)) @@ -788,7 +783,6 @@ mod tests { use arrow_cast::pretty::pretty_format_batches; use arrow_flight::sql::client::FlightSqlServiceClient; - use arrow_flight::utils::flight_data_to_batches; use tonic::transport::server::TcpIncoming; use tonic::transport::{Certificate, Endpoint}; use tower::service_fn; @@ -954,8 +948,7 @@ mod tests { let ticket = flight_info.endpoint[0].ticket.as_ref().unwrap().clone(); let flight_data = client.do_get(ticket).await.unwrap(); - let flight_data: Vec = flight_data.try_collect().await.unwrap(); - let 
batches = flight_data_to_batches(&flight_data).unwrap(); + let batches: Vec<_> = flight_data.try_collect().await.unwrap(); let res = pretty_format_batches(batches.as_slice()).unwrap(); let expected = r#" diff --git a/arrow-flight/examples/server.rs b/arrow-flight/examples/server.rs index 1ed21acef9b8..85ac4ca1384c 100644 --- a/arrow-flight/examples/server.rs +++ b/arrow-flight/examples/server.rs @@ -20,9 +20,9 @@ use tonic::transport::Server; use tonic::{Request, Response, Status, Streaming}; use arrow_flight::{ - flight_service_server::FlightService, flight_service_server::FlightServiceServer, - Action, ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, - HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, + flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action, + ActionType, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, + HandshakeResponse, PutResult, SchemaResult, Ticket, }; #[derive(Clone)] diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml index 8f889c0a7cb9..4f7a032f51e5 100644 --- a/arrow-flight/gen/Cargo.toml +++ b/arrow-flight/gen/Cargo.toml @@ -32,6 +32,6 @@ publish = false [dependencies] # Pin specific version of the tonic-build dependencies to avoid auto-generated # (and checked in) arrow.flight.protocol.rs from changing -proc-macro2 = { version = "=1.0.66", default-features = false } -prost-build = { version = "=0.11.9", default-features = false } -tonic-build = { version = "=0.9.2", default-features = false, features = ["transport", "prost"] } +proc-macro2 = { version = "=1.0.70", default-features = false } +prost-build = { version = "=0.12.3", default-features = false } +tonic-build = { version = "=0.10.2", default-features = false, features = ["transport", "prost"] } diff --git a/arrow-flight/src/arrow.flight.protocol.rs b/arrow-flight/src/arrow.flight.protocol.rs index 10dc7ace0356..e76013bd7c5f 100644 --- a/arrow-flight/src/arrow.flight.protocol.rs +++ b/arrow-flight/src/arrow.flight.protocol.rs @@ -685,7 +685,7 @@ pub mod flight_service_server { #[async_trait] pub trait FlightService: Send + Sync + 'static { /// Server streaming response type for the Handshake method. - type HandshakeStream: futures_core::Stream< + type HandshakeStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -700,7 +700,7 @@ pub mod flight_service_server { request: tonic::Request>, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the ListFlights method. - type ListFlightsStream: futures_core::Stream< + type ListFlightsStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -744,7 +744,7 @@ pub mod flight_service_server { request: tonic::Request, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the DoGet method. - type DoGetStream: futures_core::Stream< + type DoGetStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -759,7 +759,7 @@ pub mod flight_service_server { request: tonic::Request, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the DoPut method. - type DoPutStream: futures_core::Stream< + type DoPutStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -776,7 +776,7 @@ pub mod flight_service_server { request: tonic::Request>, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the DoExchange method. 
- type DoExchangeStream: futures_core::Stream< + type DoExchangeStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -792,7 +792,7 @@ pub mod flight_service_server { request: tonic::Request>, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the DoAction method. - type DoActionStream: futures_core::Stream< + type DoActionStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -809,7 +809,7 @@ pub mod flight_service_server { request: tonic::Request, ) -> std::result::Result, tonic::Status>; /// Server streaming response type for the ListActions method. - type ListActionsStream: futures_core::Stream< + type ListActionsStream: tonic::codegen::tokio_stream::Stream< Item = std::result::Result, > + Send @@ -930,7 +930,9 @@ pub mod flight_service_server { >, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).handshake(request).await }; + let fut = async move { + ::handshake(&inner, request).await + }; Box::pin(fut) } } @@ -976,7 +978,7 @@ pub mod flight_service_server { ) -> Self::Future { let inner = Arc::clone(&self.0); let fut = async move { - (*inner).list_flights(request).await + ::list_flights(&inner, request).await }; Box::pin(fut) } @@ -1022,7 +1024,7 @@ pub mod flight_service_server { ) -> Self::Future { let inner = Arc::clone(&self.0); let fut = async move { - (*inner).get_flight_info(request).await + ::get_flight_info(&inner, request).await }; Box::pin(fut) } @@ -1067,7 +1069,9 @@ pub mod flight_service_server { request: tonic::Request, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).get_schema(request).await }; + let fut = async move { + ::get_schema(&inner, request).await + }; Box::pin(fut) } } @@ -1112,7 +1116,9 @@ pub mod flight_service_server { request: tonic::Request, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).do_get(request).await }; + let fut = async move { + ::do_get(&inner, request).await + }; Box::pin(fut) } } @@ -1157,7 +1163,9 @@ pub mod flight_service_server { request: tonic::Request>, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).do_put(request).await }; + let fut = async move { + ::do_put(&inner, request).await + }; Box::pin(fut) } } @@ -1202,7 +1210,9 @@ pub mod flight_service_server { request: tonic::Request>, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).do_exchange(request).await }; + let fut = async move { + ::do_exchange(&inner, request).await + }; Box::pin(fut) } } @@ -1247,7 +1257,9 @@ pub mod flight_service_server { request: tonic::Request, ) -> Self::Future { let inner = Arc::clone(&self.0); - let fut = async move { (*inner).do_action(request).await }; + let fut = async move { + ::do_action(&inner, request).await + }; Box::pin(fut) } } @@ -1293,7 +1305,7 @@ pub mod flight_service_server { ) -> Self::Future { let inner = Arc::clone(&self.0); let fut = async move { - (*inner).list_actions(request).await + ::list_actions(&inner, request).await }; Box::pin(fut) } diff --git a/arrow-flight/src/bin/flight_sql_client.rs b/arrow-flight/src/bin/flight_sql_client.rs index 20c8062f899e..296efc1c308e 100644 --- a/arrow-flight/src/bin/flight_sql_client.rs +++ b/arrow-flight/src/bin/flight_sql_client.rs @@ -17,62 +17,58 @@ use std::{sync::Arc, time::Duration}; -use arrow_array::RecordBatch; -use arrow_cast::pretty::pretty_format_batches; -use arrow_flight::{ - 
sql::client::FlightSqlServiceClient, utils::flight_data_to_batches, FlightData, -}; -use arrow_schema::{ArrowError, Schema}; -use clap::Parser; +use anyhow::{bail, Context, Result}; +use arrow_array::{ArrayRef, Datum, RecordBatch, StringArray}; +use arrow_cast::{cast_with_options, pretty::pretty_format_batches, CastOptions}; +use arrow_flight::{sql::client::FlightSqlServiceClient, FlightInfo}; +use arrow_schema::Schema; +use clap::{Parser, Subcommand}; use futures::TryStreamExt; -use tonic::transport::{Channel, ClientTlsConfig, Endpoint}; +use tonic::{ + metadata::MetadataMap, + transport::{Channel, ClientTlsConfig, Endpoint}, +}; use tracing_log::log::info; -/// A ':' separated key value pair -#[derive(Debug, Clone)] -struct KeyValue<K, V> { - pub key: K, - pub value: V, -} - -impl<K, V> std::str::FromStr for KeyValue<K, V> -where - K: std::str::FromStr, - V: std::str::FromStr, - K::Err: std::fmt::Display, - V::Err: std::fmt::Display, -{ - type Err = String; - - fn from_str(s: &str) -> std::result::Result<Self, Self::Err> { - let parts = s.splitn(2, ':').collect::<Vec<_>>(); - match parts.as_slice() { - [key, value] => { - let key = K::from_str(key).map_err(|e| e.to_string())?; - let value = V::from_str(value.trim()).map_err(|e| e.to_string())?; - Ok(Self { key, value }) - } - _ => Err(format!( - "Invalid key value pair - expected 'KEY:VALUE' got '{s}'" - )), - } - } +/// Logging CLI config. +#[derive(Debug, Parser)] +pub struct LoggingArgs { + /// Log verbosity. + /// + /// Defaults to "warn". + /// + /// Use `-v` for "info", `-vv` for "debug", `-vvv` for "trace". + /// + /// Note you can also set the logging level using the `RUST_LOG` environment variable: + /// `RUST_LOG=debug`. + #[clap( + short = 'v', + long = "verbose", + action = clap::ArgAction::Count, + )] + log_verbose_count: u8, } #[derive(Debug, Parser)] struct ClientArgs { /// Additional headers. /// - /// Values should be key value pairs separated by ':' - #[clap(long, value_delimiter = ',')] - headers: Vec<KeyValue<String, String>>, + /// Can be given multiple times. Headers and values are separated by '='. + /// + /// Example: `-H foo=bar -H baz=42` + #[clap(long = "header", short = 'H', value_parser = parse_key_val)] + headers: Vec<(String, String)>, - /// Username - #[clap(long)] + /// Username. + /// + /// Optional. If given, `password` must also be set. + #[clap(long, requires = "password")] username: Option<String>, - /// Password - #[clap(long)] + /// Password. + /// + /// Optional. If given, `username` must also be set. + #[clap(long, requires = "username")] password: Option<String>, /// Auth token. @@ -80,78 +76,199 @@ struct ClientArgs { token: Option<String>, /// Use TLS. + /// + /// If not provided, use a cleartext connection. #[clap(long)] tls: bool, /// Server host. + /// + /// Required. #[clap(long)] host: String, /// Server port. + /// + /// Defaults to `443` if `tls` is set, otherwise defaults to `80`. #[clap(long)] port: Option<u16>, } #[derive(Debug, Parser)] struct Args { + /// Logging args. + #[clap(flatten)] + logging_args: LoggingArgs, + /// Client args. #[clap(flatten)] client_args: ClientArgs, - /// SQL query. - query: String, + #[clap(subcommand)] + cmd: Command, +} + +/// Different available commands. +#[derive(Debug, Subcommand)] +enum Command { + /// Execute the given statement. + StatementQuery { + /// SQL query. + /// + /// Required. + query: String, + }, + + /// Prepare the given statement and then execute it. + PreparedStatementQuery { + /// SQL query. + /// + /// Required. + /// + /// Can contain placeholders like `$1`.
+ /// + /// Example: `SELECT * FROM t WHERE x = $1` + query: String, + + /// Additional parameters. + /// + /// Can be given multiple times. Names and values are separated by '='. Values will be + /// converted to the type that the server reported for the prepared statement. + /// + /// Example: `-p $1=42` + #[clap(short, value_parser = parse_key_val)] + params: Vec<(String, String)>, + }, } #[tokio::main] -async fn main() { +async fn main() -> Result<()> { let args = Args::parse(); - setup_logging(); - let mut client = setup_client(args.client_args).await.expect("setup client"); + setup_logging(args.logging_args)?; + let mut client = setup_client(args.client_args) + .await + .context("setup client")?; + + let flight_info = match args.cmd { + Command::StatementQuery { query } => client + .execute(query, None) + .await + .context("execute statement")?, + Command::PreparedStatementQuery { query, params } => { + let mut prepared_stmt = client + .prepare(query, None) + .await + .context("prepare statement")?; + + if !params.is_empty() { + prepared_stmt + .set_parameters( + construct_record_batch_from_params( + ¶ms, + prepared_stmt + .parameter_schema() + .context("get parameter schema")?, + ) + .context("construct parameters")?, + ) + .context("bind parameters")?; + } - let info = client - .execute(args.query, None) + prepared_stmt + .execute() + .await + .context("execute prepared statement")? + } + }; + + let batches = execute_flight(&mut client, flight_info) .await - .expect("prepare statement"); - info!("got flight info"); + .context("read flight data")?; + + let res = pretty_format_batches(batches.as_slice()).context("format results")?; + println!("{res}"); + + Ok(()) +} - let schema = Arc::new(Schema::try_from(info.clone()).expect("valid schema")); +async fn execute_flight( + client: &mut FlightSqlServiceClient, + info: FlightInfo, +) -> Result> { + let schema = Arc::new(Schema::try_from(info.clone()).context("valid schema")?); let mut batches = Vec::with_capacity(info.endpoint.len() + 1); batches.push(RecordBatch::new_empty(schema)); info!("decoded schema"); for endpoint in info.endpoint { let Some(ticket) = &endpoint.ticket else { - panic!("did not get ticket"); + bail!("did not get ticket"); }; - let flight_data = client.do_get(ticket.clone()).await.expect("do get"); - let flight_data: Vec = flight_data + + let mut flight_data = client.do_get(ticket.clone()).await.context("do get")?; + log_metadata(flight_data.headers(), "header"); + + let mut endpoint_batches: Vec<_> = (&mut flight_data) .try_collect() .await - .expect("collect data stream"); - let mut endpoint_batches = flight_data_to_batches(&flight_data) - .expect("convert flight data to record batches"); + .context("collect data stream")?; batches.append(&mut endpoint_batches); + + if let Some(trailers) = flight_data.trailers() { + log_metadata(&trailers, "trailer"); + } } info!("received data"); - let res = pretty_format_batches(batches.as_slice()).expect("format results"); - println!("{res}"); + Ok(batches) +} + +fn construct_record_batch_from_params( + params: &[(String, String)], + parameter_schema: &Schema, +) -> Result { + let mut items = Vec::<(&String, ArrayRef)>::new(); + + for (name, value) in params { + let field = parameter_schema.field_with_name(name)?; + let value_as_array = StringArray::new_scalar(value); + let casted = cast_with_options( + value_as_array.get().0, + field.data_type(), + &CastOptions::default(), + )?; + items.push((name, casted)) + } + + Ok(RecordBatch::try_from_iter(items)?) 
-fn setup_logging() { - tracing_log::LogTracer::init().expect("tracing log init"); - tracing_subscriber::fmt::init(); +fn setup_logging(args: LoggingArgs) -> Result<()> { + use tracing_subscriber::{util::SubscriberInitExt, EnvFilter, FmtSubscriber}; + + tracing_log::LogTracer::init().context("tracing log init")?; + + let filter = match args.log_verbose_count { + 0 => "warn", + 1 => "info", + 2 => "debug", + _ => "trace", + }; + let filter = EnvFilter::try_new(filter).context("set up log env filter")?; + + let subscriber = FmtSubscriber::builder().with_env_filter(filter).finish(); + subscriber.try_init().context("init logging subscriber")?; + + Ok(()) } -async fn setup_client( - args: ClientArgs, -) -> Result<FlightSqlServiceClient<Channel>, ArrowError> { +async fn setup_client(args: ClientArgs) -> Result<FlightSqlServiceClient<Channel>> { let port = args.port.unwrap_or(if args.tls { 443 } else { 80 }); let protocol = if args.tls { "https" } else { "http" }; let mut endpoint = Endpoint::new(format!("{}://{}:{}", protocol, args.host, port)) - .map_err(|_| ArrowError::IpcError("Cannot create endpoint".to_string()))? + .context("create endpoint")? .connect_timeout(Duration::from_secs(20)) .timeout(Duration::from_secs(20)) .tcp_nodelay(true) // Disable Nagle's Algorithm since we don't want packets to wait @@ -162,21 +279,18 @@ async fn setup_client( if args.tls { let tls_config = ClientTlsConfig::new(); - endpoint = endpoint.tls_config(tls_config).map_err(|_| { - ArrowError::IpcError("Cannot create TLS endpoint".to_string()) - })?; + endpoint = endpoint + .tls_config(tls_config) + .context("create TLS endpoint")?; } - let channel = endpoint - .connect() - .await - .map_err(|e| ArrowError::IpcError(format!("Cannot connect to endpoint: {e}")))?; + let channel = endpoint.connect().await.context("connect to endpoint")?; let mut client = FlightSqlServiceClient::new(channel); info!("connected"); - for kv in args.headers { - client.set_header(kv.key, kv.value); + for (k, v) in args.headers { + client.set_header(k, v); } if let Some(token) = args.token { @@ -190,16 +304,48 @@ async fn setup_client( client .handshake(&username, &password) .await - .expect("handshake"); + .context("handshake")?; info!("performed handshake"); } (Some(_), None) => { - panic!("when username is set, you also need to set a password") + bail!("when username is set, you also need to set a password") } (None, Some(_)) => { - panic!("when password is set, you also need to set a username") + bail!("when password is set, you also need to set a username") } } Ok(client) } + +/// Parse a single key-value pair +fn parse_key_val(s: &str) -> Result<(String, String), String> { + let pos = s + .find('=') + .ok_or_else(|| format!("invalid KEY=value: no `=` found in `{s}`"))?; + Ok((s[..pos].to_owned(), s[pos + 1..].to_owned())) +}
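For reference, `parse_key_val` splits only on the first `=`, so header and parameter values may themselves contain `=`. A self-contained check of that behavior (duplicating the function so the snippet runs on its own):

```rust
/// Same splitting rule as `parse_key_val` above: everything before the
/// first `=` is the key, everything after it is the value.
fn parse_key_val(s: &str) -> Result<(String, String), String> {
    let pos = s
        .find('=')
        .ok_or_else(|| format!("invalid KEY=value: no `=` found in `{s}`"))?;
    Ok((s[..pos].to_owned(), s[pos + 1..].to_owned()))
}

fn main() {
    // `-H foo=bar` becomes ("foo", "bar").
    assert_eq!(parse_key_val("foo=bar"), Ok(("foo".into(), "bar".into())));
    // Only the first `=` splits, so values may themselves contain `=`.
    assert_eq!(parse_key_val("k=a=b"), Ok(("k".into(), "a=b".into())));
    // A missing `=` is rejected, which clap surfaces as a usage error.
    assert!(parse_key_val("no-separator").is_err());
}
```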
+/// Log headers/trailers. +fn log_metadata(map: &MetadataMap, what: &'static str) { + for k_v in map.iter() { + match k_v { + tonic::metadata::KeyAndValueRef::Ascii(k, v) => { + info!( + "{}: {}={}", + what, + k.as_str(), + v.to_str().unwrap_or(""), + ); + } + tonic::metadata::KeyAndValueRef::Binary(k, v) => { + info!( + "{}: {}={}", + what, + k.as_str(), + String::from_utf8_lossy(v.as_ref()), + ); + } + } + } +} diff --git a/arrow-flight/src/client.rs b/arrow-flight/src/client.rs index 8793f7834bfb..a264012c82ec 100644 --- a/arrow-flight/src/client.rs +++ b/arrow-flight/src/client.rs @@ -249,10 +249,7 @@ impl FlightClient { /// .expect("error fetching data"); /// # } /// ``` - pub async fn get_flight_info( - &mut self, - descriptor: FlightDescriptor, - ) -> Result<FlightInfo> { + pub async fn get_flight_info(&mut self, descriptor: FlightDescriptor) -> Result<FlightInfo> { let request = self.make_request(descriptor); let response = self.inner.get_flight_info(request).await?.into_inner(); @@ -452,10 +449,7 @@ impl FlightClient { /// .expect("error making request"); /// # } /// ``` - pub async fn get_schema( - &mut self, - flight_descriptor: FlightDescriptor, - ) -> Result<Schema> { + pub async fn get_schema(&mut self, flight_descriptor: FlightDescriptor) -> Result<Schema> { let request = self.make_request(flight_descriptor); let schema_result = self.inner.get_schema(request).await?.into_inner(); @@ -488,9 +482,7 @@ impl FlightClient { /// .expect("error gathering actions"); /// # } /// ``` - pub async fn list_actions( - &mut self, - ) -> Result<BoxStream<'static, Result<ActionType>>> { + pub async fn list_actions(&mut self) -> Result<BoxStream<'static, Result<ActionType>>> { let request = self.make_request(Empty {}); let action_stream = self @@ -528,10 +520,7 @@ impl FlightClient { /// .expect("error gathering action results"); /// # } /// ``` - pub async fn do_action( - &mut self, - action: Action, - ) -> Result<BoxStream<'static, Result<Bytes>>> { + pub async fn do_action(&mut self, action: Action) -> Result<BoxStream<'static, Result<Bytes>>> { let request = self.make_request(action); let result_stream = self diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index dfcdd260602c..95bbe2b46bb2 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -21,9 +21,7 @@ use arrow_buffer::Buffer; use arrow_schema::{Schema, SchemaRef}; use bytes::Bytes; use futures::{ready, stream::BoxStream, Stream, StreamExt}; -use std::{ - collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll, -}; +use std::{collections::HashMap, convert::TryFrom, fmt::Debug, pin::Pin, sync::Arc, task::Poll}; use tonic::metadata::MetadataMap; use crate::error::{FlightError, Result}; @@ -270,16 +268,14 @@ impl FlightDataDecoder { /// state as necessary.
fn extract_message(&mut self, data: FlightData) -> Result<Option<DecodedFlightData>> { use arrow_ipc::MessageHeader; - let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|e| { - FlightError::DecodeError(format!("Error decoding root message: {e}")) - })?; + let message = arrow_ipc::root_as_message(&data.data_header[..]) + .map_err(|e| FlightError::DecodeError(format!("Error decoding root message: {e}")))?; match message.header_type() { MessageHeader::NONE => Ok(Some(DecodedFlightData::new_none(data))), MessageHeader::Schema => { - let schema = Schema::try_from(&data).map_err(|e| { - FlightError::DecodeError(format!("Error decoding schema: {e}")) - })?; + let schema = Schema::try_from(&data) + .map_err(|e| FlightError::DecodeError(format!("Error decoding schema: {e}")))?; let schema = Arc::new(schema); let dictionaries_by_field = HashMap::new(); @@ -300,12 +296,11 @@ impl FlightDataDecoder { }; let buffer = Buffer::from_bytes(data.data_body.into()); - let dictionary_batch = - message.header_as_dictionary_batch().ok_or_else(|| { - FlightError::protocol( - "Could not get dictionary batch from DictionaryBatch message", - ) - })?; + let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| { + FlightError::protocol( + "Could not get dictionary batch from DictionaryBatch message", + ) + })?; arrow_ipc::reader::read_dictionary( &buffer, @@ -315,9 +310,7 @@ impl FlightDataDecoder { &message.version(), ) .map_err(|e| { - FlightError::DecodeError(format!( - "Error decoding ipc dictionary: {e}" - )) + FlightError::DecodeError(format!("Error decoding ipc dictionary: {e}")) })?; // Updated internal state, but no decoded message @@ -338,9 +331,7 @@ &state.dictionaries_by_field, ) .map_err(|e| { - FlightError::DecodeError(format!( - "Error decoding ipc RecordBatch: {e}" - )) + FlightError::DecodeError(format!("Error decoding ipc RecordBatch: {e}")) })?; Ok(Some(DecodedFlightData::new_record_batch(data, batch)))
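To see the decode side in context: a consumer does not call `extract_message` directly but drives a `FlightDataDecoder` and keeps only the record batch payloads, since schema and dictionary messages merely update decoder state. A sketch under that assumption, given a stream of raw `FlightData` such as a `do_get` response body:

```rust
use arrow_array::RecordBatch;
use arrow_flight::decode::{DecodedPayload, FlightDataDecoder};
use arrow_flight::error::Result;
use arrow_flight::FlightData;
use futures::{Stream, StreamExt};

// Drain a stream of raw FlightData into record batches.
async fn collect_batches(
    flight_data: impl Stream<Item = Result<FlightData>> + Send + 'static,
) -> Result<Vec<RecordBatch>> {
    let mut decoder = FlightDataDecoder::new(flight_data);
    let mut batches = vec![];
    while let Some(decoded) = decoder.next().await {
        match decoded?.payload {
            // Schema and dictionary messages only update decoder state;
            // dictionary batches surface as `None` payloads.
            DecodedPayload::None | DecodedPayload::Schema(_) => {}
            DecodedPayload::RecordBatch(batch) => batches.push(batch),
        }
    }
    Ok(batches)
}
```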
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index cd2ee7c02b68..e6ef9994d487 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -30,10 +30,17 @@ use futures::{ready, stream::BoxStream, Stream, StreamExt}; /// This can be used to implement [`FlightService::do_get`] in an /// Arrow Flight implementation; /// +/// This structure encodes a stream of `Result`s rather than `RecordBatch`es to +/// propagate errors from streaming execution, where the generation of the +/// `RecordBatch`es is incremental, and an error may occur even after +/// several have already been successfully produced. +/// /// # Caveats -/// 1. [`DictionaryArray`](arrow_array::array::DictionaryArray)s -/// are converted to their underlying types prior to transport, due to -/// <https://github.com/apache/arrow-rs/issues/3389>. +/// 1. When [`DictionaryHandling`] is [`DictionaryHandling::Hydrate`], [`DictionaryArray`](arrow_array::array::DictionaryArray)s +/// are converted to their underlying types prior to transport. +/// When [`DictionaryHandling`] is [`DictionaryHandling::Resend`], Dictionary [`FlightData`] is sent with every +/// [`RecordBatch`] that contains a [`DictionaryArray`](arrow_array::array::DictionaryArray). +/// See <https://github.com/apache/arrow-rs/issues/3389>. /// /// # Example /// ```no_run /// # use std::sync::Arc; /// # use arrow_array::{ArrayRef, RecordBatch, UInt32Array}; /// # async fn f() { /// # let c1 = UInt32Array::from(vec![1, 2, 3, 4, 5, 6]); -/// # let record_batch = RecordBatch::try_from_iter(vec![ +/// # let batch = RecordBatch::try_from_iter(vec![ /// # ("a", Arc::new(c1) as ArrayRef) /// # ]) /// # .expect("cannot create record batch"); /// use arrow_flight::encode::FlightDataEncoderBuilder; /// /// // Get an input stream of Result<RecordBatch> -/// let input_stream = futures::stream::iter(vec![Ok(record_batch)]); +/// let input_stream = futures::stream::iter(vec![Ok(batch)]); /// /// // Build a stream of `Result<FlightData>` (e.g. to return for do_get) /// let flight_data_stream = FlightDataEncoderBuilder::new() @@ -59,6 +66,39 @@ /// # } /// ``` /// +/// # Example: Sending `Vec<RecordBatch>` +/// +/// You can create a [`Stream`] to pass to [`Self::build`] from an existing +/// `Vec` of `RecordBatch`es like this: +/// +/// ``` +/// # use std::sync::Arc; +/// # use arrow_array::{ArrayRef, RecordBatch, UInt32Array}; +/// # async fn f() { +/// # fn make_batches() -> Vec<RecordBatch> { +/// # let c1 = UInt32Array::from(vec![1, 2, 3, 4, 5, 6]); +/// # let batch = RecordBatch::try_from_iter(vec![ +/// # ("a", Arc::new(c1) as ArrayRef) +/// # ]) +/// # .expect("cannot create record batch"); +/// # vec![batch.clone(), batch.clone()] +/// # } +/// use arrow_flight::encode::FlightDataEncoderBuilder; +/// +/// // Get batches that you want to send via Flight +/// let batches: Vec<RecordBatch> = make_batches(); +/// +/// // Create an input stream of Result<RecordBatch> +/// let input_stream = futures::stream::iter( +/// batches.into_iter().map(Ok) +/// ); +/// +/// // Build a stream of `Result<FlightData>` (e.g. to return for do_get) +/// let flight_data_stream = FlightDataEncoderBuilder::new() +/// .build(input_stream); +/// # } +/// ``` +/// /// [`FlightService::do_get`]: crate::flight_service_server::FlightService::do_get /// [`FlightError`]: crate::error::FlightError #[derive(Debug)] @@ -74,6 +114,9 @@ pub struct FlightDataEncoderBuilder { schema: Option<SchemaRef>, /// Optional flight descriptor, if known before data. descriptor: Option<FlightDescriptor>, + /// Determines how `DictionaryArray`s are encoded for transport. + /// See [`DictionaryHandling`] for more information. + dictionary_handling: DictionaryHandling, } /// Default target size for encoded [`FlightData`]. @@ -90,6 +133,7 @@ impl Default for FlightDataEncoderBuilder { app_metadata: Bytes::new(), schema: None, descriptor: None, + dictionary_handling: DictionaryHandling::Hydrate, } } } @@ -114,6 +158,12 @@ impl FlightDataEncoderBuilder { self } + /// Set [`DictionaryHandling`] for encoder + pub fn with_dictionary_handling(mut self, dictionary_handling: DictionaryHandling) -> Self { + self.dictionary_handling = dictionary_handling; + self + } + /// Specify application specific metadata included in the /// [`FlightData::app_metadata`] field of the first Schema /// message @@ -138,16 +188,15 @@ impl FlightDataEncoderBuilder { } /// Specify a flight descriptor in the first FlightData message. - pub fn with_flight_descriptor( - mut self, - descriptor: Option<FlightDescriptor>, - ) -> Self { + pub fn with_flight_descriptor(mut self, descriptor: Option<FlightDescriptor>) -> Self { self.descriptor = descriptor; self }
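Because the builder consumes a stream of `Result`s, a failure on the producing side simply flows through as an item; batches already encoded are not lost. A sketch of that propagation (the error text is invented for illustration):

```rust
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow_flight::encode::FlightDataEncoderBuilder;
use arrow_flight::error::FlightError;
use futures::StreamExt;
use std::sync::Arc;

#[tokio::main]
async fn main() {
    let batch = RecordBatch::try_from_iter(vec![(
        "a",
        Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
    )])
    .unwrap();

    // The input is a stream of Results: a failure can show up after
    // batches that were already produced, and the encoder forwards it.
    let input = futures::stream::iter(vec![
        Ok(batch),
        Err(FlightError::ProtocolError("source failed mid-stream".into())),
    ]);

    let mut flight_data = FlightDataEncoderBuilder::new().build(input);
    while let Some(item) = flight_data.next().await {
        match item {
            Ok(data) => println!("encoded {} body bytes", data.data_body.len()),
            Err(e) => println!("error propagated: {e}"),
        }
    }
}
```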
- /// Return a [`Stream`] of [`FlightData`], - /// consuming self. More details on [`FlightDataEncoder`] + /// Takes a [`Stream`] of [`Result<RecordBatch>`] and returns a [`Stream`] + /// of [`FlightData`], consuming self. + /// + /// See example on [`Self`] and [`FlightDataEncoder`] for more details pub fn build<S>(self, input: S) -> FlightDataEncoder where S: Stream<Item = Result<RecordBatch>> + Send + 'static, @@ -158,6 +207,7 @@ impl FlightDataEncoderBuilder { app_metadata, schema, descriptor, + dictionary_handling, } = self; FlightDataEncoder::new( @@ -167,6 +217,7 @@ impl FlightDataEncoderBuilder { options, app_metadata, descriptor, + dictionary_handling, ) } } @@ -192,6 +243,9 @@ pub struct FlightDataEncoder { done: bool, /// cleared after the first FlightData message is sent descriptor: Option<FlightDescriptor>, + /// Determines how `DictionaryArray`s are encoded for transport. + /// See [`DictionaryHandling`] for more information. + dictionary_handling: DictionaryHandling, } impl FlightDataEncoder { @@ -202,16 +256,21 @@ impl FlightDataEncoder { options: IpcWriteOptions, app_metadata: Bytes, descriptor: Option<FlightDescriptor>, + dictionary_handling: DictionaryHandling, ) -> Self { let mut encoder = Self { inner, schema: None, max_flight_data_size, - encoder: FlightIpcEncoder::new(options), + encoder: FlightIpcEncoder::new( + options, + dictionary_handling != DictionaryHandling::Resend, + ), app_metadata: Some(app_metadata), queue: VecDeque::new(), done: false, descriptor, + dictionary_handling, }; // If schema is known up front, enqueue it immediately @@ -242,7 +301,8 @@ impl FlightDataEncoder { fn encode_schema(&mut self, schema: &SchemaRef) -> SchemaRef { // The first message is the schema message, and all // batches have the same schema - let schema = Arc::new(prepare_schema_for_flight(schema)); + let send_dictionaries = self.dictionary_handling == DictionaryHandling::Resend; + let schema = Arc::new(prepare_schema_for_flight(schema, send_dictionaries)); let mut schema_flight_data = self.encoder.encode_schema(&schema); // attach any metadata requested @@ -264,11 +324,11 @@ impl FlightDataEncoder { }; // encode the batch - let batch = prepare_batch_for_flight(&batch, schema)?; + let send_dictionaries = self.dictionary_handling == DictionaryHandling::Resend; + let batch = prepare_batch_for_flight(&batch, schema, send_dictionaries)?; for batch in split_batch_for_grpc_response(batch, self.max_flight_data_size) { - let (flight_dictionaries, flight_batch) = - self.encoder.encode_batch(&batch)?; + let (flight_dictionaries, flight_batch) = self.encoder.encode_batch(&batch)?; self.queue_messages(flight_dictionaries); self.queue_message(flight_batch); @@ -325,17 +385,46 @@ impl Stream for FlightDataEncoder { } } +/// Defines how a [`FlightDataEncoder`] encodes [`DictionaryArray`]s +/// +/// [`DictionaryArray`]: arrow_array::DictionaryArray +#[derive(Debug, PartialEq)] +pub enum DictionaryHandling { + /// Expands to the underlying type (default). This likely sends more data + /// over the network but requires less memory (dictionaries are not tracked) + /// and is more compatible with other arrow flight client implementations + /// that may not support `DictionaryEncoding` + /// + /// An IPC response, streaming or otherwise, defines its schema up front + /// which defines the mapping from dictionary IDs. It then sends these + /// dictionaries over the wire.
+ /// + /// This requires identifying the different dictionaries in use, assigning + /// them IDs, and sending new dictionaries, delta or otherwise, when needed + /// + /// See also: + /// * <https://github.com/apache/arrow-rs/issues/3389> + Hydrate, + /// Send dictionary FlightData with every RecordBatch that contains a + /// [`DictionaryArray`]. See [`Self::Hydrate`] for more tradeoffs. No + /// attempt is made to skip sending the same (logical) dictionary values + /// twice. + /// + /// [`DictionaryArray`]: arrow_array::DictionaryArray + Resend, +} + /// Prepare an arrow Schema for transport over the Arrow Flight protocol /// /// Convert dictionary types to underlying types /// /// See hydrate_dictionary for more information -fn prepare_schema_for_flight(schema: &Schema) -> Schema { +fn prepare_schema_for_flight(schema: &Schema, send_dictionaries: bool) -> Schema { let fields: Fields = schema .fields() .iter() .map(|field| match field.data_type() { - DataType::Dictionary(_, value_type) => Field::new( + DataType::Dictionary(_, value_type) if !send_dictionaries => Field::new( field.name(), value_type.as_ref().clone(), field.is_nullable(), @@ -364,9 +453,8 @@ fn split_batch_for_grpc_response( .map(|col| col.get_buffer_memory_size()) .sum::<usize>(); - let n_batches = (size / max_flight_data_size - + usize::from(size % max_flight_data_size != 0)) - .max(1); + let n_batches = + (size / max_flight_data_size + usize::from(size % max_flight_data_size != 0)).max(1); let rows_per_batch = (batch.num_rows() / n_batches).max(1); let mut out = Vec::with_capacity(n_batches + 1); @@ -394,8 +482,7 @@ struct FlightIpcEncoder { } impl FlightIpcEncoder { - fn new(options: IpcWriteOptions) -> Self { - let error_on_replacement = true; + fn new(options: IpcWriteOptions, error_on_replacement: bool) -> Self { Self { options, data_gen: IpcDataGenerator::default(), @@ -410,18 +497,12 @@ impl FlightIpcEncoder { /// Convert a `RecordBatch` to a Vec of `FlightData` representing /// dictionaries and a `FlightData` representing the batch - fn encode_batch( - &mut self, - batch: &RecordBatch, - ) -> Result<(Vec<FlightData>, FlightData)> { - let (encoded_dictionaries, encoded_batch) = self.data_gen.encoded_batch( - batch, - &mut self.dictionary_tracker, - &self.options, - )?; - - let flight_dictionaries = - encoded_dictionaries.into_iter().map(Into::into).collect(); + fn encode_batch(&mut self, batch: &RecordBatch) -> Result<(Vec<FlightData>, FlightData)> { + let (encoded_dictionaries, encoded_batch) = + self.data_gen + .encoded_batch(batch, &mut self.dictionary_tracker, &self.options)?; + + let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect(); + let flight_batch = encoded_batch.into(); Ok((flight_dictionaries, flight_batch)) @@ -438,12 +519,14 @@ impl FlightIpcEncoder { fn prepare_batch_for_flight( batch: &RecordBatch, schema: SchemaRef, + send_dictionaries: bool, ) -> Result<RecordBatch> { let columns = batch .columns() .iter() - .map(hydrate_dictionary) + .map(|c| hydrate_dictionary(c, send_dictionaries)) .collect::<Result<Vec<_>>>()?; + let options = RecordBatchOptions::new().with_row_count(Some(batch.num_rows())); Ok(RecordBatch::try_new_with_options(
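A short sketch of selecting the non-default behavior via the builder; with `Resend`, dictionary `FlightData` accompanies every batch, while the default `Hydrate` would expand the column to plain `Utf8` first:

```rust
use arrow_array::types::Int32Type;
use arrow_array::{ArrayRef, DictionaryArray, RecordBatch};
use arrow_flight::encode::{DictionaryHandling, FlightDataEncoderBuilder};
use std::sync::Arc;

fn main() {
    // A batch with one dictionary-encoded string column.
    let dict: DictionaryArray<Int32Type> = vec!["a", "a", "b"].into_iter().collect();
    let batch =
        RecordBatch::try_from_iter(vec![("tag", Arc::new(dict) as ArrayRef)]).unwrap();

    // Resend keeps the column dictionary-encoded on the wire; the
    // resulting stream can be returned from a do_get implementation.
    let _flight_data = FlightDataEncoderBuilder::new()
        .with_dictionary_handling(DictionaryHandling::Resend)
        .build(futures::stream::iter(vec![Ok(batch)]));
}
```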
@@ -451,35 +534,26 @@ fn prepare_batch_for_flight( )?) } -/// Hydrates a dictionary to its underlying type -/// -/// An IPC response, streaming or otherwise, defines its schema up front -/// which defines the mapping from dictionary IDs. It then sends these -/// dictionaries over the wire. -/// -/// This requires identifying the different dictionaries in use, assigning -/// them IDs, and sending new dictionaries, delta or otherwise, when needed -/// -/// See also: -/// * <https://github.com/apache/arrow-rs/issues/3389> -/// -/// For now we just hydrate the dictionaries to their underlying type -fn hydrate_dictionary(array: &ArrayRef) -> Result<ArrayRef> { - let arr = if let DataType::Dictionary(_, value) = array.data_type() { - arrow_cast::cast(array, value)? - } else { - Arc::clone(array) +/// Hydrates a dictionary to its underlying type if send_dictionaries is false. If send_dictionaries +/// is true, dictionaries are sent with every batch, which is not as optimal as described in [DictionaryHandling::Hydrate] above, +/// but does enable sending `DictionaryArray`s via Flight. +fn hydrate_dictionary(array: &ArrayRef, send_dictionaries: bool) -> Result<ArrayRef> { + let arr = match array.data_type() { + DataType::Dictionary(_, value) if !send_dictionaries => arrow_cast::cast(array, value)?, + _ => Arc::clone(array), }; Ok(arr) } #[cfg(test)] mod tests { - use arrow_array::types::*; use arrow_array::*; + use arrow_array::{cast::downcast_array, types::*}; use arrow_cast::pretty::pretty_format_batches; use std::collections::HashMap; + use crate::decode::{DecodedPayload, FlightDataDecoder}; + use super::*; #[test] @@ -496,11 +570,9 @@ let (_, baseline_flight_batch) = make_flight_data(&batch, &options); let big_batch = batch.slice(0, batch.num_rows() - 1); - let optimized_big_batch = - prepare_batch_for_flight(&big_batch, Arc::clone(&schema)) - .expect("failed to optimize"); - let (_, optimized_big_flight_batch) = - make_flight_data(&optimized_big_batch, &options); + let optimized_big_batch = prepare_batch_for_flight(&big_batch, Arc::clone(&schema), false) + .expect("failed to optimize"); + let (_, optimized_big_flight_batch) = make_flight_data(&optimized_big_batch, &options); assert_eq!( baseline_flight_batch.data_body.len(), @@ -509,25 +581,96 @@ let small_batch = batch.slice(0, 1); let optimized_small_batch = - prepare_batch_for_flight(&small_batch, Arc::clone(&schema)) + prepare_batch_for_flight(&small_batch, Arc::clone(&schema), false) .expect("failed to optimize"); - let (_, optimized_small_flight_batch) = - make_flight_data(&optimized_small_batch, &options); + let (_, optimized_small_flight_batch) = make_flight_data(&optimized_small_batch, &options); assert!( - baseline_flight_batch.data_body.len() - > optimized_small_flight_batch.data_body.len() + baseline_flight_batch.data_body.len() > optimized_small_flight_batch.data_body.len() ); } + #[tokio::test] + async fn test_dictionary_hydration() { + let arr: DictionaryArray<UInt16Type> = vec!["a", "a", "b"].into_iter().collect(); + let schema = Arc::new(Schema::new(vec![Field::new_dictionary( + "dict", + DataType::UInt16, + DataType::Utf8, + false, + )])); + let batch = RecordBatch::try_new(schema, vec![Arc::new(arr)]).unwrap(); + let encoder = + FlightDataEncoderBuilder::default().build(futures::stream::once(async { Ok(batch) })); + let mut decoder = FlightDataDecoder::new(encoder); + let expected_schema = Schema::new(vec![Field::new("dict", DataType::Utf8, false)]); + let expected_schema = Arc::new(expected_schema); + while let Some(decoded) = decoder.next().await { + let decoded = decoded.unwrap(); + match decoded.payload { + DecodedPayload::None => {} + DecodedPayload::Schema(s) => assert_eq!(s, expected_schema), + DecodedPayload::RecordBatch(b) => { + assert_eq!(b.schema(), expected_schema); + let expected_array = StringArray::from(vec!["a", "a", "b"]); + let actual_array =
b.column_by_name("dict").unwrap(); + let actual_array = downcast_array::<StringArray>(actual_array); + + assert_eq!(actual_array, expected_array); + } + } + } + } + + #[tokio::test] + async fn test_send_dictionaries() { + let schema = Arc::new(Schema::new(vec![Field::new_dictionary( + "dict", + DataType::UInt16, + DataType::Utf8, + false, + )])); + + let arr_one: Arc<DictionaryArray<UInt16Type>> = + Arc::new(vec!["a", "a", "b"].into_iter().collect()); + let arr_two: Arc<DictionaryArray<UInt16Type>> = + Arc::new(vec!["b", "a", "c"].into_iter().collect()); + let batch_one = RecordBatch::try_new(schema.clone(), vec![arr_one.clone()]).unwrap(); + let batch_two = RecordBatch::try_new(schema.clone(), vec![arr_two.clone()]).unwrap(); + + let encoder = FlightDataEncoderBuilder::default() + .with_dictionary_handling(DictionaryHandling::Resend) + .build(futures::stream::iter(vec![Ok(batch_one), Ok(batch_two)])); + + let mut decoder = FlightDataDecoder::new(encoder); + let mut expected_array = arr_one; + while let Some(decoded) = decoder.next().await { + let decoded = decoded.unwrap(); + match decoded.payload { + DecodedPayload::None => {} + DecodedPayload::Schema(s) => assert_eq!(s, schema), + DecodedPayload::RecordBatch(b) => { + assert_eq!(b.schema(), schema); + + let actual_array = Arc::new(downcast_array::<DictionaryArray<UInt16Type>>( + b.column_by_name("dict").unwrap(), + )); + + assert_eq!(actual_array, expected_array); + + expected_array = arr_two.clone(); + } + } + } + } + #[test] fn test_schema_metadata_encoded() { - let schema = - Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata( - HashMap::from([("some_key".to_owned(), "some_value".to_owned())]), - ); + let schema = Schema::new(vec![Field::new("data", DataType::Int32, false)]).with_metadata( + HashMap::from([("some_key".to_owned(), "some_value".to_owned())]), + ); - let got = prepare_schema_for_flight(&schema); + let got = prepare_schema_for_flight(&schema, false); assert!(got.metadata().contains_key("some_key")); } @@ -540,7 +683,7 @@ ) .expect("cannot create record batch"); - prepare_batch_for_flight(&batch, batch.schema()).expect("failed to optimize"); + prepare_batch_for_flight(&batch, batch.schema(), false).expect("failed to optimize"); } pub fn make_flight_data( @@ -554,8 +697,7 @@ .encoded_batch(batch, &mut dictionary_tracker, options) .expect("DictionaryTracker configured above to not error on replacement"); - let flight_dictionaries = - encoded_dictionaries.into_iter().map(Into::into).collect(); + let flight_dictionaries = encoded_dictionaries.into_iter().map(Into::into).collect(); let flight_batch = encoded_batch.into(); (flight_dictionaries, flight_batch) @@ -576,8 +718,7 @@ // split once let n_rows = max_flight_data_size + 1; assert!(n_rows % 2 == 1, "should be an odd number"); - let c = - UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>()); + let c = UInt8Array::from((0..n_rows).map(|i| (i % 256) as u8).collect::<Vec<_>>()); let batch = RecordBatch::try_from_iter(vec![("a", Arc::new(c) as ArrayRef)]) .expect("cannot create record batch"); let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size); @@ -624,8 +765,7 @@ let input_rows = batch.num_rows(); - let split = - split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes); + let split = split_batch_for_grpc_response(batch.clone(), max_flight_data_size_bytes); let sizes: Vec<_> = split.iter().map(|batch| batch.num_rows()).collect(); let output_rows: usize = sizes.iter().sum(); @@ -638,8 +778,7 @@ #[tokio::test] async fn
flight_data_size_even() { - let s1 = - StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024)); + let s1 = StringArray::from_iter_values(std::iter::repeat(".10 bytes.").take(1024)); let i1 = Int16Array::from_iter_values(0..1024); let s2 = StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024)); let i2 = Int64Array::from_iter_values(0..1024); @@ -659,8 +798,7 @@ async fn flight_data_size_uneven_variable_lengths() { // each row has a longer string than the last with increasing lengths 0 --> 1024 let array = StringArray::from_iter_values((0..1024).map(|i| "*".repeat(i))); - let batch = - RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap(); + let batch = RecordBatch::try_from_iter(vec![("data", Arc::new(array) as _)]).unwrap(); // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 @@ -714,8 +852,7 @@ }) .collect(); - let batch = - RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); + let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); verify_encoded_split(batch, 160).await; } @@ -725,11 +862,9 @@ // large dictionary (all distinct values ==> 1024 entries in dictionary) let values: Vec<_> = (1..1024).map(|i| "**".repeat(i)).collect(); - let array: DictionaryArray<Int32Type> = - values.iter().map(|s| Some(s.as_str())).collect(); + let array: DictionaryArray<Int32Type> = values.iter().map(|s| Some(s.as_str())).collect(); - let batch = - RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); + let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 @@ -743,8 +878,7 @@ let keys = Int32Array::from_iter_values((0..3000).map(|i| (3000 - i) % 1024)); let array = DictionaryArray::new(keys, Arc::new(values)); - let batch = - RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); + let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 @@ -760,12 +894,9 @@ // medium cardinality let values3: Vec<_> = (1..1024).map(|i| "**".repeat(i % 100)).collect(); - let array1: DictionaryArray<Int32Type> = - values1.iter().map(|s| Some(s.as_str())).collect(); - let array2: DictionaryArray<Int32Type> = - values2.iter().map(|s| Some(s.as_str())).collect(); - let array3: DictionaryArray<Int32Type> = - values3.iter().map(|s| Some(s.as_str())).collect(); + let array1: DictionaryArray<Int32Type> = values1.iter().map(|s| Some(s.as_str())).collect(); + let array2: DictionaryArray<Int32Type> = values2.iter().map(|s| Some(s.as_str())).collect(); + let array3: DictionaryArray<Int32Type> = values3.iter().map(|s| Some(s.as_str())).collect(); let batch = RecordBatch::try_from_iter(vec![ ("a1", Arc::new(array1) as _), @@ -785,17 +916,13 @@ .flight_descriptor .as_ref() .map(|descriptor| { - let path_len: usize = - descriptor.path.iter().map(|p| p.as_bytes().len()).sum(); + let path_len: usize = descriptor.path.iter().map(|p| p.as_bytes().len()).sum(); std::mem::size_of_val(descriptor) + descriptor.cmd.len() + path_len }) .unwrap_or(0); - flight_descriptor_size - + d.app_metadata.len() - + d.data_body.len() - + d.data_header.len() + flight_descriptor_size + d.app_metadata.len() + d.data_body.len() + d.data_header.len() } /// Coverage for diff --git a/arrow-flight/src/lib.rs b/arrow-flight/src/lib.rs index 04edf266389c..8d05f658703a 100644 ---
a/arrow-flight/src/lib.rs +++ b/arrow-flight/src/lib.rs @@ -133,10 +133,7 @@ pub struct IpcMessage(pub Bytes); // Useful conversion functions -fn flight_schema_as_encoded_data( - arrow_schema: &Schema, - options: &IpcWriteOptions, -) -> EncodedData { +fn flight_schema_as_encoded_data(arrow_schema: &Schema, options: &IpcWriteOptions) -> EncodedData { let data_gen = writer::IpcDataGenerator::default(); data_gen.schema_to_bytes(arrow_schema, options) } @@ -316,16 +313,6 @@ impl TryFrom> for SchemaResult { } } -// TryFrom... - -impl TryFrom for DescriptorType { - type Error = ArrowError; - - fn try_from(value: i32) -> ArrowResult { - value.try_into() - } -} - impl TryFrom> for IpcMessage { type Error = ArrowError; diff --git a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs index b2137d8543d3..c7c23311e61e 100644 --- a/arrow-flight/src/sql/arrow.flight.protocol.sql.rs +++ b/arrow-flight/src/sql/arrow.flight.protocol.sql.rs @@ -1077,10 +1077,10 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported commands. /// /// For instance: - /// - return 0 (\b0) => [] (GROUP BY is unsupported); + /// - return 0 (\b0) => \[\] (GROUP BY is unsupported); /// - return 1 (\b1) => \[SQL_GROUP_BY_UNRELATED\]; /// - return 2 (\b10) => \[SQL_GROUP_BY_BEYOND_SELECT\]; - /// - return 3 (\b11) => [SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT]. + /// - return 3 (\b11) => \[SQL_GROUP_BY_UNRELATED, SQL_GROUP_BY_BEYOND_SELECT\]. /// Valid GROUP BY types are described under `arrow.flight.protocol.sql.SqlSupportedGroupBy`. SqlSupportedGroupBy = 522, /// @@ -1104,14 +1104,14 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported grammar levels. /// /// For instance: - /// - return 0 (\b0) => [] (SQL grammar is unsupported); + /// - return 0 (\b0) => \[\] (SQL grammar is unsupported); /// - return 1 (\b1) => \[SQL_MINIMUM_GRAMMAR\]; /// - return 2 (\b10) => \[SQL_CORE_GRAMMAR\]; - /// - return 3 (\b11) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR]; + /// - return 3 (\b11) => \[SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR\]; /// - return 4 (\b100) => \[SQL_EXTENDED_GRAMMAR\]; - /// - return 5 (\b101) => [SQL_MINIMUM_GRAMMAR, SQL_EXTENDED_GRAMMAR]; - /// - return 6 (\b110) => [SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]; - /// - return 7 (\b111) => [SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR]. + /// - return 5 (\b101) => \[SQL_MINIMUM_GRAMMAR, SQL_EXTENDED_GRAMMAR\]; + /// - return 6 (\b110) => \[SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR\]; + /// - return 7 (\b111) => \[SQL_MINIMUM_GRAMMAR, SQL_CORE_GRAMMAR, SQL_EXTENDED_GRAMMAR\]. /// Valid SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedSqlGrammar`. SqlSupportedGrammar = 525, /// @@ -1121,14 +1121,14 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported commands. 
/// /// For instance: - /// - return 0 (\b0) => [] (ANSI92 SQL grammar is unsupported); + /// - return 0 (\b0) => \[\] (ANSI92 SQL grammar is unsupported); /// - return 1 (\b1) => \[ANSI92_ENTRY_SQL\]; /// - return 2 (\b10) => \[ANSI92_INTERMEDIATE_SQL\]; - /// - return 3 (\b11) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL]; + /// - return 3 (\b11) => \[ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL\]; /// - return 4 (\b100) => \[ANSI92_FULL_SQL\]; - /// - return 5 (\b101) => [ANSI92_ENTRY_SQL, ANSI92_FULL_SQL]; - /// - return 6 (\b110) => [ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]; - /// - return 7 (\b111) => [ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL]. + /// - return 5 (\b101) => \[ANSI92_ENTRY_SQL, ANSI92_FULL_SQL\]; + /// - return 6 (\b110) => \[ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL\]; + /// - return 7 (\b111) => \[ANSI92_ENTRY_SQL, ANSI92_INTERMEDIATE_SQL, ANSI92_FULL_SQL\]. /// Valid ANSI92 SQL grammar levels are described under `arrow.flight.protocol.sql.SupportedAnsi92SqlGrammarLevel`. SqlAnsi92SupportedLevel = 526, /// @@ -1165,14 +1165,14 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported actions for a SQL schema. /// /// For instance: - /// - return 0 (\b0) => [] (no supported actions for SQL schema); + /// - return 0 (\b0) => \[\] (no supported actions for SQL schema); /// - return 1 (\b1) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS\]; /// - return 2 (\b10) => \[SQL_ELEMENT_IN_INDEX_DEFINITIONS\]; - /// - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + /// - return 3 (\b11) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS\]; /// - return 4 (\b100) => \[SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; - /// - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; - /// - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; - /// - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + /// - return 5 (\b101) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; + /// - return 6 (\b110) => \[SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; + /// - return 7 (\b111) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]. /// Valid actions for a SQL schema described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. SqlSchemasSupportedActions = 533, /// @@ -1182,14 +1182,14 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported actions for a SQL catalog. 
/// /// For instance: - /// - return 0 (\b0) => [] (no supported actions for SQL catalog); + /// - return 0 (\b0) => \[\] (no supported actions for SQL catalog); /// - return 1 (\b1) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS\]; /// - return 2 (\b10) => \[SQL_ELEMENT_IN_INDEX_DEFINITIONS\]; - /// - return 3 (\b11) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS]; + /// - return 3 (\b11) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS\]; /// - return 4 (\b100) => \[SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; - /// - return 5 (\b101) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; - /// - return 6 (\b110) => [SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]; - /// - return 7 (\b111) => [SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS]. + /// - return 5 (\b101) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; + /// - return 6 (\b110) => \[SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]; + /// - return 7 (\b111) => \[SQL_ELEMENT_IN_PROCEDURE_CALLS, SQL_ELEMENT_IN_INDEX_DEFINITIONS, SQL_ELEMENT_IN_PRIVILEGE_DEFINITIONS\]. /// Valid actions for a SQL catalog are described under `arrow.flight.protocol.sql.SqlSupportedElementActions`. SqlCatalogsSupportedActions = 534, /// @@ -1199,10 +1199,10 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported SQL positioned commands. /// /// For instance: - /// - return 0 (\b0) => [] (no supported SQL positioned commands); + /// - return 0 (\b0) => \[\] (no supported SQL positioned commands); /// - return 1 (\b1) => \[SQL_POSITIONED_DELETE\]; /// - return 2 (\b10) => \[SQL_POSITIONED_UPDATE\]; - /// - return 3 (\b11) => [SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE]. + /// - return 3 (\b11) => \[SQL_POSITIONED_DELETE, SQL_POSITIONED_UPDATE\]. /// Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedPositionedCommands`. SqlSupportedPositionedCommands = 535, /// @@ -1227,22 +1227,22 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported SQL subqueries. 
/// /// For instance: - /// - return 0 (\b0) => [] (no supported SQL subqueries); + /// - return 0 (\b0) => \[\] (no supported SQL subqueries); /// - return 1 (\b1) => \[SQL_SUBQUERIES_IN_COMPARISONS\]; /// - return 2 (\b10) => \[SQL_SUBQUERIES_IN_EXISTS\]; - /// - return 3 (\b11) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS]; + /// - return 3 (\b11) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS\]; /// - return 4 (\b100) => \[SQL_SUBQUERIES_IN_INS\]; - /// - return 5 (\b101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS]; - /// - return 6 (\b110) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_EXISTS]; - /// - return 7 (\b111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS]; + /// - return 5 (\b101) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS\]; + /// - return 6 (\b110) => \[SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_EXISTS\]; + /// - return 7 (\b111) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS\]; /// - return 8 (\b1000) => \[SQL_SUBQUERIES_IN_QUANTIFIEDS\]; - /// - return 9 (\b1001) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 10 (\b1010) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 11 (\b1011) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 12 (\b1100) => [SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 13 (\b1101) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 14 (\b1110) => [SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; - /// - return 15 (\b1111) => [SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS]; + /// - return 9 (\b1001) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 10 (\b1010) => \[SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 11 (\b1011) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 12 (\b1100) => \[SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 13 (\b1101) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 14 (\b1110) => \[SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; + /// - return 15 (\b1111) => \[SQL_SUBQUERIES_IN_COMPARISONS, SQL_SUBQUERIES_IN_EXISTS, SQL_SUBQUERIES_IN_INS, SQL_SUBQUERIES_IN_QUANTIFIEDS\]; /// - ... /// Valid SQL subqueries are described under `arrow.flight.protocol.sql.SqlSupportedSubqueries`. SqlSupportedSubqueries = 538, @@ -1260,10 +1260,10 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported SQL UNIONs. /// /// For instance: - /// - return 0 (\b0) => [] (no supported SQL positioned commands); + /// - return 0 (\b0) => \[\] (no supported SQL positioned commands); /// - return 1 (\b1) => \[SQL_UNION\]; /// - return 2 (\b10) => \[SQL_UNION_ALL\]; - /// - return 3 (\b11) => [SQL_UNION, SQL_UNION_ALL]. + /// - return 3 (\b11) => \[SQL_UNION, SQL_UNION_ALL\]. /// Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlSupportedUnions`. SqlSupportedUnions = 540, /// Retrieves a int64 value representing the maximum number of hex characters allowed in an inline binary literal. 
@@ -1341,22 +1341,22 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported transactions isolation levels. /// /// For instance: - /// - return 0 (\b0) => [] (no supported SQL transactions isolation levels); + /// - return 0 (\b0) => \[\] (no supported SQL transactions isolation levels); /// - return 1 (\b1) => \[SQL_TRANSACTION_NONE\]; /// - return 2 (\b10) => \[SQL_TRANSACTION_READ_UNCOMMITTED\]; - /// - return 3 (\b11) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED]; + /// - return 3 (\b11) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED\]; /// - return 4 (\b100) => \[SQL_TRANSACTION_REPEATABLE_READ\]; - /// - return 5 (\b101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 6 (\b110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 7 (\b111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; + /// - return 5 (\b101) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 6 (\b110) => \[SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 7 (\b111) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ\]; /// - return 8 (\b1000) => \[SQL_TRANSACTION_REPEATABLE_READ\]; - /// - return 9 (\b1001) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 10 (\b1010) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 11 (\b1011) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 12 (\b1100) => [SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 13 (\b1101) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 14 (\b1110) => [SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; - /// - return 15 (\b1111) => [SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ]; + /// - return 9 (\b1001) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 10 (\b1010) => \[SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 11 (\b1011) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 12 (\b1100) => \[SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 13 (\b1101) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 14 (\b1110) => \[SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ\]; + /// - return 15 (\b1111) => \[SQL_TRANSACTION_NONE, SQL_TRANSACTION_READ_UNCOMMITTED, SQL_TRANSACTION_REPEATABLE_READ, SQL_TRANSACTION_REPEATABLE_READ\]; /// - return 16 (\b10000) => \[SQL_TRANSACTION_SERIALIZABLE\]; /// - ... /// Valid SQL positioned commands are described under `arrow.flight.protocol.sql.SqlTransactionIsolationLevel`. @@ -1381,14 +1381,14 @@ pub enum SqlInfo { /// The returned bitmask should be parsed in order to retrieve the supported result set types. 
/// /// For instance: - /// - return 0 (\b0) => [] (no supported result set types); + /// - return 0 (\b0) => \[\] (no supported result set types); /// - return 1 (\b1) => \[SQL_RESULT_SET_TYPE_UNSPECIFIED\]; /// - return 2 (\b10) => \[SQL_RESULT_SET_TYPE_FORWARD_ONLY\]; - /// - return 3 (\b11) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY]; + /// - return 3 (\b11) => \[SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY\]; /// - return 4 (\b100) => \[SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE\]; - /// - return 5 (\b101) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; - /// - return 6 (\b110) => [SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; - /// - return 7 (\b111) => [SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE]; + /// - return 5 (\b101) => \[SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE\]; + /// - return 6 (\b110) => \[SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE\]; + /// - return 7 (\b111) => \[SQL_RESULT_SET_TYPE_UNSPECIFIED, SQL_RESULT_SET_TYPE_FORWARD_ONLY, SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE\]; /// - return 8 (\b1000) => \[SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE\]; /// - ... /// Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetType`. @@ -1398,14 +1398,14 @@ pub enum SqlInfo { /// `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_UNSPECIFIED`. /// /// For instance: - /// - return 0 (\b0) => [] (no supported concurrency types for this result set type) + /// - return 0 (\b0) => \[\] (no supported concurrency types for this result set type) /// - return 1 (\b1) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED\] /// - return 2 (\b10) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] - /// - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + /// - return 3 (\b11) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] /// - return 4 (\b100) => \[SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] - /// - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + /// - return 5 (\b101) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 6 (\b110) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 7 (\b111) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] /// Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlSupportedConcurrenciesForResultSetUnspecified = 568, /// @@ -1413,14 +1413,14 @@ pub enum SqlInfo { /// `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_FORWARD_ONLY`. 
/// /// For instance: - /// - return 0 (\b0) => [] (no supported concurrency types for this result set type) + /// - return 0 (\b0) => \[\] (no supported concurrency types for this result set type) /// - return 1 (\b1) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED\] /// - return 2 (\b10) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] - /// - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + /// - return 3 (\b11) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] /// - return 4 (\b100) => \[SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] - /// - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + /// - return 5 (\b101) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 6 (\b110) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 7 (\b111) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] /// Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlSupportedConcurrenciesForResultSetForwardOnly = 569, /// @@ -1428,14 +1428,14 @@ pub enum SqlInfo { /// `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_SENSITIVE`. /// /// For instance: - /// - return 0 (\b0) => [] (no supported concurrency types for this result set type) + /// - return 0 (\b0) => \[\] (no supported concurrency types for this result set type) /// - return 1 (\b1) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED\] /// - return 2 (\b10) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] - /// - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + /// - return 3 (\b11) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] /// - return 4 (\b100) => \[SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] - /// - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + /// - return 5 (\b101) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 6 (\b110) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 7 (\b111) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] /// Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlSupportedConcurrenciesForResultSetScrollSensitive = 570, /// @@ -1443,14 +1443,14 @@ pub enum SqlInfo { /// `arrow.flight.protocol.sql.SqlSupportedResultSetType.SQL_RESULT_SET_TYPE_SCROLL_INSENSITIVE`. 
/// /// For instance: - /// - return 0 (\b0) => [] (no supported concurrency types for this result set type) + /// - return 0 (\b0) => \[\] (no supported concurrency types for this result set type) /// - return 1 (\b1) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED\] /// - return 2 (\b10) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] - /// - return 3 (\b11) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY] + /// - return 3 (\b11) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY\] /// - return 4 (\b100) => \[SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] - /// - return 5 (\b101) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 6 (\b110) => [SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] - /// - return 7 (\b111) => [SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE] + /// - return 5 (\b101) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 6 (\b110) => \[SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] + /// - return 7 (\b111) => \[SQL_RESULT_SET_CONCURRENCY_UNSPECIFIED, SQL_RESULT_SET_CONCURRENCY_READ_ONLY, SQL_RESULT_SET_CONCURRENCY_UPDATABLE\] /// Valid result set types are described under `arrow.flight.protocol.sql.SqlSupportedResultSetConcurrency`. SqlSupportedConcurrenciesForResultSetScrollInsensitive = 571, /// diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index 4b1f38ebcbb7..133df5b044cf 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -24,20 +24,23 @@ use std::collections::HashMap; use std::str::FromStr; use tonic::metadata::AsciiMetadataKey; +use crate::decode::FlightRecordBatchStream; +use crate::encode::FlightDataEncoderBuilder; +use crate::error::FlightError; use crate::flight_service_client::FlightServiceClient; use crate::sql::server::{CLOSE_PREPARED_STATEMENT, CREATE_PREPARED_STATEMENT}; use crate::sql::{ ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, - ActionCreatePreparedStatementResult, Any, CommandGetCatalogs, - CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, - CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, - CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, - CommandPreparedStatementQuery, CommandStatementQuery, CommandStatementUpdate, - DoPutUpdateResult, ProstMessageExt, SqlInfo, + ActionCreatePreparedStatementResult, Any, CommandGetCatalogs, CommandGetCrossReference, + CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys, + CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, + CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery, + CommandStatementUpdate, DoPutUpdateResult, ProstMessageExt, SqlInfo, }; +use crate::trailers::extract_lazy_trailers; use crate::{ - Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, - HandshakeResponse, IpcMessage, PutResult, Ticket, + Action, FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, + IpcMessage, PutResult, Ticket, }; use arrow_array::RecordBatch; use arrow_buffer::Buffer; @@ -130,11 +133,7 @@ impl FlightSqlServiceClient { /// Perform a `handshake` with the server, passing credentials and establishing a session /// Returns arbitrary auth/handshake info binary blob - pub 
async fn handshake( - &mut self, - username: &str, - password: &str, - ) -> Result<Bytes, ArrowError> { + pub async fn handshake(&mut self, username: &str, password: &str) -> Result<Bytes, ArrowError> { let cmd = HandshakeRequest { protocol_version: 0, payload: Default::default(), @@ -152,9 +151,9 @@ impl FlightSqlServiceClient<Channel> { .await .map_err(|e| ArrowError::IpcError(format!("Can't handshake {e}")))?; if let Some(auth) = resp.metadata().get("authorization") { - let auth = auth.to_str().map_err(|_| { - ArrowError::ParseError("Can't read auth header".to_string()) - })?; + let auth = auth + .to_str() + .map_err(|_| ArrowError::ParseError("Can't read auth header".to_string()))?; let bearer = "Bearer "; if !auth.starts_with(bearer) { Err(ArrowError::ParseError("Invalid auth header!".to_string()))?; @@ -162,10 +161,11 @@ impl FlightSqlServiceClient<Channel> { let auth = auth[bearer.len()..].to_string(); self.token = Some(auth); } - let responses: Vec<HandshakeResponse> = - resp.into_inner().try_collect().await.map_err(|_| { - ArrowError::ParseError("Can't collect responses".to_string()) - })?; + let responses: Vec<HandshakeResponse> = resp + .into_inner() + .try_collect() + .await + .map_err(|_| ArrowError::ParseError("Can't collect responses".to_string()))?; let resp = match responses.as_slice() { [resp] => resp.payload.clone(), [] => Bytes::new(), @@ -205,8 +205,7 @@ impl FlightSqlServiceClient<Channel> { .await .map_err(status_to_arrow_error)? .unwrap(); - let any = - Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; + let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; let result: DoPutUpdateResult = any.unpack()?.unwrap(); Ok(result.record_count) } @@ -229,14 +228,22 @@ impl FlightSqlServiceClient<Channel> { pub async fn do_get( &mut self, ticket: impl IntoRequest<Ticket>, - ) -> Result<Streaming<FlightData>, ArrowError> { + ) -> Result<FlightRecordBatchStream, ArrowError> { let req = self.set_request_headers(ticket.into_request())?; - Ok(self + + let (md, response_stream, _ext) = self .flight_client .do_get(req) .await .map_err(status_to_arrow_error)? - .into_inner()) + .into_parts(); + let (response_stream, trailers) = extract_lazy_trailers(response_stream); + + Ok(FlightRecordBatchStream::new_from_flight_data( + response_stream.map_err(FlightError::Tonic), + ) + .with_headers(md) + .with_trailers(trailers)) } /// Push a stream to the flight service associated with a particular flight stream. @@ -393,17 +400,13 @@ impl FlightSqlServiceClient<Channel> { ArrowError::ParseError(format!("Cannot convert header key \"{k}\": {e}")) })?; let v = v.parse().map_err(|e| { - ArrowError::ParseError(format!( - "Cannot convert header value \"{v}\": {e}" - )) + ArrowError::ParseError(format!("Cannot convert header value \"{v}\": {e}")) })?; req.metadata_mut().insert(k, v); } if let Some(token) = &self.token { let val = format!("Bearer {token}").parse().map_err(|e| { - ArrowError::ParseError(format!( - "Cannot convert token to header value: {e}" - )) + ArrowError::ParseError(format!("Cannot convert token to header value: {e}")) })?; req.metadata_mut().insert("authorization", val); } @@ -439,9 +442,12 @@ impl PreparedStatement<Channel> { /// Executes the prepared statement query on the server. pub async fn execute(&mut self) -> Result<FlightInfo, ArrowError> { + self.write_bind_params().await?; + let cmd = CommandPreparedStatementQuery { prepared_statement_handle: self.handle.clone(), }; + let result = self .flight_sql_client .get_flight_info_for_command(cmd)
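A sketch of consuming the new `do_get` return type (connection and ticket acquisition are elided, and the boxed error type is only to keep the sketch short): headers are readable immediately, while trailers only materialize once the stream has been fully drained.

```rust
use arrow_flight::sql::client::FlightSqlServiceClient;
use arrow_flight::Ticket;
use futures::TryStreamExt;
use tonic::transport::Channel;

async fn fetch(
    client: &mut FlightSqlServiceClient<Channel>,
    ticket: Ticket,
) -> Result<(), Box<dyn std::error::Error>> {
    let mut stream = client.do_get(ticket).await?;

    // Response headers are available as soon as the call returns...
    println!("headers: {:?}", stream.headers());

    // ...the stream itself yields decoded RecordBatches...
    let batches: Vec<_> = (&mut stream).try_collect().await?;
    println!("read {} batches", batches.len());

    // ...and trailers appear only after the stream is exhausted.
    if let Some(trailers) = stream.trailers() {
        println!("trailers: {trailers:?}");
    }
    Ok(())
}
```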
pub async fn execute_update(&mut self) -> Result { - let cmd = CommandPreparedStatementQuery { + self.write_bind_params().await?; + + let cmd = CommandPreparedStatementUpdate { prepared_statement_handle: self.handle.clone(), }; let descriptor = FlightDescriptor::new_cmd(cmd.as_any().encode_to_vec()); @@ -467,8 +475,7 @@ .await .map_err(status_to_arrow_error)? .unwrap(); - let any = - Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; + let any = Any::decode(&*result.app_metadata).map_err(decode_error_to_arrow_error)?; let result: DoPutUpdateResult = any.unpack()?.unwrap(); Ok(result.record_count) } @@ -484,14 +491,41 @@ } /// Set a RecordBatch that contains the parameters that will be bound. - pub fn set_parameters( - &mut self, - parameter_binding: RecordBatch, - ) -> Result<(), ArrowError> { + pub fn set_parameters(&mut self, parameter_binding: RecordBatch) -> Result<(), ArrowError> { self.parameter_binding = Some(parameter_binding); Ok(()) } + /// Submit parameters to the server, if any have been set on this prepared statement instance + async fn write_bind_params(&mut self) -> Result<(), ArrowError> { + if let Some(ref params_batch) = self.parameter_binding { + let cmd = CommandPreparedStatementQuery { + prepared_statement_handle: self.handle.clone(), + }; + + let descriptor = FlightDescriptor::new_cmd(cmd.as_any().encode_to_vec()); + let flight_stream_builder = FlightDataEncoderBuilder::new() + .with_flight_descriptor(Some(descriptor)) + .with_schema(params_batch.schema()); + let flight_data = flight_stream_builder + .build(futures::stream::iter( + self.parameter_binding.clone().map(Ok), + )) + .try_collect::>() + .await + .map_err(flight_error_to_arrow_error)?; + + self.flight_sql_client + .do_put(stream::iter(flight_data)) + .await? + .try_collect::>() + .await + .map_err(status_to_arrow_error)?; + } + + Ok(()) + } + /// Close the prepared statement, so that this PreparedStatement can no longer be used /// and the server can free up any resources.
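The new `write_bind_params` means bound parameters are actually shipped to the server (as a `do_put` of an encoded parameter batch) before the query or update command is issued, and `execute_update` now sends `CommandPreparedStatementUpdate` rather than a query command. A sketch of the caller-side flow, assuming the `Channel`-parameterized types and a `($1: Utf8, $2: Int64)` parameter schema like the one in the CLI test further down:

```rust
use std::sync::Arc;

use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray};
use arrow_flight::sql::client::PreparedStatement;
use arrow_schema::ArrowError;
use tonic::transport::Channel;

async fn bind_and_run(stmt: &mut PreparedStatement<Channel>) -> Result<i64, ArrowError> {
    // One row of parameter values; the batch schema must line up with the
    // parameter schema the server reported for this statement.
    let params = RecordBatch::try_from_iter(vec![
        ("$1", Arc::new(StringArray::from(vec!["Hello"])) as ArrayRef),
        ("$2", Arc::new(Int64Array::from(vec![42])) as ArrayRef),
    ])?;
    stmt.set_parameters(params)?;

    // write_bind_params() uploads the bound batch via do_put before
    // CommandPreparedStatementUpdate is issued.
    stmt.execute_update().await
}
```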
pub async fn close(mut self) -> Result<(), ArrowError> { @@ -515,6 +549,13 @@ fn status_to_arrow_error(status: tonic::Status) -> ArrowError { ArrowError::IpcError(format!("{status:?}")) } +fn flight_error_to_arrow_error(err: FlightError) -> ArrowError { + match err { + FlightError::Arrow(e) => e, + e => ArrowError::ExternalError(Box::new(e)), + } +} + // A polymorphic structure to natively represent different types of data contained in `FlightData` pub enum ArrowFlightData { RecordBatch(RecordBatch), @@ -526,19 +567,16 @@ pub fn arrow_data_from_flight_data( flight_data: FlightData, arrow_schema_ref: &SchemaRef, ) -> Result { - let ipc_message = root_as_message(&flight_data.data_header[..]).map_err(|err| { - ArrowError::ParseError(format!("Unable to get root as message: {err:?}")) - })?; + let ipc_message = root_as_message(&flight_data.data_header[..]) + .map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?; match ipc_message.header_type() { MessageHeader::RecordBatch => { - let ipc_record_batch = - ipc_message.header_as_record_batch().ok_or_else(|| { - ArrowError::ComputeError( - "Unable to convert flight data header to a record batch" - .to_string(), - ) - })?; + let ipc_record_batch = ipc_message.header_as_record_batch().ok_or_else(|| { + ArrowError::ComputeError( + "Unable to convert flight data header to a record batch".to_string(), + ) + })?; let dictionaries_by_field = HashMap::new(); let record_batch = read_record_batch( @@ -564,13 +602,11 @@ pub fn arrow_data_from_flight_data( MessageHeader::DictionaryBatch => { let _ = ipc_message.header_as_dictionary_batch().ok_or_else(|| { ArrowError::ComputeError( - "Unable to convert flight data header to a dictionary batch" - .to_string(), + "Unable to convert flight data header to a dictionary batch".to_string(), ) })?; Err(ArrowError::NotYetImplemented( - "no idea on how to convert an ipc dictionary batch to an arrow type" - .to_string(), + "no idea on how to convert an ipc dictionary batch to an arrow type".to_string(), )) } MessageHeader::Tensor => { @@ -590,8 +626,7 @@ pub fn arrow_data_from_flight_data( ) })?; Err(ArrowError::NotYetImplemented( - "no idea on how to convert an ipc sparse tensor to an arrow type" - .to_string(), + "no idea on how to convert an ipc sparse tensor to an arrow type".to_string(), )) } _ => Err(ArrowError::ComputeError(format!( diff --git a/arrow-flight/src/sql/metadata/db_schemas.rs b/arrow-flight/src/sql/metadata/db_schemas.rs index 642802b058d5..303d11cd74ca 100644 --- a/arrow-flight/src/sql/metadata/db_schemas.rs +++ b/arrow-flight/src/sql/metadata/db_schemas.rs @@ -95,11 +95,7 @@ impl GetDbSchemasBuilder { /// Append a row /// /// In case the catalog should be considered as empty, pass in an empty string '""'. 
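For context on the `append` signature being reflowed here: `GetDbSchemasBuilder` is the server-side helper for answering `CommandGetDbSchemas`. A sketch of typical usage, assuming an `into_builder` convenience on the command (as with the other metadata builders in this module); the catalog and schema names are placeholders:

```rust
use arrow_array::RecordBatch;
use arrow_flight::sql::metadata::GetDbSchemasBuilder;
use arrow_flight::sql::CommandGetDbSchemas;

// Build the response batch for a CommandGetDbSchemas request; `build`
// applies the command's catalog/schema filters and sorts the rows.
fn db_schemas_response(cmd: CommandGetDbSchemas) -> RecordBatch {
    let mut builder: GetDbSchemasBuilder = cmd.into_builder();
    builder.append("a_catalog", "a_schema");
    builder.append("", "b_schema"); // "" marks a row whose catalog is empty
    builder.build().expect("valid metadata batch")
}
```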
- pub fn append( - &mut self, - catalog_name: impl AsRef, - schema_name: impl AsRef, - ) { + pub fn append(&mut self, catalog_name: impl AsRef, schema_name: impl AsRef) { self.catalog_name.append_value(catalog_name); self.db_schema_name.append_value(schema_name); } diff --git a/arrow-flight/src/sql/metadata/mod.rs b/arrow-flight/src/sql/metadata/mod.rs index 71551f1849ae..1e9881ffa70e 100644 --- a/arrow-flight/src/sql/metadata/mod.rs +++ b/arrow-flight/src/sql/metadata/mod.rs @@ -53,7 +53,7 @@ fn lexsort_to_indices(arrays: &[ArrayRef]) -> UInt32Array { .iter() .map(|a| SortField::new(a.data_type().clone())) .collect(); - let mut converter = RowConverter::new(fields).unwrap(); + let converter = RowConverter::new(fields).unwrap(); let rows = converter.convert_columns(arrays).unwrap(); let mut sort: Vec<_> = rows.iter().enumerate().collect(); sort.sort_unstable_by(|(_, a), (_, b)| a.cmp(b)); diff --git a/arrow-flight/src/sql/metadata/sql_info.rs b/arrow-flight/src/sql/metadata/sql_info.rs index 88c97227814d..d4584f4a6827 100644 --- a/arrow-flight/src/sql/metadata/sql_info.rs +++ b/arrow-flight/src/sql/metadata/sql_info.rs @@ -30,8 +30,8 @@ use std::sync::Arc; use arrow_arith::boolean::or; use arrow_array::array::{Array, UInt32Array, UnionArray}; use arrow_array::builder::{ - ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, - MapBuilder, StringBuilder, UInt32Builder, + ArrayBuilder, BooleanBuilder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, MapBuilder, + StringBuilder, UInt32Builder, }; use arrow_array::{RecordBatch, Scalar}; use arrow_data::ArrayData; @@ -184,11 +184,7 @@ static UNION_TYPE: Lazy = Lazy::new(|| { Field::new("keys", DataType::Int32, false), Field::new( "values", - DataType::List(Arc::new(Field::new( - "item", - DataType::Int32, - true, - ))), + DataType::List(Arc::new(Field::new("item", DataType::Int32, true))), true, ), ])), @@ -420,10 +416,7 @@ pub struct SqlInfoData { impl SqlInfoData { /// Return a [`RecordBatch`] containing only the requested `u32`, if any /// from [`CommandGetSqlInfo`] - pub fn record_batch( - &self, - info: impl IntoIterator, - ) -> Result { + pub fn record_batch(&self, info: impl IntoIterator) -> Result { let arr = self.batch.column(0); let type_filter = info .into_iter() @@ -493,9 +486,7 @@ mod tests { use super::SqlInfoDataBuilder; use crate::sql::metadata::tests::assert_batches_eq; - use crate::sql::{ - SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert, - }; + use crate::sql::{SqlInfo, SqlNullOrdering, SqlSupportedTransaction, SqlSupportsConvert}; #[test] fn test_sql_infos() { diff --git a/arrow-flight/src/sql/metadata/tables.rs b/arrow-flight/src/sql/metadata/tables.rs index 00502a76db53..7ffb76fa1d5f 100644 --- a/arrow-flight/src/sql/metadata/tables.rs +++ b/arrow-flight/src/sql/metadata/tables.rs @@ -329,12 +329,12 @@ mod tests { "b_catalog", ])) as ArrayRef, Arc::new(StringArray::from(vec![ - "a_schema", "a_schema", "b_schema", "b_schema", "a_schema", - "a_schema", "b_schema", "b_schema", + "a_schema", "a_schema", "b_schema", "b_schema", "a_schema", "a_schema", + "b_schema", "b_schema", ])) as ArrayRef, Arc::new(StringArray::from(vec![ - "a_table", "b_table", "a_table", "b_table", "a_table", "a_table", - "b_table", "b_table", + "a_table", "b_table", "a_table", "b_table", "a_table", "a_table", "b_table", + "b_table", ])) as ArrayRef, Arc::new(StringArray::from(vec![ "TABLE", "TABLE", "TABLE", "TABLE", "TABLE", "VIEW", "TABLE", "VIEW", diff --git 
a/arrow-flight/src/sql/metadata/xdbc_info.rs b/arrow-flight/src/sql/metadata/xdbc_info.rs index 8212c847a4fa..2e635d3037bc 100644 --- a/arrow-flight/src/sql/metadata/xdbc_info.rs +++ b/arrow-flight/src/sql/metadata/xdbc_info.rs @@ -36,9 +36,7 @@ use once_cell::sync::Lazy; use super::lexsort_to_indices; use crate::error::*; -use crate::sql::{ - CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode, -}; +use crate::sql::{CommandGetXdbcTypeInfo, Nullable, Searchable, XdbcDataType, XdbcDatetimeSubcode}; /// Data structure representing type information for xdbc types. #[derive(Debug, Clone, Default)] @@ -201,8 +199,7 @@ impl XdbcTypeInfoDataBuilder { minimum_scale_builder.append_option(info.minimum_scale); maximum_scale_builder.append_option(info.maximum_scale); sql_data_type_builder.append_value(info.sql_data_type as i32); - datetime_subcode_builder - .append_option(info.datetime_subcode.map(|code| code as i32)); + datetime_subcode_builder.append_option(info.datetime_subcode.map(|code| code as i32)); num_prec_radix_builder.append_option(info.num_prec_radix); interval_precision_builder.append_option(info.interval_precision); }); @@ -215,8 +212,7 @@ impl XdbcTypeInfoDataBuilder { let (field, offsets, values, nulls) = create_params_builder.finish().into_parts(); // Re-defined the field to be non-nullable let new_field = Arc::new(field.as_ref().clone().with_nullable(false)); - let create_params = - Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef; + let create_params = Arc::new(ListArray::new(new_field, offsets, values, nulls)) as ArrayRef; let nullable = Arc::new(nullable_builder.finish()); let case_sensitive = Arc::new(case_sensitive_builder.finish()); let searchable = Arc::new(searchable_builder.finish()); diff --git a/arrow-flight/src/sql/mod.rs b/arrow-flight/src/sql/mod.rs index 4bb8ce8b36e5..97645ae7840d 100644 --- a/arrow-flight/src/sql/mod.rs +++ b/arrow-flight/src/sql/mod.rs @@ -93,6 +93,7 @@ pub use gen::SqlSupportedTransactions; pub use gen::SqlSupportedUnions; pub use gen::SqlSupportsConvert; pub use gen::SqlTransactionIsolationLevel; +pub use gen::SubstraitPlan; pub use gen::SupportedSqlGrammar; pub use gen::TicketStatementQuery; pub use gen::UpdateDeleteRules; @@ -294,9 +295,8 @@ impl Any { if !self.is::() { return Ok(None); } - let m = Message::decode(&*self.value).map_err(|err| { - ArrowError::ParseError(format!("Unable to decode Any value: {err}")) - })?; + let m = Message::decode(&*self.value) + .map_err(|err| ArrowError::ParseError(format!("Unable to decode Any value: {err}")))?; Ok(Some(m)) } diff --git a/arrow-flight/src/sql/server.rs b/arrow-flight/src/sql/server.rs index 102d97105a2e..f1656aca882a 100644 --- a/arrow-flight/src/sql/server.rs +++ b/arrow-flight/src/sql/server.rs @@ -19,28 +19,26 @@ use std::pin::Pin; -use futures::Stream; +use futures::{stream::Peekable, Stream, StreamExt}; use prost::Message; use tonic::{Request, Response, Status, Streaming}; use super::{ - ActionBeginSavepointRequest, ActionBeginSavepointResult, - ActionBeginTransactionRequest, ActionBeginTransactionResult, - ActionCancelQueryRequest, ActionCancelQueryResult, + ActionBeginSavepointRequest, ActionBeginSavepointResult, ActionBeginTransactionRequest, + ActionBeginTransactionResult, ActionCancelQueryRequest, ActionCancelQueryResult, ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest, - ActionEndSavepointRequest, ActionEndTransactionRequest, Any, 
Command, - CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, - CommandGetExportedKeys, CommandGetImportedKeys, CommandGetPrimaryKeys, - CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, - CommandPreparedStatementQuery, CommandPreparedStatementUpdate, CommandStatementQuery, - CommandStatementSubstraitPlan, CommandStatementUpdate, DoPutUpdateResult, - ProstMessageExt, SqlInfo, TicketStatementQuery, + ActionEndSavepointRequest, ActionEndTransactionRequest, Any, Command, CommandGetCatalogs, + CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, CommandGetImportedKeys, + CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, CommandGetTables, + CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, CommandPreparedStatementUpdate, + CommandStatementQuery, CommandStatementSubstraitPlan, CommandStatementUpdate, + DoPutUpdateResult, ProstMessageExt, SqlInfo, TicketStatementQuery, }; use crate::{ - flight_service_server::FlightService, Action, ActionType, Criteria, Empty, - FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, - PutResult, SchemaResult, Ticket, + flight_service_server::FlightService, Action, ActionType, Criteria, Empty, FlightData, + FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, + Ticket, }; pub(crate) static CREATE_PREPARED_STATEMENT: &str = "CreatePreparedStatement"; @@ -227,6 +225,18 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { )) } + /// Implementors may override to handle additional calls to get_flight_info() + async fn get_flight_info_fallback( + &self, + cmd: Command, + _request: Request, + ) -> Result, Status> { + Err(Status::unimplemented(format!( + "get_flight_info: The defined request is invalid: {}", + cmd.type_url() + ))) + } + // do_get /// Get a FlightDataStream containing the query results. 
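The new `get_flight_info_fallback` hook lets implementations handle command types the built-in dispatch does not recognize, instead of the previous hard-coded `unimplemented` error. A minimal sketch of overriding it; only `register_sql_info` is otherwise required since every other handler has a default, and a real service would implement those too:

```rust
use arrow_flight::sql::server::FlightSqlService;
use arrow_flight::sql::{Command, SqlInfo};
use arrow_flight::{FlightDescriptor, FlightInfo};
use tonic::{Request, Response, Status};

#[derive(Clone)]
struct MyFlightSqlService {}

#[tonic::async_trait]
impl FlightSqlService for MyFlightSqlService {
    type FlightService = MyFlightSqlService;

    // Called for any Command the get_flight_info dispatch table does not
    // recognize; previously such commands always failed with `unimplemented`.
    async fn get_flight_info_fallback(
        &self,
        cmd: Command,
        _request: Request<FlightDescriptor>,
    ) -> Result<Response<FlightInfo>, Status> {
        Err(Status::invalid_argument(format!(
            "this server has no handler for command {}",
            cmd.type_url()
        )))
    }

    async fn register_sql_info(&self, _id: i32, _result: &SqlInfo) {}
}
```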
@@ -366,7 +376,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { /// Implementors may override to handle additional calls to do_put() async fn do_put_fallback( &self, - _request: Request>, + _request: Request, message: Any, ) -> Result::DoPutStream>, Status> { Err(Status::unimplemented(format!( @@ -379,7 +389,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { async fn do_put_statement_update( &self, _ticket: CommandStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_statement_update has no default implementation", @@ -390,7 +400,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { async fn do_put_prepared_statement_query( &self, _query: CommandPreparedStatementQuery, - _request: Request>, + _request: Request, ) -> Result::DoPutStream>, Status> { Err(Status::unimplemented( "do_put_prepared_statement_query has no default implementation", @@ -401,7 +411,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { async fn do_put_prepared_statement_update( &self, _query: CommandPreparedStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_prepared_statement_update has no default implementation", @@ -412,7 +422,7 @@ pub trait FlightSqlService: Sync + Send + Sized + 'static { async fn do_put_substrait_plan( &self, _query: CommandStatementSubstraitPlan, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_substrait_plan has no default implementation", @@ -549,13 +559,10 @@ where Pin> + Send + 'static>>; type ListFlightsStream = Pin> + Send + 'static>>; - type DoGetStream = - Pin> + Send + 'static>>; - type DoPutStream = - Pin> + Send + 'static>>; - type DoActionStream = Pin< - Box> + Send + 'static>, - >; + type DoGetStream = Pin> + Send + 'static>>; + type DoPutStream = Pin> + Send + 'static>>; + type DoActionStream = + Pin> + Send + 'static>>; type ListActionsStream = Pin> + Send + 'static>>; type DoExchangeStream = @@ -580,8 +587,7 @@ where &self, request: Request, ) -> Result, Status> { - let message = - Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?; + let message = Any::decode(&*request.get_ref().cmd).map_err(decode_error_to_status)?; match Command::try_from(message).map_err(arrow_error_to_status)? { Command::CommandStatementQuery(token) => { @@ -600,9 +606,7 @@ where Command::CommandGetDbSchemas(token) => { return self.get_flight_info_schemas(token, request).await } - Command::CommandGetTables(token) => { - self.get_flight_info_tables(token, request).await - } + Command::CommandGetTables(token) => self.get_flight_info_tables(token, request).await, Command::CommandGetTableTypes(token) => { self.get_flight_info_table_types(token, request).await } @@ -624,10 +628,7 @@ where Command::CommandGetXdbcTypeInfo(token) => { self.get_flight_info_xdbc_type_info(token, request).await } - cmd => Err(Status::unimplemented(format!( - "get_flight_info: The defined request is invalid: {}", - cmd.type_url() - ))), + cmd => self.get_flight_info_fallback(cmd, request).await, } } @@ -642,31 +643,21 @@ where &self, request: Request, ) -> Result, Status> { - let msg: Any = Message::decode(&*request.get_ref().ticket) - .map_err(decode_error_to_status)?; + let msg: Any = + Message::decode(&*request.get_ref().ticket).map_err(decode_error_to_status)?; match Command::try_from(msg).map_err(arrow_error_to_status)? 
{ - Command::TicketStatementQuery(command) => { - self.do_get_statement(command, request).await - } + Command::TicketStatementQuery(command) => self.do_get_statement(command, request).await, Command::CommandPreparedStatementQuery(command) => { self.do_get_prepared_statement(command, request).await } - Command::CommandGetCatalogs(command) => { - self.do_get_catalogs(command, request).await - } - Command::CommandGetDbSchemas(command) => { - self.do_get_schemas(command, request).await - } - Command::CommandGetTables(command) => { - self.do_get_tables(command, request).await - } + Command::CommandGetCatalogs(command) => self.do_get_catalogs(command, request).await, + Command::CommandGetDbSchemas(command) => self.do_get_schemas(command, request).await, + Command::CommandGetTables(command) => self.do_get_tables(command, request).await, Command::CommandGetTableTypes(command) => { self.do_get_table_types(command, request).await } - Command::CommandGetSqlInfo(command) => { - self.do_get_sql_info(command, request).await - } + Command::CommandGetSqlInfo(command) => self.do_get_sql_info(command, request).await, Command::CommandGetPrimaryKeys(command) => { self.do_get_primary_keys(command, request).await } @@ -688,11 +679,19 @@ where async fn do_put( &self, - mut request: Request>, + request: Request>, ) -> Result, Status> { - let cmd = request.get_mut().message().await?.unwrap(); - let message = Any::decode(&*cmd.flight_descriptor.unwrap().cmd) - .map_err(decode_error_to_status)?; + // See issue #4658: https://github.com/apache/arrow-rs/issues/4658 + // To dispatch to the correct `do_put` method, we cannot discard the first message, + // as it may contain the Arrow schema, which the `do_put` handler may need. + // To allow the first message to be reused by the `do_put` handler, + // we wrap this stream in a `Peekable` one, which allows us to peek at + // the first message without discarding it. + let mut request = request.map(PeekableFlightDataStream::new); + let cmd = Pin::new(request.get_mut()).peek().await.unwrap().clone()?; + + let message = + Any::decode(&*cmd.flight_descriptor.unwrap().cmd).map_err(decode_error_to_status)?; match Command::try_from(message).map_err(arrow_error_to_status)? 
{ Command::CommandStatementUpdate(command) => { let record_count = self.do_put_statement_update(command, request).await?; @@ -747,11 +746,10 @@ where }; let create_prepared_substrait_plan_action_type = ActionType { r#type: CREATE_PREPARED_SUBSTRAIT_PLAN.to_string(), - description: - "Creates a reusable prepared substrait plan resource on the server.\n + description: "Creates a reusable prepared substrait plan resource on the server.\n Request Message: ActionCreatePreparedSubstraitPlanRequest\n Response Message: ActionCreatePreparedStatementResult" - .into(), + .into(), }; let begin_transaction_action_type = ActionType { r#type: BEGIN_TRANSACTION.to_string(), @@ -812,8 +810,7 @@ where request: Request, ) -> Result, Status> { if request.get_ref().r#type == CREATE_PREPARED_STATEMENT { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionCreatePreparedStatementRequest = any .unpack() @@ -831,8 +828,7 @@ where })]); return Ok(Response::new(Box::pin(output))); } else if request.get_ref().r#type == CLOSE_PREPARED_STATEMENT { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionClosePreparedStatementRequest = any .unpack() @@ -846,8 +842,7 @@ where .await?; return Ok(Response::new(Box::pin(futures::stream::empty()))); } else if request.get_ref().r#type == CREATE_PREPARED_SUBSTRAIT_PLAN { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionCreatePreparedSubstraitPlanRequest = any .unpack() @@ -861,47 +856,38 @@ where .await?; return Ok(Response::new(Box::pin(futures::stream::empty()))); } else if request.get_ref().r#type == BEGIN_TRANSACTION { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionBeginTransactionRequest = any .unpack() .map_err(arrow_error_to_status)? .ok_or_else(|| { - Status::invalid_argument( - "Unable to unpack ActionBeginTransactionRequest.", - ) - })?; + Status::invalid_argument("Unable to unpack ActionBeginTransactionRequest.") + })?; let stmt = self.do_action_begin_transaction(cmd, request).await?; let output = futures::stream::iter(vec![Ok(super::super::gen::Result { body: stmt.as_any().encode_to_vec().into(), })]); return Ok(Response::new(Box::pin(output))); } else if request.get_ref().r#type == END_TRANSACTION { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionEndTransactionRequest = any .unpack() .map_err(arrow_error_to_status)? 
.ok_or_else(|| { - Status::invalid_argument( - "Unable to unpack ActionEndTransactionRequest.", - ) + Status::invalid_argument("Unable to unpack ActionEndTransactionRequest.") })?; self.do_action_end_transaction(cmd, request).await?; return Ok(Response::new(Box::pin(futures::stream::empty()))); } else if request.get_ref().r#type == BEGIN_SAVEPOINT { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionBeginSavepointRequest = any .unpack() .map_err(arrow_error_to_status)? .ok_or_else(|| { - Status::invalid_argument( - "Unable to unpack ActionBeginSavepointRequest.", - ) + Status::invalid_argument("Unable to unpack ActionBeginSavepointRequest.") })?; let stmt = self.do_action_begin_savepoint(cmd, request).await?; let output = futures::stream::iter(vec![Ok(super::super::gen::Result { @@ -909,22 +895,18 @@ where })]); return Ok(Response::new(Box::pin(output))); } else if request.get_ref().r#type == END_SAVEPOINT { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionEndSavepointRequest = any .unpack() .map_err(arrow_error_to_status)? .ok_or_else(|| { - Status::invalid_argument( - "Unable to unpack ActionEndSavepointRequest.", - ) + Status::invalid_argument("Unable to unpack ActionEndSavepointRequest.") })?; self.do_action_end_savepoint(cmd, request).await?; return Ok(Response::new(Box::pin(futures::stream::empty()))); } else if request.get_ref().r#type == CANCEL_QUERY { - let any = - Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; + let any = Any::decode(&*request.get_ref().body).map_err(decode_error_to_status)?; let cmd: ActionCancelQueryRequest = any .unpack() @@ -957,3 +939,89 @@ fn decode_error_to_status(err: prost::DecodeError) -> Status { fn arrow_error_to_status(err: arrow_schema::ArrowError) -> Status { Status::internal(format!("{err:?}")) } + +/// A wrapper around [`Streaming`] that allows "peeking" at the +/// message at the front of the stream without consuming it. +/// This is needed because sometimes the first message in the stream will contain +/// a [`FlightDescriptor`] in addition to potentially any data, and the dispatch logic +/// must inspect this information. +/// +/// # Example +/// +/// [`PeekableFlightDataStream::peek`] can be used to peek at the first message without +/// discarding it; otherwise, `PeekableFlightDataStream` can be used as a regular stream. +/// See the following example: +/// +/// ```no_run +/// use arrow_array::RecordBatch; +/// use arrow_flight::decode::FlightRecordBatchStream; +/// use arrow_flight::FlightDescriptor; +/// use arrow_flight::error::FlightError; +/// use arrow_flight::sql::server::PeekableFlightDataStream; +/// use tonic::{Request, Status}; +/// use futures::TryStreamExt; +/// +/// #[tokio::main] +/// async fn main() -> Result<(), Status> { +/// let request: Request = todo!(); +/// let stream: PeekableFlightDataStream = request.into_inner(); +/// +/// // The first message contains the flight descriptor and the schema. +/// // Read the flight descriptor without discarding the schema: +/// let flight_descriptor: FlightDescriptor = stream +/// .peek() +/// .await +/// .cloned() +/// .transpose()? 
+/// .and_then(|data| data.flight_descriptor) +/// .expect("first message should contain flight descriptor"); +/// +/// // Pass the stream through a decoder +/// let batches: Vec = FlightRecordBatchStream::new_from_flight_data( +/// request.into_inner().map_err(|e| e.into()), +/// ) +/// .try_collect() +/// .await?; +/// } +/// ``` +pub struct PeekableFlightDataStream { + inner: Peekable>, +} + +impl PeekableFlightDataStream { + fn new(stream: Streaming) -> Self { + Self { + inner: stream.peekable(), + } + } + + /// Convert this stream into a `Streaming`. + /// Any messages observed through [`Self::peek`] will be lost + /// after the conversion. + pub fn into_inner(self) -> Streaming { + self.inner.into_inner() + } + + /// Convert this stream into a `Peekable>`. + /// Preserves the state of the stream, so that calls to [`Self::peek`] + /// and [`Self::poll_next`] are the same. + pub fn into_peekable(self) -> Peekable> { + self.inner + } + + /// Peek at the head of this stream without advancing it. + pub async fn peek(&mut self) -> Option<&Result> { + Pin::new(&mut self.inner).peek().await + } +} + +impl Stream for PeekableFlightDataStream { + type Item = Result; + + fn poll_next( + mut self: Pin<&mut Self>, + cx: &mut std::task::Context<'_>, + ) -> std::task::Poll> { + self.inner.poll_next_unpin(cx) + } +} diff --git a/arrow-flight/src/trailers.rs b/arrow-flight/src/trailers.rs index d652542da779..73136379d69f 100644 --- a/arrow-flight/src/trailers.rs +++ b/arrow-flight/src/trailers.rs @@ -28,9 +28,7 @@ use tonic::{metadata::MetadataMap, Status, Streaming}; /// /// Note that [`LazyTrailers`] has inner mutability and will only hold actual data after [`ExtractTrailersStream`] is /// fully consumed (dropping it is not required though). -pub fn extract_lazy_trailers( - s: Streaming, -) -> (ExtractTrailersStream, LazyTrailers) { +pub fn extract_lazy_trailers(s: Streaming) -> (ExtractTrailersStream, LazyTrailers) { let trailers: SharedTrailers = Default::default(); let stream = ExtractTrailersStream { inner: s, @@ -54,10 +52,7 @@ pub struct ExtractTrailersStream { impl Stream for ExtractTrailersStream { type Item = Result; - fn poll_next( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { + fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { let res = ready!(self.inner.poll_next_unpin(cx)); if res.is_none() { diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs index 145626b6608f..b75d61d200cb 100644 --- a/arrow-flight/src/utils.rs +++ b/arrow-flight/src/utils.rs @@ -52,26 +52,23 @@ pub fn flight_data_from_arrow_batch( } /// Convert a slice of wire protocol `FlightData`s into a vector of `RecordBatch`es -pub fn flight_data_to_batches( - flight_data: &[FlightData], -) -> Result, ArrowError> { +pub fn flight_data_to_batches(flight_data: &[FlightData]) -> Result, ArrowError> { let schema = flight_data.get(0).ok_or_else(|| { ArrowError::CastError("Need at least one FlightData for schema".to_string()) })?; let message = root_as_message(&schema.data_header[..]) .map_err(|_| ArrowError::CastError("Cannot get root as message".to_string()))?; - let ipc_schema: arrow_ipc::Schema = message.header_as_schema().ok_or_else(|| { - ArrowError::CastError("Cannot get header as Schema".to_string()) - })?; + let ipc_schema: arrow_ipc::Schema = message + .header_as_schema() + .ok_or_else(|| ArrowError::CastError("Cannot get header as Schema".to_string()))?; let schema = fb_to_schema(ipc_schema); let schema = Arc::new(schema); let mut batches = vec![]; let 
dictionaries_by_id = HashMap::new(); for datum in flight_data[1..].iter() { - let batch = - flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?; + let batch = flight_data_to_arrow_batch(datum, schema.clone(), &dictionaries_by_id)?; batches.push(batch); } Ok(batches) @@ -84,9 +81,8 @@ pub fn flight_data_to_arrow_batch( dictionaries_by_id: &HashMap, ) -> Result { // check that the data_header is a record batch message - let message = arrow_ipc::root_as_message(&data.data_header[..]).map_err(|err| { - ArrowError::ParseError(format!("Unable to get root as message: {err:?}")) - })?; + let message = arrow_ipc::root_as_message(&data.data_header[..]) + .map_err(|err| ArrowError::ParseError(format!("Unable to get root as message: {err:?}")))?; message .header_as_record_batch() @@ -124,10 +120,7 @@ pub fn flight_schema_from_arrow_schema( since = "4.4.0", note = "Use From trait, e.g.: SchemaAsIpc::new(schema, options).into()" )] -pub fn flight_data_from_arrow_schema( - schema: &Schema, - options: &IpcWriteOptions, -) -> FlightData { +pub fn flight_data_from_arrow_schema(schema: &Schema, options: &IpcWriteOptions) -> FlightData { SchemaAsIpc::new(schema, options).into() } diff --git a/arrow-flight/tests/client.rs b/arrow-flight/tests/client.rs index 1b9891e121fa..3ad9ee7a45ca 100644 --- a/arrow-flight/tests/client.rs +++ b/arrow-flight/tests/client.rs @@ -23,9 +23,9 @@ mod common { } use arrow_array::{RecordBatch, UInt64Array}; use arrow_flight::{ - decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, - error::FlightError, Action, ActionType, Criteria, Empty, FlightClient, FlightData, - FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, Ticket, + decode::FlightRecordBatchStream, encode::FlightDataEncoderBuilder, error::FlightError, Action, + ActionType, Criteria, Empty, FlightClient, FlightData, FlightDescriptor, FlightInfo, + HandshakeRequest, HandshakeResponse, PutResult, Ticket, }; use arrow_schema::{DataType, Field, Schema}; use bytes::Bytes; @@ -271,8 +271,7 @@ async fn test_do_put() { }, ]; - test_server - .set_do_put_response(expected_response.clone().into_iter().map(Ok).collect()); + test_server.set_do_put_response(expected_response.clone().into_iter().map(Ok).collect()); let input_stream = futures::stream::iter(input_flight_data.clone()).map(Ok); @@ -446,9 +445,8 @@ async fn test_do_exchange() { let input_flight_data = test_flight_data().await; let output_flight_data = test_flight_data2().await; - test_server.set_do_exchange_response( - output_flight_data.clone().into_iter().map(Ok).collect(), - ); + test_server + .set_do_exchange_response(output_flight_data.clone().into_iter().map(Ok).collect()); let response_stream = client .do_exchange(futures::stream::iter(input_flight_data.clone())) diff --git a/arrow-flight/tests/common/server.rs b/arrow-flight/tests/common/server.rs index c575d12bbf52..8b162d398c4b 100644 --- a/arrow-flight/tests/common/server.rs +++ b/arrow-flight/tests/common/server.rs @@ -174,10 +174,7 @@ impl TestFlightServer { } /// Specify the response returned from the next call to `do_action` - pub fn set_do_action_response( - &self, - response: Vec>, - ) { + pub fn set_do_action_response(&self, response: Vec>) { let mut state = self.state.lock().expect("mutex not poisoned"); state.do_action_response.replace(response); } @@ -278,9 +275,10 @@ impl FlightService for TestFlightServer { let mut state = self.state.lock().expect("mutex not poisoned"); state.handshake_request = Some(handshake_request); - let response = 
state.handshake_response.take().unwrap_or_else(|| { - Err(Status::internal("No handshake response configured")) - })?; + let response = state + .handshake_response + .take() + .unwrap_or_else(|| Err(Status::internal("No handshake response configured")))?; // turn into a streaming response let output = futures::stream::iter(std::iter::once(Ok(response))); @@ -313,9 +311,10 @@ impl FlightService for TestFlightServer { self.save_metadata(&request); let mut state = self.state.lock().expect("mutex not poisoned"); state.get_flight_info_request = Some(request.into_inner()); - let response = state.get_flight_info_response.take().unwrap_or_else(|| { - Err(Status::internal("No get_flight_info response configured")) - })?; + let response = state + .get_flight_info_response + .take() + .unwrap_or_else(|| Err(Status::internal("No get_flight_info response configured")))?; Ok(Response::new(response)) } @@ -326,9 +325,10 @@ impl FlightService for TestFlightServer { self.save_metadata(&request); let mut state = self.state.lock().expect("mutex not poisoned"); state.get_schema_request = Some(request.into_inner()); - let schema = state.get_schema_response.take().unwrap_or_else(|| { - Err(Status::internal("No get_schema response configured")) - })?; + let schema = state + .get_schema_response + .take() + .unwrap_or_else(|| Err(Status::internal("No get_schema response configured")))?; // encode the schema let options = arrow_ipc::writer::IpcWriteOptions::default(); diff --git a/arrow-flight/tests/common/trailers_layer.rs b/arrow-flight/tests/common/trailers_layer.rs index 9e6be0dcf0da..b2ab74f7d925 100644 --- a/arrow-flight/tests/common/trailers_layer.rs +++ b/arrow-flight/tests/common/trailers_layer.rs @@ -81,9 +81,7 @@ where ready!(self.as_mut().project().inner.poll(cx)); match result { - Ok(response) => { - Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body }))) - } + Ok(response) => Poll::Ready(Ok(response.map(|body| WrappedBody { inner: body }))), Err(e) => Poll::Ready(Err(e)), } } diff --git a/arrow-flight/tests/encode_decode.rs b/arrow-flight/tests/encode_decode.rs index 71bcf4e0521a..f4741d743e57 100644 --- a/arrow-flight/tests/encode_decode.rs +++ b/arrow-flight/tests/encode_decode.rs @@ -195,8 +195,7 @@ async fn test_app_metadata() { let encode_stream = encoder.build(input_batch_stream); // use lower level stream to get access to app metadata - let decode_stream = - FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner(); + let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner(); let mut messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails"); @@ -225,8 +224,7 @@ async fn test_max_message_size() { let encode_stream = encoder.build(input_batch_stream); // use lower level stream to get access to app metadata - let decode_stream = - FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner(); + let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream).into_inner(); let messages: Vec<_> = decode_stream.try_collect().await.expect("encode fails"); @@ -254,8 +252,8 @@ async fn test_max_message_size_fuzz() { ]; for max_message_size_bytes in [10, 1024, 2048, 6400, 3211212] { - let encoder = FlightDataEncoderBuilder::default() - .with_max_flight_data_size(max_message_size_bytes); + let encoder = + FlightDataEncoderBuilder::default().with_max_flight_data_size(max_message_size_bytes); let input_batch_stream = futures::stream::iter(input.clone()).map(Ok); @@ -299,10 +297,10 @@ async fn 
test_chained_streams_batch_decoder() { let batch2 = make_dictionary_batch(3); // Model sending two flight streams back to back, with different schemas - let encode_stream1 = FlightDataEncoderBuilder::default() - .build(futures::stream::iter(vec![Ok(batch1.clone())])); - let encode_stream2 = FlightDataEncoderBuilder::default() - .build(futures::stream::iter(vec![Ok(batch2.clone())])); + let encode_stream1 = + FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())])); + let encode_stream2 = + FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())])); // append the two streams (so they will have two different schema messages) let encode_stream = encode_stream1.chain(encode_stream2); @@ -324,10 +322,10 @@ async fn test_chained_streams_data_decoder() { let batch2 = make_dictionary_batch(3); // Model sending two flight streams back to back, with different schemas - let encode_stream1 = FlightDataEncoderBuilder::default() - .build(futures::stream::iter(vec![Ok(batch1.clone())])); - let encode_stream2 = FlightDataEncoderBuilder::default() - .build(futures::stream::iter(vec![Ok(batch2.clone())])); + let encode_stream1 = + FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch1.clone())])); + let encode_stream2 = + FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![Ok(batch2.clone())])); // append the two streams (so they will have two different schema messages) let encode_stream = encode_stream1.chain(encode_stream2); @@ -335,8 +333,7 @@ async fn test_chained_streams_data_decoder() { // lower level decode stream can handle multiple schema messages let decode_stream = FlightDataDecoder::new(encode_stream); - let decoded_data: Vec<_> = - decode_stream.try_collect().await.expect("encode / decode"); + let decoded_data: Vec<_> = decode_stream.try_collect().await.expect("encode / decode"); println!("decoded data: {decoded_data:#?}"); @@ -425,8 +422,7 @@ fn make_primitive_batch(num_rows: usize) -> RecordBatch { }) .collect(); - RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))]) - .unwrap() + RecordBatch::try_from_iter(vec![("i", Arc::new(i) as ArrayRef), ("f", Arc::new(f))]).unwrap() } /// Make a dictionary batch for testing @@ -459,8 +455,7 @@ fn make_dictionary_batch(num_rows: usize) -> RecordBatch { /// match the input. 
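The reflowed test helpers above all exercise the same encode/decode round trip; for reference, a self-contained version of that round trip (assuming a tokio runtime):

```rust
use std::sync::Arc;

use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use arrow_flight::decode::FlightRecordBatchStream;
use arrow_flight::encode::FlightDataEncoderBuilder;
use futures::TryStreamExt;

#[tokio::main]
async fn main() {
    let batch = RecordBatch::try_from_iter(vec![(
        "i",
        Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
    )])
    .unwrap();

    // Encode one batch into a stream of FlightData messages...
    let encode_stream = FlightDataEncoderBuilder::default()
        .build(futures::stream::iter(vec![Ok(batch.clone())]));

    // ...then decode it back; schema handling happens inside the decoder.
    let decoded: Vec<RecordBatch> =
        FlightRecordBatchStream::new_from_flight_data(encode_stream)
            .try_collect()
            .await
            .expect("encode / decode");

    assert_eq!(decoded, vec![batch]);
}
```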
async fn roundtrip(input: Vec) { let expected_output = input.clone(); - roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output) - .await + roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await } /// Encodes input as a FlightData stream, and then decodes it using @@ -475,8 +470,7 @@ async fn roundtrip_dictionary(input: Vec) { .iter() .map(|batch| prepare_batch_for_flight(batch, schema.clone()).unwrap()) .collect(); - roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output) - .await + roundtrip_with_encoder(FlightDataEncoderBuilder::default(), input, expected_output).await } async fn roundtrip_with_encoder( @@ -491,8 +485,7 @@ async fn roundtrip_with_encoder( let encode_stream = encoder.build(input_batch_stream); let decode_stream = FlightRecordBatchStream::new_from_flight_data(encode_stream); - let output_batches: Vec<_> = - decode_stream.try_collect().await.expect("encode / decode"); + let output_batches: Vec<_> = decode_stream.try_collect().await.expect("encode / decode"); // remove any empty batches from input as they are not transmitted let expected_batches: Vec<_> = expected_batches diff --git a/arrow-flight/tests/flight_sql_client_cli.rs b/arrow-flight/tests/flight_sql_client_cli.rs index 912bcc75a9df..a28080450bc2 100644 --- a/arrow-flight/tests/flight_sql_client_cli.rs +++ b/arrow-flight/tests/flight_sql_client_cli.rs @@ -19,35 +19,37 @@ use std::{net::SocketAddr, pin::Pin, sync::Arc, time::Duration}; use arrow_array::{ArrayRef, Int64Array, RecordBatch, StringArray}; use arrow_flight::{ + decode::FlightRecordBatchStream, flight_service_server::{FlightService, FlightServiceServer}, sql::{ - server::FlightSqlService, ActionBeginSavepointRequest, - ActionBeginSavepointResult, ActionBeginTransactionRequest, + server::{FlightSqlService, PeekableFlightDataStream}, + ActionBeginSavepointRequest, ActionBeginSavepointResult, ActionBeginTransactionRequest, ActionBeginTransactionResult, ActionCancelQueryRequest, ActionCancelQueryResult, ActionClosePreparedStatementRequest, ActionCreatePreparedStatementRequest, ActionCreatePreparedStatementResult, ActionCreatePreparedSubstraitPlanRequest, ActionEndSavepointRequest, ActionEndTransactionRequest, Any, CommandGetCatalogs, CommandGetCrossReference, CommandGetDbSchemas, CommandGetExportedKeys, - CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, - CommandGetTableTypes, CommandGetTables, CommandGetXdbcTypeInfo, - CommandPreparedStatementQuery, CommandPreparedStatementUpdate, - CommandStatementQuery, CommandStatementSubstraitPlan, CommandStatementUpdate, - ProstMessageExt, SqlInfo, TicketStatementQuery, + CommandGetImportedKeys, CommandGetPrimaryKeys, CommandGetSqlInfo, CommandGetTableTypes, + CommandGetTables, CommandGetXdbcTypeInfo, CommandPreparedStatementQuery, + CommandPreparedStatementUpdate, CommandStatementQuery, CommandStatementSubstraitPlan, + CommandStatementUpdate, ProstMessageExt, SqlInfo, TicketStatementQuery, }, utils::batches_to_flight_data, Action, FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, - HandshakeResponse, Ticket, + HandshakeResponse, IpcMessage, PutResult, SchemaAsIpc, Ticket, }; +use arrow_ipc::writer::IpcWriteOptions; use arrow_schema::{ArrowError, DataType, Field, Schema}; use assert_cmd::Command; -use futures::Stream; +use bytes::Bytes; +use futures::{Stream, StreamExt, TryStreamExt}; use prost::Message; use tokio::{net::TcpListener, task::JoinHandle}; use tonic::{Request, Response, 
Status, Streaming}; const QUERY: &str = "SELECT * FROM table;"; -#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +#[tokio::test] async fn test_simple() { let test_server = FlightSqlServiceImpl {}; let fixture = TestFixture::new(&test_server).await; @@ -63,6 +65,7 @@ async fn test_simple() { .arg(addr.ip().to_string()) .arg("--port") .arg(addr.port().to_string()) + .arg("statement-query") .arg(QUERY) .assert() .success() @@ -87,10 +90,56 @@ async fn test_simple() { ); } +const PREPARED_QUERY: &str = "SELECT * FROM table WHERE field = $1"; +const PREPARED_STATEMENT_HANDLE: &str = "prepared_statement_handle"; + +#[tokio::test] +async fn test_do_put_prepared_statement() { + let test_server = FlightSqlServiceImpl {}; + let fixture = TestFixture::new(&test_server).await; + let addr = fixture.addr; + + let stdout = tokio::task::spawn_blocking(move || { + Command::cargo_bin("flight_sql_client") + .unwrap() + .env_clear() + .env("RUST_BACKTRACE", "1") + .env("RUST_LOG", "warn") + .arg("--host") + .arg(addr.ip().to_string()) + .arg("--port") + .arg(addr.port().to_string()) + .arg("prepared-statement-query") + .arg(PREPARED_QUERY) + .args(["-p", "$1=string"]) + .args(["-p", "$2=64"]) + .assert() + .success() + .get_output() + .stdout + .clone() + }) + .await + .unwrap(); + + fixture.shutdown_and_wait().await; + + assert_eq!( + std::str::from_utf8(&stdout).unwrap().trim(), + "+--------------+-----------+\ + \n| field_string | field_int |\ + \n+--------------+-----------+\ + \n| Hello | 42 |\ + \n| lovely | |\ + \n| FlightSQL! | 1337 |\ + \n+--------------+-----------+", + ); +} + /// All tests must complete within this many seconds or else the test server is shutdown const DEFAULT_TIMEOUT_SECONDS: u64 = 30; -#[derive(Clone)] +#[derive(Clone, Default)] pub struct FlightSqlServiceImpl {} impl FlightSqlServiceImpl { @@ -116,6 +165,58 @@ impl FlightSqlServiceImpl { ]; RecordBatch::try_new(Arc::new(schema), cols) } + + fn create_fake_prepared_stmt() -> Result { + let handle = PREPARED_STATEMENT_HANDLE.to_string(); + let schema = Schema::new(vec![ + Field::new("field_string", DataType::Utf8, false), + Field::new("field_int", DataType::Int64, true), + ]); + + let parameter_schema = Schema::new(vec![ + Field::new("$1", DataType::Utf8, false), + Field::new("$2", DataType::Int64, true), + ]); + + Ok(ActionCreatePreparedStatementResult { + prepared_statement_handle: handle.into(), + dataset_schema: serialize_schema(&schema)?, + parameter_schema: serialize_schema(¶meter_schema)?, + }) + } + + fn fake_flight_info(&self) -> Result { + let batch = Self::fake_result()?; + + Ok(FlightInfo::new() + .try_with_schema(&batch.schema()) + .expect("encoding schema") + .with_endpoint( + FlightEndpoint::new().with_ticket(Ticket::new( + FetchResults { + handle: String::from("part_1"), + } + .as_any() + .encode_to_vec(), + )), + ) + .with_endpoint( + FlightEndpoint::new().with_ticket(Ticket::new( + FetchResults { + handle: String::from("part_2"), + } + .as_any() + .encode_to_vec(), + )), + ) + .with_total_records(batch.num_rows() as i64) + .with_total_bytes(batch.get_array_memory_size() as i64) + .with_ordered(false)) + } +} + +fn serialize_schema(schema: &Schema) -> Result { + Ok(IpcMessage::try_from(SchemaAsIpc::new(schema, &IpcWriteOptions::default()))?.0) } #[tonic::async_trait] @@ -164,45 +265,21 @@ impl FlightSqlService for FlightSqlServiceImpl { ) -> Result, Status> { assert_eq!(query.query, QUERY); - let batch = Self::fake_result().unwrap(); - - let info = FlightInfo::new() - .try_with_schema(&batch.schema()) 
- .expect("encoding schema") - .with_endpoint( - FlightEndpoint::new().with_ticket(Ticket::new( - FetchResults { - handle: String::from("part_1"), - } - .as_any() - .encode_to_vec(), - )), - ) - .with_endpoint( - FlightEndpoint::new().with_ticket(Ticket::new( - FetchResults { - handle: String::from("part_2"), - } - .as_any() - .encode_to_vec(), - )), - ) - .with_total_records(batch.num_rows() as i64) - .with_total_bytes(batch.get_array_memory_size() as i64) - .with_ordered(false); - - let resp = Response::new(info); + let resp = Response::new(self.fake_flight_info().unwrap()); Ok(resp) } async fn get_flight_info_prepared_statement( &self, - _cmd: CommandPreparedStatementQuery, + cmd: CommandPreparedStatementQuery, _request: Request, ) -> Result, Status> { - Err(Status::unimplemented( - "get_flight_info_prepared_statement not implemented", - )) + assert_eq!( + cmd.prepared_statement_handle, + PREPARED_STATEMENT_HANDLE.as_bytes() + ); + let resp = Response::new(self.fake_flight_info().unwrap()); + Ok(resp) } async fn get_flight_info_substrait_plan( @@ -426,7 +503,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_statement_update( &self, _ticket: CommandStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_statement_update not implemented", @@ -436,7 +513,7 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_substrait_plan( &self, _ticket: CommandStatementSubstraitPlan, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_substrait_plan not implemented", @@ -446,17 +523,36 @@ impl FlightSqlService for FlightSqlServiceImpl { async fn do_put_prepared_statement_query( &self, _query: CommandPreparedStatementQuery, - _request: Request>, + request: Request, ) -> Result::DoPutStream>, Status> { - Err(Status::unimplemented( - "do_put_prepared_statement_query not implemented", + // just make sure decoding the parameters works + let parameters = FlightRecordBatchStream::new_from_flight_data( + request.into_inner().map_err(|e| e.into()), + ) + .try_collect::>() + .await?; + + for (left, right) in parameters[0].schema().all_fields().iter().zip(vec![ + Field::new("$1", DataType::Utf8, false), + Field::new("$2", DataType::Int64, true), + ]) { + if left.name() != right.name() || left.data_type() != right.data_type() { + return Err(Status::invalid_argument(format!( + "Parameters did not match parameter schema\ngot {}", + parameters[0].schema(), + ))); + } + } + + Ok(Response::new( + futures::stream::once(async { Ok(PutResult::default()) }).boxed(), )) } async fn do_put_prepared_statement_update( &self, _query: CommandPreparedStatementUpdate, - _request: Request>, + _request: Request, ) -> Result { Err(Status::unimplemented( "do_put_prepared_statement_update not implemented", @@ -468,9 +564,8 @@ impl FlightSqlService for FlightSqlServiceImpl { _query: ActionCreatePreparedStatementRequest, _request: Request, ) -> Result { - Err(Status::unimplemented( - "do_action_create_prepared_statement not implemented", - )) + Self::create_fake_prepared_stmt() + .map_err(|e| Status::internal(format!("Unable to serialize schema: {e}"))) } async fn do_action_close_prepared_statement( diff --git a/arrow-integration-test/src/datatype.rs b/arrow-integration-test/src/datatype.rs index 47bacc7cc74b..42ac71fbbd7e 100644 --- a/arrow-integration-test/src/datatype.rs +++ b/arrow-integration-test/src/datatype.rs @@ -124,26 +124,16 @@ pub fn data_type_from_json(json: &serde_json::Value) -> Result { } Some(s) 
if s == "duration" => match map.get("unit") { Some(p) if p == "SECOND" => Ok(DataType::Duration(TimeUnit::Second)), - Some(p) if p == "MILLISECOND" => { - Ok(DataType::Duration(TimeUnit::Millisecond)) - } - Some(p) if p == "MICROSECOND" => { - Ok(DataType::Duration(TimeUnit::Microsecond)) - } - Some(p) if p == "NANOSECOND" => { - Ok(DataType::Duration(TimeUnit::Nanosecond)) - } + Some(p) if p == "MILLISECOND" => Ok(DataType::Duration(TimeUnit::Millisecond)), + Some(p) if p == "MICROSECOND" => Ok(DataType::Duration(TimeUnit::Microsecond)), + Some(p) if p == "NANOSECOND" => Ok(DataType::Duration(TimeUnit::Nanosecond)), _ => Err(ArrowError::ParseError( "time unit missing or invalid".to_string(), )), }, Some(s) if s == "interval" => match map.get("unit") { - Some(p) if p == "DAY_TIME" => { - Ok(DataType::Interval(IntervalUnit::DayTime)) - } - Some(p) if p == "YEAR_MONTH" => { - Ok(DataType::Interval(IntervalUnit::YearMonth)) - } + Some(p) if p == "DAY_TIME" => Ok(DataType::Interval(IntervalUnit::DayTime)), + Some(p) if p == "YEAR_MONTH" => Ok(DataType::Interval(IntervalUnit::YearMonth)), Some(p) if p == "MONTH_DAY_NANO" => { Ok(DataType::Interval(IntervalUnit::MonthDayNano)) } diff --git a/arrow-integration-test/src/field.rs b/arrow-integration-test/src/field.rs index f59314ca02db..32edc4165938 100644 --- a/arrow-integration-test/src/field.rs +++ b/arrow-integration-test/src/field.rs @@ -63,18 +63,17 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { "Field 'metadata' must have exact two entries for each key-value map".to_string(), )); } - if let (Some(k), Some(v)) = - (map.get("key"), map.get("value")) - { - if let (Some(k_str), Some(v_str)) = - (k.as_str(), v.as_str()) - { + if let (Some(k), Some(v)) = (map.get("key"), map.get("value")) { + if let (Some(k_str), Some(v_str)) = (k.as_str(), v.as_str()) { res.insert( k_str.to_string().clone(), v_str.to_string().clone(), ); } else { - return Err(ArrowError::ParseError("Field 'metadata' must have map value of string type".to_string())); + return Err(ArrowError::ParseError( + "Field 'metadata' must have map value of string type" + .to_string(), + )); } } else { return Err(ArrowError::ParseError("Field 'metadata' lacks map keys named \"key\" or \"value\"".to_string())); @@ -115,46 +114,47 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { // if data_type is a struct or list, get its children let data_type = match data_type { - DataType::List(_) - | DataType::LargeList(_) - | DataType::FixedSizeList(_, _) => match map.get("children") { - Some(Value::Array(values)) => { - if values.len() != 1 { + DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => { + match map.get("children") { + Some(Value::Array(values)) => { + if values.len() != 1 { + return Err(ArrowError::ParseError( + "Field 'children' must have one element for a list data type" + .to_string(), + )); + } + match data_type { + DataType::List(_) => { + DataType::List(Arc::new(field_from_json(&values[0])?)) + } + DataType::LargeList(_) => { + DataType::LargeList(Arc::new(field_from_json(&values[0])?)) + } + DataType::FixedSizeList(_, int) => DataType::FixedSizeList( + Arc::new(field_from_json(&values[0])?), + int, + ), + _ => unreachable!( + "Data type should be a list, largelist or fixedsizelist" + ), + } + } + Some(_) => { return Err(ArrowError::ParseError( - "Field 'children' must have one element for a list data type".to_string(), - )); + "Field 'children' must be an array".to_string(), + )) } - match data_type { - DataType::List(_) => { - 
DataType::List(Arc::new(field_from_json(&values[0])?)) - } - DataType::LargeList(_) => DataType::LargeList(Arc::new( - field_from_json(&values[0])?, - )), - DataType::FixedSizeList(_, int) => DataType::FixedSizeList( - Arc::new(field_from_json(&values[0])?), - int, - ), - _ => unreachable!( - "Data type should be a list, largelist or fixedsizelist" - ), + None => { + return Err(ArrowError::ParseError( + "Field missing 'children' attribute".to_string(), + )); } } - Some(_) => { - return Err(ArrowError::ParseError( - "Field 'children' must be an array".to_string(), - )) - } - None => { - return Err(ArrowError::ParseError( - "Field missing 'children' attribute".to_string(), - )); - } - }, + } DataType::Struct(_) => match map.get("children") { - Some(Value::Array(values)) => DataType::Struct( - values.iter().map(field_from_json).collect::>()?, - ), + Some(Value::Array(values)) => { + DataType::Struct(values.iter().map(field_from_json).collect::>()?) + } Some(_) => { return Err(ArrowError::ParseError( "Field 'children' must be an array".to_string(), @@ -175,17 +175,16 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { DataType::Struct(map_fields) if map_fields.len() == 2 => { DataType::Map(Arc::new(child), keys_sorted) } - t => { - return Err(ArrowError::ParseError( - format!("Map children should be a struct with 2 fields, found {t:?}") - )) + t => { + return Err(ArrowError::ParseError(format!( + "Map children should be a struct with 2 fields, found {t:?}" + ))) } } } Some(_) => { return Err(ArrowError::ParseError( - "Field 'children' must be an array with 1 element" - .to_string(), + "Field 'children' must be an array with 1 element".to_string(), )) } None => { @@ -200,9 +199,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { let fields = fields .iter() .zip(values) - .map(|((id, _), value)| { - Ok((id, Arc::new(field_from_json(value)?))) - }) + .map(|((id, _), value)| Ok((id, Arc::new(field_from_json(value)?)))) .collect::>()?; DataType::Union(fields, mode) @@ -255,8 +252,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { _ => data_type, }; - let mut field = - Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered); + let mut field = Field::new_dict(name, data_type, nullable, dict_id, dict_is_ordered); field.set_metadata(metadata); Ok(field) } @@ -269,9 +265,7 @@ pub fn field_from_json(json: &serde_json::Value) -> Result { /// Generate a JSON representation of the `Field`. 
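For orientation in these reflowed `field_from_json` / `field_to_json` hunks, a small round trip through the integration-test JSON format for a primitive field; the exact JSON shape follows this crate's conventions, and asserting full symmetry is an assumption that holds for simple fields without metadata or dictionaries:

```rust
use arrow_integration_test::{field_from_json, field_to_json};
use arrow_schema::{DataType, Field};

fn main() {
    let json = serde_json::json!({
        "name": "id",
        "nullable": false,
        "type": {"name": "int", "bitWidth": 32, "isSigned": true},
        "children": []
    });

    // JSON -> Field
    let field = field_from_json(&json).unwrap();
    assert_eq!(field, Field::new("id", DataType::Int32, false));

    // Field -> JSON recovers the same value
    assert_eq!(field_to_json(&field), json);
}
```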
pub fn field_to_json(field: &Field) -> serde_json::Value { let children: Vec = match field.data_type() { - DataType::Struct(fields) => { - fields.iter().map(|x| field_to_json(x.as_ref())).collect() - } + DataType::Struct(fields) => fields.iter().map(|x| field_to_json(x.as_ref())).collect(), DataType::List(field) | DataType::LargeList(field) | DataType::FixedSizeList(field, _) diff --git a/arrow-integration-test/src/lib.rs b/arrow-integration-test/src/lib.rs index 04bbcf3f6f23..7b797aa07061 100644 --- a/arrow-integration-test/src/lib.rs +++ b/arrow-integration-test/src/lib.rs @@ -183,7 +183,8 @@ impl ArrowJson { return Ok(false); } } - _ => return Ok(false), + Some(Err(e)) => return Err(e), + None => return Ok(false), } } @@ -260,9 +261,7 @@ impl ArrowJsonField { true } Err(e) => { - eprintln!( - "Encountered error while converting JSON field to Arrow field: {e:?}" - ); + eprintln!("Encountered error while converting JSON field to Arrow field: {e:?}"); false } } @@ -272,8 +271,8 @@ impl ArrowJsonField { /// TODO: convert to use an Into fn to_arrow_field(&self) -> Result { // a bit regressive, but we have to convert the field to JSON in order to convert it - let field = serde_json::to_value(self) - .map_err(|error| ArrowError::JsonError(error.to_string()))?; + let field = + serde_json::to_value(self).map_err(|error| ArrowError::JsonError(error.to_string()))?; field_from_json(&field) } } @@ -388,12 +387,9 @@ pub fn array_from_json( match is_valid { 1 => b.append_value(match value { Value::Number(n) => n.as_i64().unwrap(), - Value::String(s) => { - s.parse().expect("Unable to parse string as i64") - } + Value::String(s) => s.parse().expect("Unable to parse string as i64"), Value::Object(ref map) - if map.contains_key("days") - && map.contains_key("milliseconds") => + if map.contains_key("days") && map.contains_key("milliseconds") => { match field.data_type() { DataType::Interval(IntervalUnit::DayTime) => { @@ -403,23 +399,19 @@ pub fn array_from_json( match (days, milliseconds) { (Value::Number(d), Value::Number(m)) => { let mut bytes = [0_u8; 8]; - let m = (m.as_i64().unwrap() as i32) - .to_le_bytes(); - let d = (d.as_i64().unwrap() as i32) - .to_le_bytes(); + let m = (m.as_i64().unwrap() as i32).to_le_bytes(); + let d = (d.as_i64().unwrap() as i32).to_le_bytes(); let c = [d, m].concat(); bytes.copy_from_slice(c.as_slice()); i64::from_le_bytes(bytes) } - _ => panic!( - "Unable to parse {value:?} as interval daytime" - ), + _ => { + panic!("Unable to parse {value:?} as interval daytime") + } } } - _ => panic!( - "Unable to parse {value:?} as interval daytime" - ), + _ => panic!("Unable to parse {value:?} as interval daytime"), } } _ => panic!("Unable to parse {value:?} as number"), @@ -498,9 +490,7 @@ pub fn array_from_json( .expect("Unable to parse string as u64"), ) } else if value.is_number() { - b.append_value( - value.as_u64().expect("Unable to read number as u64"), - ) + b.append_value(value.as_u64().expect("Unable to read number as u64")) } else { panic!("Unable to parse value {value:?} as u64") } @@ -534,11 +524,10 @@ pub fn array_from_json( let months = months.as_i64().unwrap() as i32; let days = days.as_i64().unwrap() as i32; let nanoseconds = nanoseconds.as_i64().unwrap(); - let months_days_ns: i128 = ((nanoseconds as i128) - & 0xFFFFFFFFFFFFFFFF) - << 64 - | ((days as i128) & 0xFFFFFFFF) << 32 - | ((months as i128) & 0xFFFFFFFF); + let months_days_ns: i128 = + ((nanoseconds as i128) & 0xFFFFFFFFFFFFFFFF) << 64 + | ((days as i128) & 0xFFFFFFFF) << 32 + | ((months as i128) & 
0xFFFFFFFF); months_days_ns } (_, _, _) => { @@ -677,11 +666,8 @@ pub fn array_from_json( DataType::List(child_field) => { let null_buf = create_null_buf(&json_col); let children = json_col.children.clone().unwrap(); - let child_array = array_from_json( - child_field, - children.get(0).unwrap().clone(), - dictionaries, - )?; + let child_array = + array_from_json(child_field, children.get(0).unwrap().clone(), dictionaries)?; let offsets: Vec = json_col .offset .unwrap() @@ -701,11 +687,8 @@ pub fn array_from_json( DataType::LargeList(child_field) => { let null_buf = create_null_buf(&json_col); let children = json_col.children.clone().unwrap(); - let child_array = array_from_json( - child_field, - children.get(0).unwrap().clone(), - dictionaries, - )?; + let child_array = + array_from_json(child_field, children.get(0).unwrap().clone(), dictionaries)?; let offsets: Vec = json_col .offset .unwrap() @@ -728,11 +711,8 @@ pub fn array_from_json( } DataType::FixedSizeList(child_field, _) => { let children = json_col.children.clone().unwrap(); - let child_array = array_from_json( - child_field, - children.get(0).unwrap().clone(), - dictionaries, - )?; + let child_array = + array_from_json(child_field, children.get(0).unwrap().clone(), dictionaries)?; let null_buf = create_null_buf(&json_col); let list_data = ArrayData::builder(field.data_type().clone()) .len(json_col.count) @@ -759,9 +739,7 @@ pub fn array_from_json( } DataType::Dictionary(key_type, value_type) => { let dict_id = field.dict_id().ok_or_else(|| { - ArrowError::JsonError(format!( - "Unable to find dict_id for field {field:?}" - )) + ArrowError::JsonError(format!("Unable to find dict_id for field {field:?}")) })?; // find dictionary let dictionary = dictionaries @@ -822,8 +800,7 @@ pub fn array_from_json( } else { [255_u8; 32] }; - bytes[0..integer_bytes.len()] - .copy_from_slice(integer_bytes.as_slice()); + bytes[0..integer_bytes.len()].copy_from_slice(integer_bytes.as_slice()); b.append_value(i256::from_le_bytes(bytes)); } _ => b.append_null(), @@ -836,11 +813,8 @@ pub fn array_from_json( DataType::Map(child_field, _) => { let null_buf = create_null_buf(&json_col); let children = json_col.children.clone().unwrap(); - let child_array = array_from_json( - child_field, - children.get(0).unwrap().clone(), - dictionaries, - )?; + let child_array = + array_from_json(child_field, children.get(0).unwrap().clone(), dictionaries)?; let offsets: Vec = json_col .offset .unwrap() @@ -945,9 +919,7 @@ pub fn dictionary_array_from_json( .unwrap(); let array = match dict_key { - DataType::Int8 => { - Arc::new(Int8DictionaryArray::from(dict_data)) as ArrayRef - } + DataType::Int8 => Arc::new(Int8DictionaryArray::from(dict_data)) as ArrayRef, DataType::Int16 => Arc::new(Int16DictionaryArray::from(dict_data)), DataType::Int32 => Arc::new(Int32DictionaryArray::from(dict_data)), DataType::Int64 => Arc::new(Int64DictionaryArray::from(dict_data)), @@ -1098,11 +1070,7 @@ mod tests { Field::new("c3", DataType::Utf8, true), Field::new( "c4", - DataType::List(Arc::new(Field::new( - "custom_item", - DataType::Int32, - false, - ))), + DataType::List(Arc::new(Field::new("custom_item", DataType::Int32, false))), true, ), ]); @@ -1198,10 +1166,8 @@ mod tests { ), ]); - let bools_with_metadata_map = - BooleanArray::from(vec![Some(true), None, Some(false)]); - let bools_with_metadata_vec = - BooleanArray::from(vec![Some(true), None, Some(false)]); + let bools_with_metadata_map = BooleanArray::from(vec![Some(true), None, Some(false)]); + let bools_with_metadata_vec = 
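// A worked, standalone sketch of the MonthDayNano packing used above: months,
// days and nanoseconds are masked and shifted into a single i128, matching the
// layout arrow exposes via IntervalMonthDayNanoType::make_value. All names here
// are local to the sketch.
fn pack_month_day_nano(months: i32, days: i32, nanoseconds: i64) -> i128 {
    ((nanoseconds as i128) & 0xFFFF_FFFF_FFFF_FFFF) << 64
        | ((days as i128) & 0xFFFF_FFFF) << 32
        | ((months as i128) & 0xFFFF_FFFF)
}

fn unpack_month_day_nano(value: i128) -> (i32, i32, i64) {
    let months = value as i32; // low 32 bits
    let days = (value >> 32) as i32; // next 32 bits
    let nanoseconds = (value >> 64) as i64; // high 64 bits
    (months, days, nanoseconds)
}

#[test]
fn month_day_nano_round_trips() {
    let packed = pack_month_day_nano(-1, 15, 123_456_789);
    assert_eq!(unpack_month_day_nano(packed), (-1, 15, 123_456_789));
}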
BooleanArray::from(vec![Some(true), None, Some(false)]); let bools = BooleanArray::from(vec![Some(true), None, Some(false)]); let int8s = Int8Array::from(vec![Some(1), None, Some(3)]); let int16s = Int16Array::from(vec![Some(1), None, Some(3)]); @@ -1219,39 +1185,24 @@ mod tests { Some(29923997007884), Some(30612271819236), ]); - let time_secs = - Time32SecondArray::from(vec![Some(27974), Some(78592), Some(43207)]); - let time_millis = Time32MillisecondArray::from(vec![ - Some(6613125), - Some(74667230), - Some(52260079), - ]); - let time_micros = - Time64MicrosecondArray::from(vec![Some(62522958593), None, None]); - let time_nanos = Time64NanosecondArray::from(vec![ - Some(73380123595985), - None, - Some(16584393546415), - ]); + let time_secs = Time32SecondArray::from(vec![Some(27974), Some(78592), Some(43207)]); + let time_millis = + Time32MillisecondArray::from(vec![Some(6613125), Some(74667230), Some(52260079)]); + let time_micros = Time64MicrosecondArray::from(vec![Some(62522958593), None, None]); + let time_nanos = + Time64NanosecondArray::from(vec![Some(73380123595985), None, Some(16584393546415)]); let ts_secs = TimestampSecondArray::from(vec![None, Some(193438817552), None]); - let ts_millis = TimestampMillisecondArray::from(vec![ - None, - Some(38606916383008), - Some(58113709376587), - ]); + let ts_millis = + TimestampMillisecondArray::from(vec![None, Some(38606916383008), Some(58113709376587)]); let ts_micros = TimestampMicrosecondArray::from(vec![None, None, None]); - let ts_nanos = - TimestampNanosecondArray::from(vec![None, None, Some(-6473623571954960143)]); + let ts_nanos = TimestampNanosecondArray::from(vec![None, None, Some(-6473623571954960143)]); let ts_secs_tz = TimestampSecondArray::from(vec![None, Some(193438817552), None]) .with_timezone_opt(secs_tz); - let ts_millis_tz = TimestampMillisecondArray::from(vec![ - None, - Some(38606916383008), - Some(58113709376587), - ]) - .with_timezone_opt(millis_tz); - let ts_micros_tz = TimestampMicrosecondArray::from(vec![None, None, None]) - .with_timezone_opt(micros_tz); + let ts_millis_tz = + TimestampMillisecondArray::from(vec![None, Some(38606916383008), Some(58113709376587)]) + .with_timezone_opt(millis_tz); + let ts_micros_tz = + TimestampMicrosecondArray::from(vec![None, None, None]).with_timezone_opt(micros_tz); let ts_nanos_tz = TimestampNanosecondArray::from(vec![None, None, Some(-6473623571954960143)]) .with_timezone_opt(nanos_tz); @@ -1259,8 +1210,7 @@ mod tests { let value_data = Int32Array::from(vec![None, Some(2), None, None]); let value_offsets = Buffer::from_slice_ref([0, 3, 4, 4]); - let list_data_type = - DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); + let list_data_type = DataType::List(Arc::new(Field::new("item", DataType::Int32, true))); let list_data = ArrayData::builder(list_data_type) .len(3) .add_buffer(value_offsets) diff --git a/arrow-integration-test/src/schema.rs b/arrow-integration-test/src/schema.rs index 6e143c2838d9..b5f6c5e86b38 100644 --- a/arrow-integration-test/src/schema.rs +++ b/arrow-integration-test/src/schema.rs @@ -65,11 +65,9 @@ fn from_metadata(json: &serde_json::Value) -> Result> { match json { Value::Array(_) => { let mut hashmap = HashMap::new(); - let values: Vec = serde_json::from_value(json.clone()) - .map_err(|_| { - ArrowError::JsonError( - "Unable to parse object into key-value pair".to_string(), - ) + let values: Vec = + serde_json::from_value(json.clone()).map_err(|_| { + ArrowError::JsonError("Unable to parse object into key-value 
pair".to_string()) })?; for meta in values { hashmap.insert(meta.key.clone(), meta.value); @@ -110,11 +108,10 @@ mod tests { #[test] fn schema_json() { // Add some custom metadata - let metadata: HashMap = - [("Key".to_string(), "Value".to_string())] - .iter() - .cloned() - .collect(); + let metadata: HashMap = [("Key".to_string(), "Value".to_string())] + .iter() + .cloned() + .collect(); let schema = Schema::new_with_metadata( vec![ @@ -140,10 +137,7 @@ mod tests { ), Field::new( "c17", - DataType::Timestamp( - TimeUnit::Microsecond, - Some("Africa/Johannesburg".into()), - ), + DataType::Timestamp(TimeUnit::Microsecond, Some("Africa/Johannesburg".into())), false, ), Field::new( @@ -197,10 +191,7 @@ mod tests { Field::new("c32", DataType::Duration(TimeUnit::Nanosecond), false), Field::new_dict( "c33", - DataType::Dictionary( - Box::new(DataType::Int32), - Box::new(DataType::Utf8), - ), + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), true, 123, true, diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml index 7f78cf50a9d7..c29860f09d64 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -27,11 +27,14 @@ edition = { workspace = true } publish = false rust-version = { workspace = true } +[lib] +crate-type = ["lib", "cdylib"] + [features] logging = ["tracing-subscriber"] [dependencies] -arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json"] } +arrow = { path = "../arrow", default-features = false, features = ["test_utils", "ipc", "ipc_compression", "json", "ffi"] } arrow-flight = { path = "../arrow-flight", default-features = false } arrow-buffer = { path = "../arrow-buffer", default-features = false } arrow-integration-test = { path = "../arrow-integration-test", default-features = false } @@ -39,11 +42,11 @@ async-trait = { version = "0.1.41", default-features = false } clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] } futures = { version = "0.3", default-features = false } hex = { version = "0.4", default-features = false, features = ["std"] } -prost = { version = "0.11", default-features = false } +prost = { version = "0.12", default-features = false } serde = { version = "1.0", default-features = false, features = ["rc", "derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.0", default-features = false } -tonic = { version = "0.9", default-features = false } +tonic = { version = "0.10", default-features = false } tracing-subscriber = { version = "0.3.1", default-features = false, features = ["fmt"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } flate2 = { version = "1", default-features = false, features = ["rust_backend"] } diff --git a/arrow-integration-testing/README.md b/arrow-integration-testing/README.md index e82591e6b139..dcf39c27fbc5 100644 --- a/arrow-integration-testing/README.md +++ b/arrow-integration-testing/README.md @@ -48,7 +48,7 @@ ln -s arrow/rust ```shell cd arrow -pip install -e dev/archery[docker] +pip install -e dev/archery[integration] ``` ### Build the C++ binaries: diff --git a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs index 2c36e8d9b8ae..9f1abb16a668 100644 --- a/arrow-integration-testing/src/bin/arrow-json-integration-test.rs +++ 
b/arrow-integration-testing/src/bin/arrow-json-integration-test.rs @@ -15,16 +15,13 @@ // specific language governing permissions and limitations // under the License. -use arrow::datatypes::{DataType, Field}; -use arrow::datatypes::{Fields, Schema}; use arrow::error::{ArrowError, Result}; use arrow::ipc::reader::FileReader; use arrow::ipc::writer::FileWriter; use arrow_integration_test::*; -use arrow_integration_testing::read_json_file; +use arrow_integration_testing::{canonicalize_schema, open_json_file}; use clap::Parser; use std::fs::File; -use std::sync::Arc; #[derive(clap::ValueEnum, Debug, Clone)] #[clap(rename_all = "SCREAMING_SNAKE_CASE")] @@ -66,12 +63,12 @@ fn json_to_arrow(json_name: &str, arrow_name: &str, verbose: bool) -> Result<()> eprintln!("Converting {json_name} to {arrow_name}"); } - let json_file = read_json_file(json_name)?; + let json_file = open_json_file(json_name)?; let arrow_file = File::create(arrow_name)?; let mut writer = FileWriter::try_new(arrow_file, &json_file.schema)?; - for b in json_file.batches { + for b in json_file.read_batches()? { writer.write(&b)?; } @@ -113,55 +110,13 @@ fn arrow_to_json(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> Ok(()) } -fn canonicalize_schema(schema: &Schema) -> Schema { - let fields = schema - .fields() - .iter() - .map(|field| match field.data_type() { - DataType::Map(child_field, sorted) => match child_field.data_type() { - DataType::Struct(fields) if fields.len() == 2 => { - let first_field = fields.get(0).unwrap(); - let key_field = Arc::new(Field::new( - "key", - first_field.data_type().clone(), - first_field.is_nullable(), - )); - let second_field = fields.get(1).unwrap(); - let value_field = Arc::new(Field::new( - "value", - second_field.data_type().clone(), - second_field.is_nullable(), - )); - - let fields = Fields::from([key_field, value_field]); - let struct_type = DataType::Struct(fields); - let child_field = - Field::new("entries", struct_type, child_field.is_nullable()); - - Arc::new(Field::new( - field.name().as_str(), - DataType::Map(Arc::new(child_field), *sorted), - field.is_nullable(), - )) - } - _ => panic!( - "The child field of Map type should be Struct type with 2 fields." 
- ), - }, - _ => field.clone(), - }) - .collect::(); - - Schema::new(fields).with_metadata(schema.metadata().clone()) -} - fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> { if verbose { eprintln!("Validating {arrow_name} and {json_name}"); } // open JSON file - let json_file = read_json_file(json_name)?; + let json_file = open_json_file(json_name)?; // open Arrow file let arrow_file = File::open(arrow_name)?; @@ -176,7 +131,7 @@ fn validate(arrow_name: &str, json_name: &str, verbose: bool) -> Result<()> { ))); } - let json_batches = &json_file.batches; + let json_batches = json_file.read_batches()?; // compare number of batches assert!( diff --git a/arrow-integration-testing/src/bin/flight-test-integration-client.rs b/arrow-integration-testing/src/bin/flight-test-integration-client.rs index d46b4fac759e..b8bbb952837b 100644 --- a/arrow-integration-testing/src/bin/flight-test-integration-client.rs +++ b/arrow-integration-testing/src/bin/flight-test-integration-client.rs @@ -62,8 +62,7 @@ async fn main() -> Result { } None => { let path = args.path.expect("No path is given"); - flight_client_scenarios::integration_test::run_scenario(&host, port, &path) - .await?; + flight_client_scenarios::integration_test::run_scenario(&host, port, &path).await?; } } diff --git a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs index 9f66abf50106..376e31e15553 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/auth_basic_proto.rs @@ -17,9 +17,7 @@ use crate::{AUTH_PASSWORD, AUTH_USERNAME}; -use arrow_flight::{ - flight_service_client::FlightServiceClient, BasicAuth, HandshakeRequest, -}; +use arrow_flight::{flight_service_client::FlightServiceClient, BasicAuth, HandshakeRequest}; use futures::{stream, StreamExt}; use prost::Message; use tonic::{metadata::MetadataValue, Request, Status}; @@ -78,11 +76,7 @@ pub async fn run_scenario(host: &str, port: u16) -> Result { Ok(()) } -async fn authenticate( - client: &mut Client, - username: &str, - password: &str, -) -> Result { +async fn authenticate(client: &mut Client, username: &str, password: &str) -> Result { let auth = BasicAuth { username: username.into(), password: password.into(), diff --git a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs index a55c2dec0580..c6b5a72ca6e2 100644 --- a/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/integration_test.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
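// A minimal sketch of the open_json_file flow that replaces read_json_file in
// the JSON integration binary above and in this flight client scenario: the
// schema and dictionaries are parsed once, and record batches stay as JSON
// until decoded on demand. ArrowFile and its read_batches method are introduced
// in arrow-integration-testing/src/lib.rs later in this diff; the function name
// here is illustrative.
fn load_and_count(json_name: &str) -> arrow::error::Result<usize> {
    let json_file = arrow_integration_testing::open_json_file(json_name)?;
    // Decode every batch held in the JSON document.
    let batches = json_file.read_batches()?;
    Ok(batches.iter().map(|b| b.num_rows()).sum())
}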
-use crate::{read_json_file, ArrowFile}; +use crate::open_json_file; use std::collections::HashMap; use arrow::{ @@ -27,8 +27,7 @@ use arrow::{ }; use arrow_flight::{ flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient, - utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, Location, - SchemaAsIpc, Ticket, + utils::flight_data_to_arrow_batch, FlightData, FlightDescriptor, Location, SchemaAsIpc, Ticket, }; use futures::{channel::mpsc, sink::SinkExt, stream, StreamExt}; use tonic::{Request, Streaming}; @@ -46,23 +45,16 @@ pub async fn run_scenario(host: &str, port: u16, path: &str) -> Result { let client = FlightServiceClient::connect(url).await?; - let ArrowFile { - schema, batches, .. - } = read_json_file(path)?; + let json_file = open_json_file(path)?; - let schema = Arc::new(schema); + let batches = json_file.read_batches()?; + let schema = Arc::new(json_file.schema); let mut descriptor = FlightDescriptor::default(); descriptor.set_type(DescriptorType::Path); descriptor.path = vec![path.to_string()]; - upload_data( - client.clone(), - schema.clone(), - descriptor.clone(), - batches.clone(), - ) - .await?; + upload_data(client.clone(), schema, descriptor.clone(), batches.clone()).await?; verify_data(client, descriptor, &batches).await?; Ok(()) @@ -203,19 +195,16 @@ async fn consume_flight_location( let mut dictionaries_by_id = HashMap::new(); for (counter, expected_batch) in expected_data.iter().enumerate() { - let data = receive_batch_flight_data( - &mut resp, - actual_schema.clone(), - &mut dictionaries_by_id, - ) - .await - .unwrap_or_else(|| { - panic!( - "Got fewer batches than expected, received so far: {} expected: {}", - counter, - expected_data.len(), - ) - }); + let data = + receive_batch_flight_data(&mut resp, actual_schema.clone(), &mut dictionaries_by_id) + .await + .unwrap_or_else(|| { + panic!( + "Got fewer batches than expected, received so far: {} expected: {}", + counter, + expected_data.len(), + ) + }); let metadata = counter.to_string().into_bytes(); assert_eq!(metadata, data.app_metadata); @@ -250,8 +239,8 @@ async fn consume_flight_location( async fn receive_schema_flight_data(resp: &mut Streaming) -> Option { let data = resp.next().await?.ok()?; - let message = arrow::ipc::root_as_message(&data.data_header[..]) - .expect("Error parsing message"); + let message = + arrow::ipc::root_as_message(&data.data_header[..]).expect("Error parsing message"); // message header is a Schema, so read it let ipc_schema: ipc::Schema = message @@ -268,8 +257,8 @@ async fn receive_batch_flight_data( dictionaries_by_id: &mut HashMap, ) -> Option { let mut data = resp.next().await?.ok()?; - let mut message = arrow::ipc::root_as_message(&data.data_header[..]) - .expect("Error parsing first message"); + let mut message = + arrow::ipc::root_as_message(&data.data_header[..]).expect("Error parsing first message"); while message.header_type() == ipc::MessageHeader::DictionaryBatch { reader::read_dictionary( @@ -284,8 +273,8 @@ async fn receive_batch_flight_data( .expect("Error reading dictionary"); data = resp.next().await?.ok()?; - message = arrow::ipc::root_as_message(&data.data_header[..]) - .expect("Error parsing message"); + message = + arrow::ipc::root_as_message(&data.data_header[..]).expect("Error parsing message"); } Some(data) diff --git a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs index 773919ff72af..3b71edf446a3 100644 --- 
a/arrow-integration-testing/src/flight_client_scenarios/middleware.rs +++ b/arrow-integration-testing/src/flight_client_scenarios/middleware.rs @@ -16,8 +16,7 @@ // under the License. use arrow_flight::{ - flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient, - FlightDescriptor, + flight_descriptor::DescriptorType, flight_service_client::FlightServiceClient, FlightDescriptor, }; use prost::bytes::Bytes; use tonic::{Request, Status}; diff --git a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs index 72d47b1391ee..ff4fc12f2523 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/auth_basic_proto.rs @@ -19,15 +19,13 @@ use std::pin::Pin; use std::sync::Arc; use arrow_flight::{ - flight_service_server::FlightService, flight_service_server::FlightServiceServer, - Action, ActionType, BasicAuth, Criteria, Empty, FlightData, FlightDescriptor, - FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, + flight_service_server::FlightService, flight_service_server::FlightServiceServer, Action, + ActionType, BasicAuth, Criteria, Empty, FlightData, FlightDescriptor, FlightInfo, + HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, Ticket, }; use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt}; use tokio::sync::Mutex; -use tonic::{ - metadata::MetadataMap, transport::Server, Request, Response, Status, Streaming, -}; +use tonic::{metadata::MetadataMap, transport::Server, Request, Response, Status, Streaming}; type TonicStream = Pin + Send + Sync + 'static>>; type Error = Box; @@ -63,10 +61,7 @@ pub struct AuthBasicProtoScenarioImpl { } impl AuthBasicProtoScenarioImpl { - async fn check_auth( - &self, - metadata: &MetadataMap, - ) -> Result { + async fn check_auth(&self, metadata: &MetadataMap) -> Result { let token = metadata .get_bin("auth-token-bin") .and_then(|v| v.to_bytes().ok()) @@ -74,10 +69,7 @@ impl AuthBasicProtoScenarioImpl { self.is_valid(token).await } - async fn is_valid( - &self, - token: Option, - ) -> Result { + async fn is_valid(&self, token: Option) -> Result { match token { Some(t) if t == *self.username => Ok(GrpcServerCallContext { peer_identity: self.username.to_string(), @@ -142,12 +134,10 @@ impl FlightService for AuthBasicProtoScenarioImpl { let req = req.expect("Error reading handshake request"); let HandshakeRequest { payload, .. 
} = req; - let auth = BasicAuth::decode(&*payload) - .expect("Error parsing handshake request"); + let auth = + BasicAuth::decode(&*payload).expect("Error parsing handshake request"); - let resp = if *auth.username == *username - && *auth.password == *password - { + let resp = if *auth.username == *username && *auth.password == *password { Ok(HandshakeResponse { payload: username.as_bytes().to_vec().into(), ..HandshakeResponse::default() diff --git a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs index e2c4cb5d88f3..2011031e921a 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/integration_test.rs @@ -30,9 +30,9 @@ use arrow::{ }; use arrow_flight::{ flight_descriptor::DescriptorType, flight_service_server::FlightService, - flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, - FlightData, FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, - HandshakeResponse, IpcMessage, PutResult, SchemaAsIpc, SchemaResult, Ticket, + flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, FlightData, + FlightDescriptor, FlightEndpoint, FlightInfo, HandshakeRequest, HandshakeResponse, IpcMessage, + PutResult, SchemaAsIpc, SchemaResult, Ticket, }; use futures::{channel::mpsc, sink::SinkExt, Stream, StreamExt}; use std::convert::TryInto; @@ -113,8 +113,7 @@ impl FlightService for FlightServiceImpl { let options = arrow::ipc::writer::IpcWriteOptions::default(); - let schema = - std::iter::once(Ok(SchemaAsIpc::new(&flight.schema, &options).into())); + let schema = std::iter::once(Ok(SchemaAsIpc::new(&flight.schema, &options).into())); let batches = flight .chunks @@ -126,12 +125,9 @@ impl FlightService for FlightServiceImpl { let (encoded_dictionaries, encoded_batch) = data_gen .encoded_batch(batch, &mut dictionary_tracker, &options) - .expect( - "DictionaryTracker configured above to not error on replacement", - ); + .expect("DictionaryTracker configured above to not error on replacement"); - let dictionary_flight_data = - encoded_dictionaries.into_iter().map(Into::into); + let dictionary_flight_data = encoded_dictionaries.into_iter().map(Into::into); let mut batch_flight_data: FlightData = encoded_batch.into(); // Only the record batch's FlightData gets app_metadata @@ -182,8 +178,7 @@ impl FlightService for FlightServiceImpl { let endpoint = self.endpoint_from_path(&path[0]); - let total_records: usize = - flight.chunks.iter().map(|chunk| chunk.num_rows()).sum(); + let total_records: usize = flight.chunks.iter().map(|chunk| chunk.num_rows()).sum(); let options = arrow::ipc::writer::IpcWriteOptions::default(); let message = SchemaAsIpc::new(&flight.schema, &options) @@ -224,8 +219,7 @@ impl FlightService for FlightServiceImpl { .clone() .ok_or_else(|| Status::invalid_argument("Must have a descriptor"))?; - if descriptor.r#type != DescriptorType::Path as i32 || descriptor.path.is_empty() - { + if descriptor.r#type != DescriptorType::Path as i32 || descriptor.path.is_empty() { return Err(Status::invalid_argument("Must specify a path")); } @@ -297,9 +291,9 @@ async fn record_batch_from_message( schema_ref: SchemaRef, dictionaries_by_id: &HashMap, ) -> Result { - let ipc_batch = message.header_as_record_batch().ok_or_else(|| { - Status::internal("Could not parse message header as record batch") - })?; + let ipc_batch = message + 
.header_as_record_batch() + .ok_or_else(|| Status::internal("Could not parse message header as record batch"))?; let arrow_batch_result = reader::read_record_batch( data_body, @@ -320,9 +314,9 @@ async fn dictionary_from_message( schema_ref: SchemaRef, dictionaries_by_id: &mut HashMap, ) -> Result<(), Status> { - let ipc_batch = message.header_as_dictionary_batch().ok_or_else(|| { - Status::internal("Could not parse message header as dictionary batch") - })?; + let ipc_batch = message + .header_as_dictionary_batch() + .ok_or_else(|| Status::internal("Could not parse message header as dictionary batch"))?; let dictionary_batch_result = reader::read_dictionary( data_body, diff --git a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs index 9b1c84b57119..68d871b528a6 100644 --- a/arrow-integration-testing/src/flight_server_scenarios/middleware.rs +++ b/arrow-integration-testing/src/flight_server_scenarios/middleware.rs @@ -19,9 +19,9 @@ use std::pin::Pin; use arrow_flight::{ flight_descriptor::DescriptorType, flight_service_server::FlightService, - flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, - FlightData, FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, - PutResult, SchemaResult, Ticket, + flight_service_server::FlightServiceServer, Action, ActionType, Criteria, Empty, FlightData, + FlightDescriptor, FlightInfo, HandshakeRequest, HandshakeResponse, PutResult, SchemaResult, + Ticket, }; use futures::Stream; use tonic::{transport::Server, Request, Response, Status, Streaming}; @@ -93,8 +93,7 @@ impl FlightService for MiddlewareScenarioImpl { let descriptor = request.into_inner(); - if descriptor.r#type == DescriptorType::Cmd as i32 - && descriptor.cmd.as_ref() == b"success" + if descriptor.r#type == DescriptorType::Cmd as i32 && descriptor.cmd.as_ref() == b"success" { // Return a fake location - the test doesn't read it let endpoint = super::endpoint("foo", "grpc+tcp://localhost:10010"); diff --git a/arrow-integration-testing/src/lib.rs b/arrow-integration-testing/src/lib.rs index fe0cc68a4205..553e69b0a1a0 100644 --- a/arrow-integration-testing/src/lib.rs +++ b/arrow-integration-testing/src/lib.rs @@ -19,14 +19,20 @@ use serde_json::Value; -use arrow::datatypes::Schema; -use arrow::error::Result; +use arrow::array::{Array, StructArray}; +use arrow::datatypes::{DataType, Field, Fields, Schema}; +use arrow::error::{ArrowError, Result}; +use arrow::ffi::{from_ffi_and_data_type, FFI_ArrowArray, FFI_ArrowSchema}; use arrow::record_batch::RecordBatch; use arrow::util::test_util::arrow_test_data; use arrow_integration_test::*; use std::collections::HashMap; +use std::ffi::{c_int, CStr, CString}; use std::fs::File; use std::io::BufReader; +use std::iter::zip; +use std::ptr; +use std::sync::Arc; /// The expected username for the basic auth integration test. 
pub const AUTH_USERNAME: &str = "arrow"; @@ -40,11 +46,68 @@ pub struct ArrowFile { pub schema: Schema, // we can evolve this into a concrete Arrow type // this is temporarily not being read from - pub _dictionaries: HashMap, - pub batches: Vec, + dictionaries: HashMap, + arrow_json: Value, } -pub fn read_json_file(json_name: &str) -> Result { +impl ArrowFile { + pub fn read_batch(&self, batch_num: usize) -> Result { + let b = self.arrow_json["batches"].get(batch_num).unwrap(); + let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap(); + record_batch_from_json(&self.schema, json_batch, Some(&self.dictionaries)) + } + + pub fn read_batches(&self) -> Result> { + self.arrow_json["batches"] + .as_array() + .unwrap() + .iter() + .map(|b| { + let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap(); + record_batch_from_json(&self.schema, json_batch, Some(&self.dictionaries)) + }) + .collect() + } +} + +// Canonicalize the names of map fields in a schema +pub fn canonicalize_schema(schema: &Schema) -> Schema { + let fields = schema + .fields() + .iter() + .map(|field| match field.data_type() { + DataType::Map(child_field, sorted) => match child_field.data_type() { + DataType::Struct(fields) if fields.len() == 2 => { + let first_field = fields.get(0).unwrap(); + let key_field = + Arc::new(Field::new("key", first_field.data_type().clone(), false)); + let second_field = fields.get(1).unwrap(); + let value_field = Arc::new(Field::new( + "value", + second_field.data_type().clone(), + second_field.is_nullable(), + )); + + let fields = Fields::from([key_field, value_field]); + let struct_type = DataType::Struct(fields); + let child_field = Field::new("entries", struct_type, false); + + Arc::new(Field::new( + field.name().as_str(), + DataType::Map(Arc::new(child_field), *sorted), + field.is_nullable(), + )) + } + _ => panic!("The child field of Map type should be Struct type with 2 fields."), + }, + _ => field.clone(), + }) + .collect::(); + + Schema::new(fields).with_metadata(schema.metadata().clone()) +} + +pub fn open_json_file(json_name: &str) -> Result { let json_file = File::open(json_name)?; let reader = BufReader::new(json_file); let arrow_json: Value = serde_json::from_reader(reader).unwrap(); @@ -56,23 +119,16 @@ pub fn read_json_file(json_name: &str) -> Result { .as_array() .expect("Unable to get dictionaries as array") { - let json_dict: ArrowJsonDictionaryBatch = serde_json::from_value(d.clone()) - .expect("Unable to get dictionary from JSON"); + let json_dict: ArrowJsonDictionaryBatch = + serde_json::from_value(d.clone()).expect("Unable to get dictionary from JSON"); // TODO: convert to a concrete Arrow type dictionaries.insert(json_dict.id, json_dict); } } - - let mut batches = vec![]; - for b in arrow_json["batches"].as_array().unwrap() { - let json_batch: ArrowJsonBatch = serde_json::from_value(b.clone()).unwrap(); - let batch = record_batch_from_json(&schema, json_batch, Some(&dictionaries))?; - batches.push(batch); - } Ok(ArrowFile { schema, - _dictionaries: dictionaries, - batches, + dictionaries, + arrow_json, }) } @@ -100,3 +156,147 @@ pub fn read_gzip_json(version: &str, path: &str) -> ArrowJson { let arrow_json: ArrowJson = serde_json::from_str(&s).unwrap(); arrow_json } + +// +// C Data Integration entrypoints +// + +fn cdata_integration_export_schema_from_json( + c_json_name: *const i8, + out: *mut FFI_ArrowSchema, +) -> Result<()> { + let json_name = unsafe { CStr::from_ptr(c_json_name) }; + let f = open_json_file(json_name.to_str()?)?; + 
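// FFI_ArrowSchema::try_from(&Schema) builds a C-ABI ArrowSchema that carries
// its own release callback; the ptr::write just below moves it into the
// caller-provided slot without dropping whatever uninitialized memory sits
// behind `out`, so ownership transfers across the FFI boundary and releasing
// the schema becomes the caller's responsibility.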
let c_schema = FFI_ArrowSchema::try_from(&f.schema)?; + // Move exported schema into output struct + unsafe { ptr::write(out, c_schema) }; + Ok(()) +} + +fn cdata_integration_export_batch_from_json( + c_json_name: *const i8, + batch_num: c_int, + out: *mut FFI_ArrowArray, +) -> Result<()> { + let json_name = unsafe { CStr::from_ptr(c_json_name) }; + let b = open_json_file(json_name.to_str()?)?.read_batch(batch_num.try_into().unwrap())?; + let a = StructArray::from(b).into_data(); + let c_array = FFI_ArrowArray::new(&a); + // Move exported array into output struct + unsafe { ptr::write(out, c_array) }; + Ok(()) +} + +fn cdata_integration_import_schema_and_compare_to_json( + c_json_name: *const i8, + c_schema: *mut FFI_ArrowSchema, +) -> Result<()> { + let json_name = unsafe { CStr::from_ptr(c_json_name) }; + let json_schema = open_json_file(json_name.to_str()?)?.schema; + + // The source ArrowSchema will be released when this is dropped + let imported_schema = unsafe { FFI_ArrowSchema::from_raw(c_schema) }; + let imported_schema = Schema::try_from(&imported_schema)?; + + // compare schemas + if canonicalize_schema(&json_schema) != canonicalize_schema(&imported_schema) { + return Err(ArrowError::ComputeError(format!( + "Schemas do not match.\n- JSON: {:?}\n- Imported: {:?}", + json_schema, imported_schema + ))); + } + Ok(()) +} + +fn compare_batches(a: &RecordBatch, b: &RecordBatch) -> Result<()> { + if a.num_columns() != b.num_columns() { + return Err(ArrowError::InvalidArgumentError( + "batches do not have the same number of columns".to_string(), + )); + } + for (a_column, b_column) in zip(a.columns(), b.columns()) { + if a_column != b_column { + return Err(ArrowError::InvalidArgumentError( + "batch columns are not the same".to_string(), + )); + } + } + Ok(()) +} + +fn cdata_integration_import_batch_and_compare_to_json( + c_json_name: *const i8, + batch_num: c_int, + c_array: *mut FFI_ArrowArray, +) -> Result<()> { + let json_name = unsafe { CStr::from_ptr(c_json_name) }; + let json_batch = + open_json_file(json_name.to_str()?)?.read_batch(batch_num.try_into().unwrap())?; + let schema = json_batch.schema(); + + let data_type_for_import = DataType::Struct(schema.fields.clone()); + let imported_array = unsafe { FFI_ArrowArray::from_raw(c_array) }; + let imported_array = unsafe { from_ffi_and_data_type(imported_array, data_type_for_import) }?; + imported_array.validate_full()?; + let imported_batch = RecordBatch::from(StructArray::from(imported_array)); + + compare_batches(&json_batch, &imported_batch) +} + +// If Result is an error, then export a const char* to its string display, otherwise NULL +fn result_to_c_error(result: &std::result::Result) -> *mut i8 { + match result { + Ok(_) => ptr::null_mut(), + Err(e) => CString::new(format!("{}", e)).unwrap().into_raw(), + } +} + +/// Release a const char* exported by result_to_c_error() +/// +/// # Safety +/// +/// The pointer is assumed to have been obtained using CString::into_raw. 
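// A hypothetical Rust-side round trip through the exported entrypoints below:
// each arrow_rs_* function returns NULL on success, or a heap-allocated error
// string that must be handed back to arrow_rs_free_error exactly once. The
// function name and path handling are illustrative.
fn schema_round_trip_sketch(json_path: &str) {
    let c_path = std::ffi::CString::new(json_path).unwrap();
    let mut c_schema = FFI_ArrowSchema::empty();
    let err = arrow_rs_cdata_integration_export_schema_from_json(
        c_path.as_ptr() as *const i8,
        &mut c_schema,
    );
    assert!(err.is_null(), "export failed");
    // Importing consumes the exported schema (from_raw leaves it empty) and
    // compares it against the JSON definition again.
    let err = arrow_rs_cdata_integration_import_schema_and_compare_to_json(
        c_path.as_ptr() as *const i8,
        &mut c_schema,
    );
    assert!(err.is_null(), "schemas did not match");
}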
+#[no_mangle] +pub unsafe extern "C" fn arrow_rs_free_error(c_error: *mut i8) { + if !c_error.is_null() { + drop(unsafe { CString::from_raw(c_error) }); + } +} + +#[no_mangle] +pub extern "C" fn arrow_rs_cdata_integration_export_schema_from_json( + c_json_name: *const i8, + out: *mut FFI_ArrowSchema, +) -> *mut i8 { + let r = cdata_integration_export_schema_from_json(c_json_name, out); + result_to_c_error(&r) +} + +#[no_mangle] +pub extern "C" fn arrow_rs_cdata_integration_import_schema_and_compare_to_json( + c_json_name: *const i8, + c_schema: *mut FFI_ArrowSchema, +) -> *mut i8 { + let r = cdata_integration_import_schema_and_compare_to_json(c_json_name, c_schema); + result_to_c_error(&r) +} + +#[no_mangle] +pub extern "C" fn arrow_rs_cdata_integration_export_batch_from_json( + c_json_name: *const i8, + batch_num: c_int, + out: *mut FFI_ArrowArray, +) -> *mut i8 { + let r = cdata_integration_export_batch_from_json(c_json_name, batch_num, out); + result_to_c_error(&r) +} + +#[no_mangle] +pub extern "C" fn arrow_rs_cdata_integration_import_batch_and_compare_to_json( + c_json_name: *const i8, + batch_num: c_int, + c_array: *mut FFI_ArrowArray, +) -> *mut i8 { + let r = cdata_integration_import_batch_and_compare_to_json(c_json_name, batch_num, c_array); + result_to_c_error(&r) +} diff --git a/arrow-integration-testing/tests/ipc_reader.rs b/arrow-integration-testing/tests/ipc_reader.rs index 696ab6e6053a..11b8fa84534e 100644 --- a/arrow-integration-testing/tests/ipc_reader.rs +++ b/arrow-integration-testing/tests/ipc_reader.rs @@ -63,9 +63,7 @@ fn read_1_0_0_bigendian_decimal_should_panic() { } #[test] -#[should_panic( - expected = "Last offset 687865856 of Utf8 is larger than values length 41" -)] +#[should_panic(expected = "Last offset 687865856 of Utf8 is larger than values length 41")] fn read_1_0_0_bigendian_dictionary_should_panic() { // The offsets are not translated for big-endian files // https://github.com/apache/arrow-rs/issues/859 @@ -160,8 +158,7 @@ fn read_2_0_0_compression() { /// Verification json file /// `arrow-ipc-stream/integration//.json.gz fn verify_arrow_file(testdata: &str, version: &str, path: &str) { - let filename = - format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.arrow_file"); + let filename = format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.arrow_file"); println!("Verifying {filename}"); // Compare contents to the expected output format in JSON @@ -197,8 +194,7 @@ fn verify_arrow_file(testdata: &str, version: &str, path: &str) { /// Verification json file /// `arrow-ipc-stream/integration//.json.gz fn verify_arrow_stream(testdata: &str, version: &str, path: &str) { - let filename = - format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.stream"); + let filename = format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.stream"); println!("Verifying {filename}"); // Compare contents to the expected output format in JSON diff --git a/arrow-integration-testing/tests/ipc_writer.rs b/arrow-integration-testing/tests/ipc_writer.rs index 11707d935540..d780eb2ee0b5 100644 --- a/arrow-integration-testing/tests/ipc_writer.rs +++ b/arrow-integration-testing/tests/ipc_writer.rs @@ -113,12 +113,7 @@ fn write_2_0_0_compression() { for options in &all_options { println!("Using options {options:?}"); roundtrip_arrow_file_with_options(&testdata, version, path, options.clone()); - roundtrip_arrow_stream_with_options( - &testdata, - version, - path, - options.clone(), - ); + roundtrip_arrow_stream_with_options(&testdata, 
version, path, options.clone()); } }); } @@ -143,8 +138,7 @@ fn roundtrip_arrow_file_with_options( path: &str, options: IpcWriteOptions, ) { - let filename = - format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.arrow_file"); + let filename = format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.arrow_file"); println!("Verifying {filename}"); let mut tempfile = tempfile::tempfile().unwrap(); @@ -156,12 +150,8 @@ fn roundtrip_arrow_file_with_options( // read and rewrite the file to a temp location { - let mut writer = FileWriter::try_new_with_options( - &mut tempfile, - &reader.schema(), - options, - ) - .unwrap(); + let mut writer = + FileWriter::try_new_with_options(&mut tempfile, &reader.schema(), options).unwrap(); while let Some(Ok(batch)) = reader.next() { writer.write(&batch).unwrap(); } @@ -207,12 +197,7 @@ fn roundtrip_arrow_file_with_options( /// Verification json file /// `arrow-ipc-stream/integration//.json.gz fn roundtrip_arrow_stream(testdata: &str, version: &str, path: &str) { - roundtrip_arrow_stream_with_options( - testdata, - version, - path, - IpcWriteOptions::default(), - ) + roundtrip_arrow_stream_with_options(testdata, version, path, IpcWriteOptions::default()) } fn roundtrip_arrow_stream_with_options( @@ -221,8 +206,7 @@ fn roundtrip_arrow_stream_with_options( path: &str, options: IpcWriteOptions, ) { - let filename = - format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.stream"); + let filename = format!("{testdata}/arrow-ipc-stream/integration/{version}/{path}.stream"); println!("Verifying {filename}"); let mut tempfile = tempfile::tempfile().unwrap(); @@ -234,12 +218,9 @@ fn roundtrip_arrow_stream_with_options( // read and rewrite the file to a temp location { - let mut writer = StreamWriter::try_new_with_options( - &mut tempfile, - &reader.schema(), - options, - ) - .unwrap(); + let mut writer = + StreamWriter::try_new_with_options(&mut tempfile, &reader.schema(), options) + .unwrap(); while let Some(Ok(batch)) = reader.next() { writer.write(&batch).unwrap(); } diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index a03f53d6641c..83ad044d25e7 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -40,8 +40,12 @@ arrow-cast = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } flatbuffers = { version = "23.1.21", default-features = false } -lz4 = { version = "1.23", default-features = false, optional = true } -zstd = { version = "0.12.0", default-features = false, optional = true } +lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } +zstd = { version = "0.13.0", default-features = false, optional = true } + +[features] +default = [] +lz4 = ["lz4_flex"] [dev-dependencies] tempfile = "3.3" diff --git a/arrow-ipc/src/compression.rs b/arrow-ipc/src/compression.rs index db05e9a6a6c6..0d8b7b4c1bd4 100644 --- a/arrow-ipc/src/compression.rs +++ b/arrow-ipc/src/compression.rs @@ -90,10 +90,7 @@ impl CompressionCodec { /// [8 bytes]: uncompressed length /// [remaining bytes]: compressed data stream /// ``` - pub(crate) fn decompress_to_buffer( - &self, - input: &Buffer, - ) -> Result { + pub(crate) fn decompress_to_buffer(&self, input: &Buffer) -> Result { // read the first 8 bytes to determine if the data is // compressed let decompressed_length = read_uncompressed_size(input); @@ -103,13 +100,15 @@ impl CompressionCodec { } else if decompressed_length == LENGTH_NO_COMPRESSED_DATA { // no compression 
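// (Recap of the prefix convention handled in this branch and the ones around
// it: every IPC compressed buffer begins with a little-endian i64 where
// LENGTH_NO_COMPRESSED_DATA (-1) marks a body stored uncompressed, 0 marks an
// empty buffer, and any other value is the decompressed byte length that the
// new usize::try_from guard below validates before allocating.)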
             input.slice(LENGTH_OF_PREFIX_DATA as usize)
-        } else {
+        } else if let Ok(decompressed_length) = usize::try_from(decompressed_length) {
             // decompress data using the codec
-            let mut uncompressed_buffer =
-                Vec::with_capacity(decompressed_length as usize);
             let input_data = &input[(LENGTH_OF_PREFIX_DATA as usize)..];
-            self.decompress(input_data, &mut uncompressed_buffer)?;
-            Buffer::from(uncompressed_buffer)
+            self.decompress(input_data, decompressed_length as _)?
+                .into()
+        } else {
+            return Err(ArrowError::IpcError(format!(
+                "Invalid uncompressed length: {decompressed_length}"
+            )));
         };
         Ok(buffer)
     }
@@ -125,24 +124,29 @@ impl CompressionCodec {
     /// Decompress the data in input buffer and write to output buffer
     /// using the specified compression
-    fn decompress(
-        &self,
-        input: &[u8],
-        output: &mut Vec<u8>,
-    ) -> Result<usize, ArrowError> {
-        match self {
-            CompressionCodec::Lz4Frame => decompress_lz4(input, output),
-            CompressionCodec::Zstd => decompress_zstd(input, output),
+    fn decompress(&self, input: &[u8], decompressed_size: usize) -> Result<Vec<u8>, ArrowError> {
+        let ret = match self {
+            CompressionCodec::Lz4Frame => decompress_lz4(input, decompressed_size)?,
+            CompressionCodec::Zstd => decompress_zstd(input, decompressed_size)?,
+        };
+        if ret.len() != decompressed_size {
+            return Err(ArrowError::IpcError(format!(
+                "Expected decompressed length of {decompressed_size} got {}",
+                ret.len()
+            )));
         }
+        Ok(ret)
     }
 }

 #[cfg(feature = "lz4")]
 fn compress_lz4(input: &[u8], output: &mut Vec<u8>) -> Result<(), ArrowError> {
     use std::io::Write;
-    let mut encoder = lz4::EncoderBuilder::new().build(output)?;
+    let mut encoder = lz4_flex::frame::FrameEncoder::new(output);
     encoder.write_all(input)?;
-    encoder.finish().1?;
+    encoder
+        .finish()
+        .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
     Ok(())
 }
@@ -155,14 +159,16 @@ fn compress_lz4(_input: &[u8], _output: &mut Vec<u8>) -> Result<(), ArrowError>
 }

 #[cfg(feature = "lz4")]
-fn decompress_lz4(input: &[u8], output: &mut Vec<u8>) -> Result<usize, ArrowError> {
+fn decompress_lz4(input: &[u8], decompressed_size: usize) -> Result<Vec<u8>, ArrowError> {
     use std::io::Read;
-    Ok(lz4::Decoder::new(input)?.read_to_end(output)?)
+    let mut output = Vec::with_capacity(decompressed_size);
+    lz4_flex::frame::FrameDecoder::new(input).read_to_end(&mut output)?;
+    Ok(output)
 }

 #[cfg(not(feature = "lz4"))]
 #[allow(clippy::ptr_arg)]
-fn decompress_lz4(_input: &[u8], _output: &mut Vec<u8>) -> Result<usize, ArrowError> {
+fn decompress_lz4(_input: &[u8], _decompressed_size: usize) -> Result<Vec<u8>, ArrowError> {
     Err(ArrowError::InvalidArgumentError(
         "lz4 IPC decompression requires the lz4 feature".to_string(),
     ))
@@ -186,14 +192,16 @@ fn compress_zstd(_input: &[u8], _output: &mut Vec<u8>) -> Result<(), ArrowError>
 }

 #[cfg(feature = "zstd")]
-fn decompress_zstd(input: &[u8], output: &mut Vec<u8>) -> Result<usize, ArrowError> {
+fn decompress_zstd(input: &[u8], decompressed_size: usize) -> Result<Vec<u8>, ArrowError> {
     use std::io::Read;
-    Ok(zstd::Decoder::new(input)?.read_to_end(output)?)
+ let mut output = Vec::with_capacity(decompressed_size); + zstd::Decoder::with_buffer(input)?.read_to_end(&mut output)?; + Ok(output) } #[cfg(not(feature = "zstd"))] #[allow(clippy::ptr_arg)] -fn decompress_zstd(_input: &[u8], _output: &mut Vec) -> Result { +fn decompress_zstd(_input: &[u8], _decompressed_size: usize) -> Result, ArrowError> { Err(ArrowError::InvalidArgumentError( "zstd IPC decompression requires the zstd feature".to_string(), )) @@ -216,28 +224,26 @@ mod tests { #[test] #[cfg(feature = "lz4")] fn test_lz4_compression() { - let input_bytes = "hello lz4".as_bytes(); + let input_bytes = b"hello lz4"; let codec = super::CompressionCodec::Lz4Frame; let mut output_bytes: Vec = Vec::new(); codec.compress(input_bytes, &mut output_bytes).unwrap(); - let mut result_output_bytes: Vec = Vec::new(); - codec - .decompress(output_bytes.as_slice(), &mut result_output_bytes) + let result = codec + .decompress(output_bytes.as_slice(), input_bytes.len()) .unwrap(); - assert_eq!(input_bytes, result_output_bytes.as_slice()); + assert_eq!(input_bytes, result.as_slice()); } #[test] #[cfg(feature = "zstd")] fn test_zstd_compression() { - let input_bytes = "hello zstd".as_bytes(); + let input_bytes = b"hello zstd"; let codec = super::CompressionCodec::Zstd; let mut output_bytes: Vec = Vec::new(); codec.compress(input_bytes, &mut output_bytes).unwrap(); - let mut result_output_bytes: Vec = Vec::new(); - codec - .decompress(output_bytes.as_slice(), &mut result_output_bytes) + let result = codec + .decompress(output_bytes.as_slice(), input_bytes.len()) .unwrap(); - assert_eq!(input_bytes, result_output_bytes.as_slice()); + assert_eq!(input_bytes, result.as_slice()); } } diff --git a/arrow-ipc/src/convert.rs b/arrow-ipc/src/convert.rs index a78ccde6e169..b290a09acf5d 100644 --- a/arrow-ipc/src/convert.rs +++ b/arrow-ipc/src/convert.rs @@ -18,9 +18,7 @@ //! 
Utilities for converting between IPC types and native Arrow types use arrow_schema::*; -use flatbuffers::{ - FlatBufferBuilder, ForwardsUOffset, UnionWIPOffset, Vector, WIPOffset, -}; +use flatbuffers::{FlatBufferBuilder, ForwardsUOffset, UnionWIPOffset, Vector, WIPOffset}; use std::collections::HashMap; use std::sync::Arc; @@ -186,16 +184,11 @@ pub fn try_schema_from_ipc_buffer(buffer: &[u8]) -> Result { // buffer 0 }; - let msg = - size_prefixed_root_as_message(&buffer[begin_offset..]).map_err(|err| { - ArrowError::ParseError(format!( - "Unable to convert flight info to a message: {err}" - )) - })?; + let msg = size_prefixed_root_as_message(&buffer[begin_offset..]).map_err(|err| { + ArrowError::ParseError(format!("Unable to convert flight info to a message: {err}")) + })?; let ipc_schema = msg.header_as_schema().ok_or_else(|| { - ArrowError::ParseError( - "Unable to convert flight info to a schema".to_string(), - ) + ArrowError::ParseError("Unable to convert flight info to a schema".to_string()) })?; Ok(fb_to_schema(ipc_schema)) } else { @@ -277,15 +270,9 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat let time = field.type_as_time().unwrap(); match (time.bitWidth(), time.unit()) { (32, crate::TimeUnit::SECOND) => DataType::Time32(TimeUnit::Second), - (32, crate::TimeUnit::MILLISECOND) => { - DataType::Time32(TimeUnit::Millisecond) - } - (64, crate::TimeUnit::MICROSECOND) => { - DataType::Time64(TimeUnit::Microsecond) - } - (64, crate::TimeUnit::NANOSECOND) => { - DataType::Time64(TimeUnit::Nanosecond) - } + (32, crate::TimeUnit::MILLISECOND) => DataType::Time32(TimeUnit::Millisecond), + (64, crate::TimeUnit::MICROSECOND) => DataType::Time64(TimeUnit::Microsecond), + (64, crate::TimeUnit::NANOSECOND) => DataType::Time64(TimeUnit::Nanosecond), z => panic!( "Time type with bit width of {} and unit of {:?} not supported", z.0, z.1 @@ -296,30 +283,22 @@ pub(crate) fn get_data_type(field: crate::Field, may_be_dictionary: bool) -> Dat let timestamp = field.type_as_timestamp().unwrap(); let timezone: Option<_> = timestamp.timezone().map(|tz| tz.into()); match timestamp.unit() { - crate::TimeUnit::SECOND => { - DataType::Timestamp(TimeUnit::Second, timezone) - } + crate::TimeUnit::SECOND => DataType::Timestamp(TimeUnit::Second, timezone), crate::TimeUnit::MILLISECOND => { DataType::Timestamp(TimeUnit::Millisecond, timezone) } crate::TimeUnit::MICROSECOND => { DataType::Timestamp(TimeUnit::Microsecond, timezone) } - crate::TimeUnit::NANOSECOND => { - DataType::Timestamp(TimeUnit::Nanosecond, timezone) - } + crate::TimeUnit::NANOSECOND => DataType::Timestamp(TimeUnit::Nanosecond, timezone), z => panic!("Timestamp type with unit of {z:?} not supported"), } } crate::Type::Interval => { let interval = field.type_as_interval().unwrap(); match interval.unit() { - crate::IntervalUnit::YEAR_MONTH => { - DataType::Interval(IntervalUnit::YearMonth) - } - crate::IntervalUnit::DAY_TIME => { - DataType::Interval(IntervalUnit::DayTime) - } + crate::IntervalUnit::YEAR_MONTH => DataType::Interval(IntervalUnit::YearMonth), + crate::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime), crate::IntervalUnit::MONTH_DAY_NANO => { DataType::Interval(IntervalUnit::MonthDayNano) } @@ -775,8 +754,8 @@ pub(crate) fn get_fb_field_type<'a>( UnionMode::Dense => crate::UnionMode::Dense, }; - let fbb_type_ids = fbb - .create_vector(&fields.iter().map(|(t, _)| t as i32).collect::>()); + let fbb_type_ids = + fbb.create_vector(&fields.iter().map(|(t, _)| t as i32).collect::>()); 
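// (The type-id vector is created before UnionBuilder::new opens the Union
// table because flatbuffers forbids creating vectors while a table is under
// construction; nested objects must be finished before their parent starts.)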
let mut builder = crate::UnionBuilder::new(fbb); builder.add_mode(union_mode); builder.add_typeIds(fbb_type_ids); @@ -872,10 +851,7 @@ mod tests { ), Field::new( "timestamp[us]", - DataType::Timestamp( - TimeUnit::Microsecond, - Some("Africa/Johannesburg".into()), - ), + DataType::Timestamp(TimeUnit::Microsecond, Some("Africa/Johannesburg".into())), false, ), Field::new( @@ -900,11 +876,7 @@ mod tests { ), Field::new("utf8", DataType::Utf8, false), Field::new("binary", DataType::Binary, false), - Field::new_list( - "list[u8]", - Field::new("item", DataType::UInt8, false), - true, - ), + Field::new_list("list[u8]", Field::new("item", DataType::UInt8, false), true), Field::new_list( "list[struct]", Field::new_struct( @@ -1013,20 +985,14 @@ mod tests { ), Field::new_dict( "dictionary", - DataType::Dictionary( - Box::new(DataType::Int32), - Box::new(DataType::Utf8), - ), + DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)), true, 123, true, ), Field::new_dict( "dictionary", - DataType::Dictionary( - Box::new(DataType::UInt8), - Box::new(DataType::UInt32), - ), + DataType::Dictionary(Box::new(DataType::UInt8), Box::new(DataType::UInt32)), true, 123, true, @@ -1056,20 +1022,18 @@ mod tests { // # stripping continuation & length prefix & suffix bytes to get only schema bytes // [x for x in sink.getvalue().to_pybytes()][8:-8] let bytes: Vec = vec![ - 16, 0, 0, 0, 0, 0, 10, 0, 12, 0, 6, 0, 5, 0, 8, 0, 10, 0, 0, 0, 0, 1, 4, 0, - 12, 0, 0, 0, 8, 0, 8, 0, 0, 0, 4, 0, 8, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 20, - 0, 0, 0, 16, 0, 20, 0, 8, 0, 0, 0, 7, 0, 12, 0, 0, 0, 16, 0, 16, 0, 0, 0, 0, - 0, 0, 2, 16, 0, 0, 0, 32, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 102, - 105, 101, 108, 100, 49, 0, 0, 0, 0, 6, 0, 8, 0, 4, 0, 6, 0, 0, 0, 32, 0, 0, - 0, + 16, 0, 0, 0, 0, 0, 10, 0, 12, 0, 6, 0, 5, 0, 8, 0, 10, 0, 0, 0, 0, 1, 4, 0, 12, 0, 0, + 0, 8, 0, 8, 0, 0, 0, 4, 0, 8, 0, 0, 0, 4, 0, 0, 0, 1, 0, 0, 0, 20, 0, 0, 0, 16, 0, 20, + 0, 8, 0, 0, 0, 7, 0, 12, 0, 0, 0, 16, 0, 16, 0, 0, 0, 0, 0, 0, 2, 16, 0, 0, 0, 32, 0, + 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 102, 105, 101, 108, 100, 49, 0, 0, 0, 0, 6, + 0, 8, 0, 4, 0, 6, 0, 0, 0, 32, 0, 0, 0, ]; let ipc = crate::root_as_message(&bytes).unwrap(); let schema = ipc.header_as_schema().unwrap(); // generate same message with Rust let data_gen = crate::writer::IpcDataGenerator::default(); - let arrow_schema = - Schema::new(vec![Field::new("field1", DataType::UInt32, false)]); + let arrow_schema = Schema::new(vec![Field::new("field1", DataType::UInt32, false)]); let bytes = data_gen .schema_to_bytes(&arrow_schema, &crate::writer::IpcWriteOptions::default()) .ipc_message; diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index 0e9427813788..c0c2fb183237 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -61,10 +61,7 @@ impl<'b> flatbuffers::Push for Block { type Output = Block; #[inline] unsafe fn push(&self, dst: &mut [u8], _written_len: usize) { - let src = ::core::slice::from_raw_parts( - self as *const Block as *const u8, - Self::size(), - ); + let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size()); dst.copy_from_slice(src); } } @@ -307,11 +304,7 @@ impl flatbuffers::Verifiable for Footer<'_> { use flatbuffers::Verifiable; v.visit_table(pos)? .visit_field::("version", Self::VT_VERSION, false)? - .visit_field::>( - "schema", - Self::VT_SCHEMA, - false, - )? + .visit_field::>("schema", Self::VT_SCHEMA, false)? 
.visit_field::>>( "dictionaries", Self::VT_DICTIONARIES, @@ -335,9 +328,7 @@ pub struct FooterArgs<'a> { pub dictionaries: Option>>, pub recordBatches: Option>>, pub custom_metadata: Option< - flatbuffers::WIPOffset< - flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset>>, - >, + flatbuffers::WIPOffset>>>, >, } impl<'a> Default for FooterArgs<'a> { @@ -360,39 +351,29 @@ pub struct FooterBuilder<'a: 'b, 'b> { impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { #[inline] pub fn add_version(&mut self, version: MetadataVersion) { - self.fbb_.push_slot::( - Footer::VT_VERSION, - version, - MetadataVersion::V1, - ); + self.fbb_ + .push_slot::(Footer::VT_VERSION, version, MetadataVersion::V1); } #[inline] pub fn add_schema(&mut self, schema: flatbuffers::WIPOffset>) { self.fbb_ - .push_slot_always::>( - Footer::VT_SCHEMA, - schema, - ); + .push_slot_always::>(Footer::VT_SCHEMA, schema); } #[inline] pub fn add_dictionaries( &mut self, dictionaries: flatbuffers::WIPOffset>, ) { - self.fbb_.push_slot_always::>( - Footer::VT_DICTIONARIES, - dictionaries, - ); + self.fbb_ + .push_slot_always::>(Footer::VT_DICTIONARIES, dictionaries); } #[inline] pub fn add_recordBatches( &mut self, recordBatches: flatbuffers::WIPOffset>, ) { - self.fbb_.push_slot_always::>( - Footer::VT_RECORDBATCHES, - recordBatches, - ); + self.fbb_ + .push_slot_always::>(Footer::VT_RECORDBATCHES, recordBatches); } #[inline] pub fn add_custom_metadata( @@ -407,9 +388,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { ); } #[inline] - pub fn new( - _fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, - ) -> FooterBuilder<'a, 'b> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> { let start = _fbb.start_table(); FooterBuilder { fbb_: _fbb, @@ -451,9 +430,7 @@ pub fn root_as_footer(buf: &[u8]) -> Result Result { +pub fn size_prefixed_root_as_footer(buf: &[u8]) -> Result { flatbuffers::size_prefixed_root::
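// A hypothetical sketch of feeding root_as_footer above from the bytes of an
// Arrow IPC file: the Footer flatbuffer sits immediately before the trailing
// 10 bytes (a little-endian i32 footer length followed by the 6-byte "ARROW1"
// magic). Bounds checks are omitted for brevity.
fn footer_from_file_bytes(
    file_bytes: &[u8],
) -> Result<crate::Footer<'_>, flatbuffers::InvalidFlatbuffer> {
    let trailer = file_bytes.len() - 10;
    let len_bytes: [u8; 4] = file_bytes[trailer..trailer + 4].try_into().unwrap();
    let footer_len = u32::from_le_bytes(len_bytes) as usize;
    crate::root_as_footer(&file_bytes[trailer - footer_len..trailer])
}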