From d9498a00dd1d3f0a5468b4b7c7da84750a7636f1 Mon Sep 17 00:00:00 2001 From: Brian Olsen Date: Fri, 5 Jan 2024 00:54:46 -0600 Subject: [PATCH] Shift site build to use monorepo and gh-pages --- .github/workflows/flink-ci.yml | 1 + .github/workflows/hive-ci.yml | 1 + .github/workflows/java-ci.yml | 1 + .../workflows/site-ci.yml | 31 ++- .github/workflows/spark-ci.yml | 3 +- .gitignore | 5 +- site/.gitignore | 119 ---------- site/Makefile | 34 +++ site/README.md | 142 ++++++------ site/dev/build.sh | 23 ++ site/dev/clean.sh | 22 ++ site/dev/common.sh | 216 ++++++++++++++++++ site/dev/deploy.sh | 24 ++ site/dev/serve.sh | 23 ++ site/dev/setup_env.sh | 26 +++ site/docs/blogs.md | 14 ++ site/docs/community.md | 4 +- site/docs/hive-quickstart.md | 2 +- site/docs/how-to-release.md | 5 +- site/docs/multi-engine-support.md | 4 +- site/docs/releases.md | 142 +++++++++++- site/docs/roadmap.md | 53 +++-- site/docs/spec.md | 38 ++- site/docs/vendors.md | 9 + site/docs/view-spec.md | 72 +++--- site/mkdocs.yml | 69 +++--- site/nav.yml | 48 ++++ site/requirements.txt | 2 +- 28 files changed, 812 insertions(+), 321 deletions(-) rename site/variables.yml => .github/workflows/site-ci.yml (67%) delete mode 100644 site/.gitignore create mode 100755 site/Makefile create mode 100755 site/dev/build.sh create mode 100755 site/dev/clean.sh create mode 100755 site/dev/common.sh create mode 100755 site/dev/deploy.sh create mode 100755 site/dev/serve.sh create mode 100755 site/dev/setup_env.sh create mode 100644 site/nav.yml diff --git a/.github/workflows/flink-ci.yml b/.github/workflows/flink-ci.yml index 702ae9bc898d..4ea046550593 100644 --- a/.github/workflows/flink-ci.yml +++ b/.github/workflows/flink-ci.yml @@ -40,6 +40,7 @@ on: - 'spark/**' - 'pig/**' - 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/.github/workflows/hive-ci.yml b/.github/workflows/hive-ci.yml index f582e516fcd1..0d8b62137b32 100644 --- a/.github/workflows/hive-ci.yml +++ 
b/.github/workflows/hive-ci.yml @@ -38,6 +38,7 @@ on: - 'flink/**' - 'pig/**' - 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/.github/workflows/java-ci.yml b/.github/workflows/java-ci.yml index 4936e2b6514b..9e5ace5c8410 100644 --- a/.github/workflows/java-ci.yml +++ b/.github/workflows/java-ci.yml @@ -35,6 +35,7 @@ on: - '.asf.yml' - 'dev/**' - 'docs/**' + - 'site/**' - 'open-api/**' - 'format/**' - '.gitattributes' diff --git a/site/variables.yml b/.github/workflows/site-ci.yml similarity index 67% rename from site/variables.yml rename to .github/workflows/site-ci.yml index 4c1011ce2bbc..95a1fb3b94e0 100644 --- a/site/variables.yml +++ b/.github/workflows/site-ci.yml @@ -1,3 +1,4 @@ +# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information @@ -14,13 +15,23 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
- -extra: - icebergVersion: 1.4.0 - social: - - icon: fontawesome/brands/github-alt - link: https://github.com/apache/iceberg - - icon: fontawesome/brands/youtube - link: https://www.youtube.com/@ApacheIceberg - - icon: fontawesome/brands/slack - link: https://join.slack.com/t/apache-iceberg/shared_invite/zt-1znkcg5zm-7_FE~pcox347XwZE3GNfPg +# +name: site-ci +on: + push: + branches: + - main + paths: + - site/** + workflow_dispatch: +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: 3.x + - name: Deploy Iceberg documentation + run: make deploy + working-directory: ./site diff --git a/.github/workflows/spark-ci.yml b/.github/workflows/spark-ci.yml index c77f95fe7aa3..45e63b6c816c 100644 --- a/.github/workflows/spark-ci.yml +++ b/.github/workflows/spark-ci.yml @@ -33,6 +33,7 @@ on: - '.gitignore' - '.asf.yml' - 'dev/**' + - 'site/**' - 'mr/**' - 'hive3/**' - 'hive3-orc-bundle/**' @@ -141,4 +142,4 @@ jobs: with: name: test logs path: | - **/build/testlogs \ No newline at end of file + **/build/testlogs diff --git a/.gitignore b/.gitignore index 23febc6ccf6b..d9848cab06d3 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,9 @@ gradle/wrapper/gradle-wrapper.jar lib/ # web site build -site/site +site/site/ +#site/docs/docs/ +site/docs/javadoc/ # benchmark output spark/v3.3/spark/benchmark/* @@ -62,4 +64,3 @@ metastore_db/ # Spark/metastore files spark-warehouse/ derby.log - diff --git a/site/.gitignore b/site/.gitignore deleted file mode 100644 index cc9d8b1ced10..000000000000 --- a/site/.gitignore +++ /dev/null @@ -1,119 +0,0 @@ -## Temp remove for first phase -.github/ - -## MkDocs -/site/ - -## Vale -.github/vale/ -.vale.ini - -## MacOS - -# General -.DS_Store -.AppleDouble -.LSOverride - -# Icon must end with two \r -Icon - -# Thumbnails -._* - -# Files that might appear in the root of a volume -.DocumentRevisions-V100 -.fseventsd -.Spotlight-V100 -.TemporaryItems -.Trashes 
-.VolumeIcon.icns -.com.apple.timemachine.donotpresent - -# Directories potentially created on remote AFP share -.AppleDB -.AppleDesktop -Network Trash Folder -Temporary Items -.apdisk - -## Linux - -*~ - -# temporary files which can be created if a process still has a handle open of a deleted file -.fuse_hidden* - -# KDE directory preferences -.directory - -# Linux trash folder which might appear on any partition or disk -.Trash-* - -# .nfs files are created when an open file is removed but is still being accessed -.nfs* - -## Eclipse - -.metadata -tmp/ -*.tmp -*.bak -*.swp -*~.nib -local.properties -.settings/ -.loadpath -.recommenders - -# External tool builders -.externalToolBuilders/ - -# Locally stored "Eclipse launch configurations" -*.launch - -# PyDev specific (Python IDE for Eclipse) -*.pydevproject - -# CDT-specific (C/C++ Development Tooling) -.cproject - -# CDT- autotools -.autotools - -# Java annotation processor (APT) -.factorypath - -# PDT-specific (PHP Development Tools) -.buildpath - -# sbteclipse plugin -.target - -# Tern plugin -.tern-project - -# TeXlipse plugin -.texlipse - -# STS (Spring Tool Suite) -.springBeans - -# Code Recommenders -.recommenders/ - -# Annotation Processing -.apt_generated/ -.apt_generated_test/ - -# Scala IDE specific (Scala & Java development for Eclipse) -.cache-main -.scala_dependencies -.worksheet - -# Project description file. -# Typically, this file would be tracked if it contains build/dependency configurations: -.project - - - diff --git a/site/Makefile b/site/Makefile new file mode 100755 index 000000000000..ef66118ad5f7 --- /dev/null +++ b/site/Makefile @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +.PHONY: help +help: # Show help for each of the Makefile recipes. + @grep -E '^[a-zA-Z0-9 -]+:.*#' Makefile | sort | while read -r l; do printf "\033[1;32m$$(echo $$l | cut -f 1 -d':')\033[00m:$$(echo $$l | cut -f 2- -d'#')\n"; done + +.PHONY: serve +serve: # Clean, build, and run the docs site locally. + dev/serve.sh + +.PHONY: build +build: # Clean and build the docs site locally. + dev/build.sh + +.PHONY: deploy +deploy: # Clean, build, and deploy the Iceberg docs site. + dev/deploy.sh + +.PHONY: clean +clean: # Clean the local docs site. + dev/clean.sh diff --git a/site/README.md b/site/README.md index f78160861dd6..a58b988fccfe 100644 --- a/site/README.md +++ b/site/README.md @@ -27,52 +27,64 @@ This subproject contains the [MkDocs projects](https://www.mkdocs.org/) that def ## Usage -The directory structure in this repository mimics the sitemap hierarchy of the website. This aims to help contributors find the source files needed to make their changes faster. To understand the layout and naming, it is helpful to have some basic understandings of the MkDocs framework defaults. +The directory structure in this repository aims to mimic the sitemap hierarchy of the website. This helps contributors find the source files needed when updating or adding new documentation. It's helpful to have some basic understanding of the MkDocs framework defaults. 
### MkDocs background -In MkDocs, the [`docs_dir`](https://www.mkdocs.org/user-guide/configuration/#docs_dir) points to the root directory containing the source markdown files for an MkDocs project. By default, this points to the `docs` directory. When you build MkDocs `mkdocs build`, MkDocs generates the static site in the [`site_dir`](https://www.mkdocs.org/user-guide/configuration/#site_dir) becomes the root of that project for the generated site. +In MkDocs, the [`docs_dir`](https://www.mkdocs.org/user-guide/configuration/#docs_dir) points to the root directory containing the source markdown files for an MkDocs project. By default, this points to a directory named `docs` in the same location as the [`mkdocs.yml` file](https://www.mkdocs.org/user-guide/configuration/#introduction). The `mkdocs build` command is used to build the project. During the build, MkDocs generates the static site in the [`site_dir`](https://www.mkdocs.org/user-guide/configuration/#site_dir), which becomes the root of that project for the generated site. ### Iceberg docs layout -In the Iceberg docs, since the top-level site and versioned docs are contained in the same directory, they all live under the `/site` directory of the main Iceberg repository. The `/site/docs` directory is named this way to follow the [MkDocs convention](https://www.mkdocs.org/user-guide/configuration/#docs_dir), while the `/site/docs/docs` directory is an analog to the "Docs" navigation tab. Under this directory, you'll find the `/site/docs/docs/nightly` directory, which contains the state of the documentation in the local revisions. +The static Iceberg website lives under the `/site` directory, while the versioned documentation lives under the `/docs` directory of the main Iceberg repository. The `/site/docs` directory is named that way to follow the [MkDocs convention](https://www.mkdocs.org/user-guide/configuration/#docs_dir). The `/docs` directory contains the current state of the versioned documentation with local revisions.
Note that the root `/site` and `/docs` directories just happen to share their names with the MkDocs `site_dir` and `docs_dir` defaults; they do not correspond to those MkDocs settings. + +The static Iceberg site pages are Markdown files that live at `/site/docs/*.md`. The versioned documentation pages are Markdown files that live at `/docs/docs/*.md`. You may ask where the older versions of the docs and javadocs are, which is covered later in the build section. + +``` +. +├── docs (versioned) +│ ├── docs +│ │ ├── assets +│ │ ├── api.md +│ │ ├── ... +│ │ └── table-migration.md +│ └── mkdocs.yml +└── site (non-versioned) + ├── docs + │   ├── about.md + │   ├── ... + │   └── view-spec.md + ├── ... + ├── Makefile + ├── mkdocs.yml + └── requirements.txt +``` +### Building the versioned docs + +The Iceberg versioned docs are committed in the [orphan `docs` branch](https://github.com/apache/iceberg/tree/docs) and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time. The `docs` branch contains the versioned documentation source files at the root. These versions are mounted at the `/site/docs/docs/` directory at build time. The `latest` version is a soft link to the most recent [semver version](https://semver.org/) in the `docs` branch. There is also an [orphan `javadoc` branch](https://github.com/apache/iceberg/tree/javadoc) that contains prior statically generated versions of the javadocs mounted at `/site/docs/javadoc/` during build time. + +The docs are built, run, and released using [make](https://www.gnu.org/software/make/manual/make.html). The [Makefile](Makefile) and the [common shell script](dev/common.sh) support the following commands: -The non-versioned site pages are all the `/site/docs/.*md` files and the docs are the `/site/docs/docs//docs/*.md` files. Notice the location of the `mkdocs.yml`. Looking at this though, you may ask where the older versions and javadocs are. +``` site > make help``` +> [build](dev/build.sh): Clean and build the site locally.
+> [clean](dev/clean.sh): Clean the local site. +> [deploy](dev/deploy.sh): Clean, build, and deploy the Iceberg docs site. +> help: Show help for each of the Makefile recipes. +> [release](dev/release.sh): Release the current `/docs` as `ICEBERG_VERSION` (`make release ICEBERG_VERSION=`). +> [serve](dev/serve.sh): Clean, build, and run the site locally. + +To scaffold the versioned docs and build the project, run the `build` recipe. ``` -./site/ -├── docs -│   ├── assets -│   ├── docs -│   │   └── nightly -│   │   ├── docs -│   │   │ ├── assets -│   │   │ ├── api.md -│   │   │ ├── ... -│   │   │ └── table-migration.md -│   │   └── mkdocs.yml (versioned) -│   ├── about.md -│   ├── ... -│   └── view-spec.md -├── README.md -├── mkdocs.yml (non-versioned) -├── requirements.txt -└── variables.yml +make build ``` -### Building the versioned docs - -> [!IMPORTANT] -> This build process is currently missing older versions and the javadoc branches. -> Until these branches are merged, these steps will not work. -All previously versioned docs will be committed in `docs-` branches and mounted using [git worktree](https://git-scm.com/docs/git-worktree) at build time. The worktree will pull these versions in following the `/site/docs/docs/` convention. The `latest` version, will be a secondary copy of the most recent build version in the worktree, but pointing to `/site/docs/docs/latest`. There is also a `javadoc` branch that contains all prior static generation versions of the javadocs in a single tag. +This step will generate the following layout: ``` ./site/ └── docs    ├── docs -    │ ├── nightly -    │ ├── latest +    │ ├── latest (symlink to /site/docs/1.4.0/)    │ ├── 1.4.0    │ ├── 1.3.1    │   └── ... @@ -80,62 +92,41 @@ All previously versioned docs will be committed in `docs-` branches and    ├── latest    ├── 1.4.0    ├── 1.3.1 -       └── ... +      └── ... ``` -### Install - -1. 
(Optional) Set up venv +To run this, run the `serve` recipe, which runs the `build` recipe and calls `mkdocs serve`. This will run locally at <http://localhost:8000>. ``` -python -m venv mkdocs_env -source mkdocs_env/bin/activate +make serve ``` -1. Install required Python libraries +To clear all build files, run `clean`. ``` -pip install -r requirements.txt +make clean ``` -#### Adding additional versioned documentation +#### Offline mode -To build locally with additional docs versions, add them to your working tree. -For now, I'm just adding a single version, and the javadocs directory. +One of the great advantages to the MkDocs material plugin is the [offline feature](https://squidfunk.github.io/mkdocs-material/plugins/offline). You can view the Iceberg docs without the need of a server. To enable OFFLINE builds, add the `OFFLINE` environment variable to either the `build` or `serve` recipe. ``` -git worktree add site/docs/docs/1.4.0 docs-1.4.0 -git worktree add site/docs/javadoc javadoc +make build OFFLINE=true ``` -## Build - -Run the build command in the root directory, and optionally add `--clean` to force MkDocs to clear previously generated pages. - -``` -mkdocs build [--clean] -``` - -## Run - -Start MkDocs server locally to verify the site looks good. - -``` -mkdocs serve -``` +> [!WARNING] +> Building with offline mode disables the [use_directory_urls](https://www.mkdocs.org/user-guide/configuration/#use_directory_urls) setting, ensuring that users can open your documentation directly from the local file system. Do not enable this for releases or deployments. ## Release process -Deploying a version of the docs is a two step process: - 1. ~~Cut a new release from the current branch revision. This creates a new branch `docs-`.~~ - +Deploying the docs is a two-step process: + 1. Release a new version by copying the current `/docs` directory to a new version directory in the `docs` branch and a new javadoc build in the `javadoc` branch.
``` - .github/bin/deploy_docs.sh -v 1.4.0 + make release ICEBERG_VERSION=${ICEBERG_VERSION} + ``` + 1. Build and push the generated site to `asf-site`. + ``` + make deploy ``` - - ~~See [deploy_docs.sh](.github/bin/deploy_docs.sh) for more details.~~ - - 1. Make sure to add the new version to the list of versions to pull into git worktree. - 1. Follow the steps in [the build process](#build). - 1. Push the generated site to `gh-pages`. ## Validate Links @@ -147,15 +138,12 @@ As mentioned in the MkDocs section, when you build MkDocs `mkdocs build`, MkDocs ./site/ ├── docs │   ├── docs -│   │  ├── nightly -│   │  │ ├── docs -│   │  │ └── mkdocs.yml -│   │  ├── latest -│   │  │ ├── docs -│   │  │ └── mkdocs.yml -│   │  └── 1.4.0 -│   │  ├── docs -│   │ └── mkdocs.yml +│   │   ├── latest +│   │   │ ├── docs +│   │   │ └── mkdocs.yml +│   │   └── 1.4.0 +│   │   ├── docs +│   │ └── mkdocs.yml │   └─ javadoc │   ├── latest │   └── 1.4.0 diff --git a/site/dev/build.sh b/site/dev/build.sh new file mode 100755 index 000000000000..3b7c3acfaa33 --- /dev/null +++ b/site/dev/build.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +set -e + +./dev/setup_env.sh + +mkdocs build diff --git a/site/dev/clean.sh b/site/dev/clean.sh new file mode 100755 index 000000000000..588cc4aaedda --- /dev/null +++ b/site/dev/clean.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source dev/common.sh +set -e + +clean diff --git a/site/dev/common.sh b/site/dev/common.sh new file mode 100755 index 000000000000..59d6fcdc2773 --- /dev/null +++ b/site/dev/common.sh @@ -0,0 +1,216 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+
+REMOTE="iceberg_docs"
+
+# Makes sure a git remote named ${REMOTE} is configured and freshly fetched.
+# When the remote is missing, it is added with the apache/iceberg GitHub URL hard-coded below.
+# The fetch runs on every call, whether or not the remote already existed.
+create_or_update_docs_remote () {
+  echo " --> create or update docs remote"
+
+  # Look up the remote's URL; the 'git remote add' after '||' only runs when that lookup fails
+  git config "remote.${REMOTE}.url" >/dev/null ||
+    git remote add "${REMOTE}" https://github.com/apache/iceberg.git
+
+  # Fetch updates from the remote repository
+  git fetch "${REMOTE}"
+}
+
+
+# Runs 'git pull' for the given branch of the ${REMOTE} remote.
+# Arguments:
+#   $1: Branch name to pull updates from
+pull_remote () {
+  echo " --> pull remote"
+
+  local BRANCH="$1"
+
+  # Stop here (assert_not_empty exits with status 1) when no branch name was passed
+  assert_not_empty "${BRANCH}"
+
+  # Perform a pull from the specified branch of the remote repository
+  git pull "${REMOTE}" "${BRANCH}"
+}
+
+# Runs 'git push' of the given branch name to the ${REMOTE} remote.
+# Arguments:
+#   $1: Branch name to push changes to
+push_remote () {
+  echo " --> push remote"
+
+  local BRANCH="$1"
+
+  # Stop here (assert_not_empty exits with status 1) when no branch name was passed
+  assert_not_empty "${BRANCH}"
+
+  # Push changes to the specified branch of the remote repository
+  git push "${REMOTE}" "${BRANCH}"
+}
+
+# Installs or upgrades the Python packages listed in 'requirements.txt' using pip.
+install_deps () {
+  echo " --> install deps"
+
+  # '-q' keeps pip quiet; 'requirements.txt' is resolved against the current working directory
+  pip -q install -r requirements.txt --upgrade
+}
+
+# Checks if a provided argument is not empty. If empty, displays an error message and exits with a status code 1.
+# Arguments:
+#   $1: Argument to check for emptiness
+assert_not_empty () {
+  # An unset or empty "$1" ends the current shell with status 1, not just this function
+  if [ -z "$1" ]; then
+    # Diagnostics belong on stderr so they never end up in output captured with $(...)
+    echo "No argument supplied" >&2
+
+    exit 1
+  fi
+}
+
+# Prints the name of the highest version directory found under 'docs/docs/' (for example '1.4.0').
+# Only entries whose name starts with a digit are considered; returns 1 when there are none.
+get_latest_version () {
+  # 'sort -V' orders version strings, so the last line is the newest directory
+  local latest
+  latest=$(ls -d docs/docs/[0-9]* 2>/dev/null | sort -V | tail -1)
+
+  # Fail with a clear message instead of printing an empty version for callers to trip over
+  [ -n "${latest}" ] || { echo "No versioned docs found under docs/docs/" >&2; return 1; }
+  # Strip the 'docs/docs/' prefix, leaving only the version
+  basename "${latest}"
+}
+
+# Builds 'docs/docs/latest' from an existing version directory under 'docs/docs/'.
+# Arguments:
+#   $1: ICEBERG_VERSION - The version number of the documentation to be treated as the latest.
+create_latest () {
+  echo " --> create latest"
+
+  local ICEBERG_VERSION="$1"
+
+  # Ensure ICEBERG_VERSION is not empty
+  assert_not_empty "${ICEBERG_VERSION}"
+
+  # Output the provided ICEBERG_VERSION for verification
+  echo "${ICEBERG_VERSION}"
+
+  # Remove any existing 'latest' directory and recreate it
+  rm -rf docs/docs/latest/
+  mkdir docs/docs/latest/
+
+  # 'latest' is a real directory: its pages are a relative symlink to the version's 'docs', while mkdocs.yml is a copy so it can be edited on its own
+  ln -s "../${ICEBERG_VERSION}/docs" docs/docs/latest/docs
+  cp "docs/docs/${ICEBERG_VERSION}/mkdocs.yml" docs/docs/latest/
+
+  cd docs/docs/
+
+  # Rewrite the copied 'latest/mkdocs.yml' to say 'latest' (update_version takes a path relative to docs/docs/)
+  update_version "latest"
+  cd -
+}
+
+# Updates version information within the mkdocs.yml file for a specified ICEBERG_VERSION.
+# Arguments:
+#   $1: ICEBERG_VERSION - The version number used for updating the mkdocs.yml file.
+update_version () {
+  echo " --> update version"
+
+  local ICEBERG_VERSION="$1"
+  local mkdocs_file="${ICEBERG_VERSION}/mkdocs.yml"
+  # Ensure ICEBERG_VERSION is not empty
+  assert_not_empty "${ICEBERG_VERSION}"
+
+  # Rewrite the version suffix of the 'site_name: docs/<version>' line and of the
+  # '- Javadoc: .../javadoc/<version>' list entry. An attached backup suffix
+  # ('-i.bak') is the one in-place form accepted by both GNU and BSD/macOS sed,
+  # so no per-OS branching is needed and other platforms are no longer skipped.
+  # NOTE: ICEBERG_VERSION is spliced into the sed replacement unescaped, so it
+  # must not contain '/' or '&' (fine for values such as '1.4.0' or 'latest').
+  sed -i.bak -E \
+    -e "s/(^site_name:[[:space:]]+docs\/)[^[:space:]]+/\1${ICEBERG_VERSION}/" \
+    -e "s/(^[[:space:]]*-[[:space:]]+Javadoc:.*\/javadoc\/).*$/\1${ICEBERG_VERSION}/" \
+    "${mkdocs_file}"
+  rm -f "${mkdocs_file}.bak"
+}
+
+# Marks every Markdown page of one docs version as excluded from search indexing.
+# Arguments:
+#   $1: ICEBERG_VERSION - Directory (relative to the current directory) whose 'docs/' pages are modified.
+search_exclude_versioned_docs () {
+  echo " --> search exclude version docs"
+  local ICEBERG_VERSION="$1"
+
+  # Ensure ICEBERG_VERSION is not empty
+  assert_not_empty "${ICEBERG_VERSION}"
+
+  cd "${ICEBERG_VERSION}/docs/"
+
+  # For each '*.md' directly in this directory (no recursion), insert a 'search:' / 'exclude: true' pair after line 2 -- presumably just inside the front matter (TODO confirm). Not idempotent: every run inserts the pair again.
+  python3 -c "import os
+for f in filter(lambda x: x.endswith('.md'), os.listdir()): lines = open(f).readlines(); open(f, 'w').writelines(lines[:2] + ['search:\n', ' exclude: true\n'] + lines[2:]);"
+
+  cd -
+}
+
+# Sets up local worktrees for the documentation and performs operations related to different versions.
+pull_versioned_docs () {
+  echo " --> pull versioned docs"
+
+  # Ensure the remote repository for documentation exists and is up-to-date
+  create_or_update_docs_remote
+
+  # -f: a docs/docs that is already gone must not abort the run under 'set -e'
+  rm -rf docs/docs
+
+  # Add local worktrees for documentation and javadoc from the remote repository
+  git worktree add -f docs/docs "${REMOTE}/docs"
+  git worktree add -f docs/javadoc "${REMOTE}/javadoc"
+
+  # Declared separately: 'local var=$(cmd)' would hide a get_latest_version failure
+  local latest_version
+  latest_version=$(get_latest_version)
+  echo "Latest version is: ${latest_version}"
+
+  # Create the 'latest' version of documentation
+  create_latest "${latest_version}"
+}
+
+# Removes what pull_versioned_docs and the site build leave behind: the 'latest' directory, both worktrees, and 'site/'.
+clean () {
+  echo " --> clean"
+
+  # Temporarily disable script exit on errors to ensure cleanup continues
+  set +e
+
+  # Remove 'latest' directories and related Git worktrees
+  rm -rf docs/docs/latest &> /dev/null
+  git worktree remove docs/docs &> /dev/null
+  git worktree remove docs/javadoc &> /dev/null
+  # Bring back tracked files under docs/docs -- presumably those deleted by pull_versioned_docs' rm (TODO confirm)
+  git restore docs/docs
+
+  # Remove any additional temporary artifacts (e.g., 'site/' directory)
+  rm -rf site/ &> /dev/null
+
+  set -e # Re-enable script exit on errors
+}
+
diff --git a/site/dev/deploy.sh b/site/dev/deploy.sh
new file mode 100755
index 000000000000..c55503d99460
--- /dev/null
+++ b/site/dev/deploy.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +./dev/setup_env.sh + +mkdocs gh-deploy --dirty # --remote-branch asf-site + diff --git a/site/dev/serve.sh b/site/dev/serve.sh new file mode 100755 index 000000000000..8901de92ab04 --- /dev/null +++ b/site/dev/serve.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +./dev/setup_env.sh + +mkdocs serve --dirty --watch . diff --git a/site/dev/setup_env.sh b/site/dev/setup_env.sh new file mode 100755 index 000000000000..cd228d1eab3a --- /dev/null +++ b/site/dev/setup_env.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source dev/common.sh +set -e + +clean + +install_deps + +pull_versioned_docs diff --git a/site/docs/blogs.md b/site/docs/blogs.md index a5ef127235c5..17aed7bd785c 100644 --- a/site/docs/blogs.md +++ b/site/docs/blogs.md @@ -22,6 +22,20 @@ title: "Blogs" Here is a list of company blogs that talk about Iceberg. The blogs are ordered from most recent to oldest. +### [Apache Hive-4.x with Iceberg Branches & Tags](https://medium.com/@ayushtkn/apache-hive-4-x-with-iceberg-branches-tags-3d52293ac0bf/) +**Date**: October 12th, 2023, **Company**: Cloudera + +**Authors**: [Ayush Saxena](https://www.linkedin.com/in/ayush151/) + +### [Apache Hive 4.x With Apache Iceberg](https://medium.com/@ayushtkn/apache-hive-4-x-with-apache-iceberg-part-i-355e7a380725/) +**Date**: October 12th, 2023, **Company**: Cloudera + +**Authors**: [Ayush Saxena](https://www.linkedin.com/in/ayush151/) + +### [From Hive Tables to Iceberg Tables: Hassle-Free](https://blog.cloudera.com/from-hive-tables-to-iceberg-tables-hassle-free/) +**Date**: July 14th, 2023, **Company**: Cloudera + +**Authors**: [Srinivas Rishindra Pothireddi](https://www.linkedin.com/in/srinivas-rishindra/) ### [From Hive Tables to Iceberg Tables: Hassle-Free](https://blog.cloudera.com/from-hive-tables-to-iceberg-tables-hassle-free/) **Date**: July 14th, 2023, **Company**: Cloudera diff --git a/site/docs/community.md b/site/docs/community.md index 
bf5d4449b43e..6d39ce96aad6 100644 --- a/site/docs/community.md +++ b/site/docs/community.md @@ -40,13 +40,13 @@ Issues are tracked in GitHub: ## Slack -We use the [Apache Iceberg workspace](https://apache-iceberg.slack.com/) on Slack. To be invited, follow [this invite link](https://join.slack.com/t/apache-iceberg/shared_invite/zt-1znkcg5zm-7_FE~pcox347XwZE3GNfPg). +We use the [Apache Iceberg workspace](https://apache-iceberg.slack.com/) on Slack. To be invited, follow [this invite link](https://join.slack.com/t/apache-iceberg/shared_invite/zt-287g3akar-K9Oe_En5j1UL7Y_Ikpai3A). Please note that this link may occasionally break when Slack does an upgrade. If you encounter problems using it, please let us know by sending an email to . ## Iceberg Community Events -This calendar contians two calendar feeds: +This calendar contains two calendar feeds: * Iceberg Community Events - Events such as conferences and meetups, aimed to educate and inspire Iceberg users. * Iceberg Dev Events - Events such as the triweekly Iceberg sync, aimed to discuss the project roadmap and how to implement features. diff --git a/site/docs/hive-quickstart.md b/site/docs/hive-quickstart.md index 57cc02157a53..80247525f7d0 100644 --- a/site/docs/hive-quickstart.md +++ b/site/docs/hive-quickstart.md @@ -39,7 +39,7 @@ Take a look at the Tags tab in [Apache Hive docker images](https://hub.docker.co Set the version variable. ```sh -export HIVE_VERSION=4.0.0-alpha-2 +export HIVE_VERSION=4.0.0-beta-1 ``` Start the container, using the option `--platform linux/amd64` for a Mac with an M-Series chip: diff --git a/site/docs/how-to-release.md b/site/docs/how-to-release.md index e2d9ae4ceec3..8a774cc6ee45 100644 --- a/site/docs/how-to-release.md +++ b/site/docs/how-to-release.md @@ -303,9 +303,10 @@ Thanks to everyone for contributing! Create a PR in the `iceberg` repo to make revapi run on the new release. For an example see [this PR](https://github.com/apache/iceberg/pull/6275). 
-#### Update github issue template +#### Update GitHub -Create a PR in the `iceberg` repo to add the new version to the github issue template. For an example see [this PR](https://github.com/apache/iceberg/pull/6287). +- Create a PR in the `iceberg` repo to add the new version to the GitHub issue template. For an example see [this PR](https://github.com/apache/iceberg/pull/6287). +- Draft [a new release to update GitHub](https://github.com/apache/iceberg/releases/new) to show the latest release. A changelog can be generated automatically using GitHub. ### Documentation Release diff --git a/site/docs/multi-engine-support.md b/site/docs/multi-engine-support.md index 7a4eb2ea8891..cd7fddf3224d 100644 --- a/site/docs/multi-engine-support.md +++ b/site/docs/multi-engine-support.md @@ -63,8 +63,8 @@ Each engine version undergoes the following lifecycle stages: | ---------- | ------------------ | ----------------------- |------------------------| ------------------ | | 2.4 | End of Life | 0.7.0-incubating | 1.2.1 | [iceberg-spark-runtime-2.4](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-2.4/1.2.1/iceberg-spark-runtime-2.4-1.2.1.jar) | | 3.0 | End of Life | 0.9.0 | 1.0.0 | [iceberg-spark-runtime-3.0_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.0_2.12/1.0.0/iceberg-spark-runtime-3.0_2.12-1.0.0.jar) | -| 3.1 | Deprecated | 0.12.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.1_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.1_2.12-{{ icebergVersion }}.jar) [1] | -| 3.2 | Maintained | 0.13.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.2_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) | +| 3.1 | End of Life | 0.12.0 | 1.3.1 |
[iceberg-spark-runtime-3.1_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/1.3.1/iceberg-spark-runtime-3.1_2.12-1.3.1.jar) [1] | +| 3.2 | Deprecated | 0.13.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.2_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) | | 3.3 | Maintained | 0.14.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.3_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.12-{{ icebergVersion }}.jar) | | 3.4 | Maintained | 1.3.0 | {{ icebergVersion }} | [iceberg-spark-runtime-3.4_2.12](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.12-{{ icebergVersion }}.jar) | diff --git a/site/docs/releases.md b/site/docs/releases.md index 264773b46dc7..13ee45edd9ca 100644 --- a/site/docs/releases.md +++ b/site/docs/releases.md @@ -23,14 +23,17 @@ title: "Releases" The latest version of Iceberg is [{{ icebergVersion }}](https://github.com/apache/iceberg/releases/tag/apache-iceberg-{{ icebergVersion }}). 
* [{{ icebergVersion }} source tar.gz](https://www.apache.org/dyn/closer.cgi/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz) -- [signature](https://downloads.apache.org/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz.asc) -- [sha512](https://downloads.apache.org/iceberg/apache-iceberg-{{ icebergVersion }}/apache-iceberg-{{ icebergVersion }}.tar.gz.sha512) +* [{{ icebergVersion }} Spark 3.5\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.12-{{ icebergVersion }}.jar) -- [3.5\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.5_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.5_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.4\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.12-{{ icebergVersion }}.jar) -- [3.4\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.4_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.4_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.3\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.12-{{ icebergVersion }}.jar) -- [3.3\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.3_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.3_2.13-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Spark 3.2\_2.12 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.12-{{ icebergVersion }}.jar) -- 
[3.2\_2.13](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.2_2.13/{{ icebergVersion }}/iceberg-spark-runtime-3.2_2.13-{{ icebergVersion }}.jar) -* [{{ icebergVersion }} Spark 3.1 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-spark-runtime-3.1_2.12/{{ icebergVersion }}/iceberg-spark-runtime-3.1_2.12-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.17 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.17/{{ icebergVersion }}/iceberg-flink-runtime-1.17-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.16 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.16/{{ icebergVersion }}/iceberg-flink-runtime-1.16-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Flink 1.15 runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-flink-runtime-1.15/{{ icebergVersion }}/iceberg-flink-runtime-1.15-{{ icebergVersion }}.jar) * [{{ icebergVersion }} Hive runtime Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-hive-runtime/{{ icebergVersion }}/iceberg-hive-runtime-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} aws-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-aws-bundle/{{ icebergVersion }}/iceberg-aws-bundle-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} gcp-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-gcp-bundle/{{ icebergVersion }}/iceberg-gcp-bundle-{{ icebergVersion }}.jar) +* [{{ icebergVersion }} azure-bundle Jar](https://search.maven.org/remotecontent?filepath=org/apache/iceberg/iceberg-azure-bundle/{{ icebergVersion }}/iceberg-azure-bundle-{{ icebergVersion }}.jar) To use Iceberg in Spark or Flink, download the runtime JAR for your engine version and add it to the jars folder of your installation. 
@@ -64,7 +67,140 @@ To add a dependency on Iceberg in Maven, add the following to your `pom.xml`: ``` -## 1.3.1 release +### 1.4.3 Release + +Apache Iceberg 1.4.3 was released on December 27, 2023. The main issue it solves is missing files from a transaction retry with conflicting manifests. It is recommended to upgrade if you use transactions. + +- Core: Scan only live entries in partitions table (#8969) by [@Fokko](https://github.com/Fokko) in [#9197](https://github.com/apache/iceberg/pull/9197) +- Core: Fix missing files from transaction retries with conflicting manifest merges by [@nastra](https://github.com/nastra) in [#9337](https://github.com/apache/iceberg/pull/9337) +- JDBC Catalog: Fix namespaceExists check with special characters by [@ismailsimsek](https://github.com/ismailsimsek) in [#9291](https://github.com/apache/iceberg/pull/9291) +- Core: Expired Snapshot files in a transaction should be deleted by [@bartash](https://github.com/bartash) in [#9223](https://github.com/apache/iceberg/pull/9223) +- Core: Fix missing delete files from transaction by [@nastra](https://github.com/nastra) in [#9356](https://github.com/apache/iceberg/pull/9356) + + +## Past releases + +### 1.4.2 Release + +Apache Iceberg 1.4.2 was released on November 2, 2023. +The 1.4.2 patch release fixes a remaining case where split offsets +should be ignored when they are deemed invalid. + +* Core + - Core: Ignore split offsets array when split offset is past file length ([\#8925](https://github.com/apache/iceberg/pull/8925)) + +### 1.4.1 Release + +Apache Iceberg 1.4.1 was released on October 23, 2023. +The 1.4.1 release addresses various issues identified in the 1.4.0 release.
+ +* Core + - Core: Do not use a lazy split offset list in manifests ([\#8834](https://github.com/apache/iceberg/pull/8834)) + - Core: Ignore split offsets when the last split offset is past the file length ([\#8860](https://github.com/apache/iceberg/pull/8860)) +* AWS + - Avoid static global credentials provider which doesn't play well with lifecycle management ([\#8677](https://github.com/apache/iceberg/pull/8677)) +* Flink + - Reverting the default custom partitioner for bucket column ([\#8848](https://github.com/apache/iceberg/pull/8848)) + +### 1.4.0 release + +Apache Iceberg 1.4.0 was released on October 4, 2023. +The 1.4.0 release adds a variety of new features and bug fixes. + +* API + - Implement bound expression sanitization ([\#8149](https://github.com/apache/iceberg/pull/8149)) + - Remove overflow checks in `DefaultCounter` causing performance issues ([\#8297](https://github.com/apache/iceberg/pull/8297)) + - Support incremental scanning with branch ([\#5984](https://github.com/apache/iceberg/pull/5984)) + - Add a validation API to `DeleteFiles` which validates files exist ([\#8525](https://github.com/apache/iceberg/pull/8525)) +* Core + - Use V2 format by default in new tables ([\#8381](https://github.com/apache/iceberg/pull/8381)) + - Use `zstd` compression for Parquet by default in new tables ([\#8593](https://github.com/apache/iceberg/pull/8593)) + - Add strict metadata cleanup mode and enable it by default ([\#8397](https://github.com/apache/iceberg/pull/8397)) ([\#8599](https://github.com/apache/iceberg/pull/8599)) + - Avoid generating huge manifests during commits ([\#6335](https://github.com/apache/iceberg/pull/6335)) + - Add a writer for unordered position deletes ([\#7692](https://github.com/apache/iceberg/pull/7692)) + - Optimize `DeleteFileIndex` ([\#8157](https://github.com/apache/iceberg/pull/8157)) + - Optimize lookup in `DeleteFileIndex` without useful bounds ([\#8278](https://github.com/apache/iceberg/pull/8278)) + - Optimize split 
offsets handling ([\#8336](https://github.com/apache/iceberg/pull/8336)) + - Optimize computing user-facing state in data tasks ([\#8346](https://github.com/apache/iceberg/pull/8346)) + - Don't persist useless file and position bounds for deletes ([\#8360](https://github.com/apache/iceberg/pull/8360)) + - Don't persist counts for paths and positions in position delete files ([\#8590](https://github.com/apache/iceberg/pull/8590)) + - Support setting system-level properties via environmental variables ([\#5659](https://github.com/apache/iceberg/pull/5659)) + - Add JSON parser for `ContentFile` and `FileScanTask` ([\#6934](https://github.com/apache/iceberg/pull/6934)) + - Add REST spec and request for commits to multiple tables ([\#7741](https://github.com/apache/iceberg/pull/7741)) + - Add REST API for committing changes against multiple tables ([\#7569](https://github.com/apache/iceberg/pull/7569)) + - Default to exponential retry strategy in REST client ([\#8366](https://github.com/apache/iceberg/pull/8366)) + - Support registering tables with REST session catalog ([\#6512](https://github.com/apache/iceberg/pull/6512)) + - Add last updated timestamp and snapshot ID to partitions metadata table ([\#7581](https://github.com/apache/iceberg/pull/7581)) + - Add total data size to partitions metadata table ([\#7920](https://github.com/apache/iceberg/pull/7920)) + - Extend `ResolvingFileIO` to support bulk operations ([\#7976](https://github.com/apache/iceberg/pull/7976)) + - Key metadata in Avro format ([\#6450](https://github.com/apache/iceberg/pull/6450)) + - Add AES GCM encryption stream ([\#3231](https://github.com/apache/iceberg/pull/3231)) + - Fix a connection leak in streaming delete filters ([\#8132](https://github.com/apache/iceberg/pull/8132)) + - Fix lazy snapshot loading history ([\#8470](https://github.com/apache/iceberg/pull/8470)) + - Fix unicode handling in HTTPClient ([\#8046](https://github.com/apache/iceberg/pull/8046)) + - Fix paths for unpartitioned 
specs in writers ([\#7685](https://github.com/apache/iceberg/pull/7685)) + - Fix OOM caused by Avro decoder caching ([\#7791](https://github.com/apache/iceberg/pull/7791)) +* Spark + - Added support for Spark 3.5 + - Code for DELETE, UPDATE, and MERGE commands has moved to Spark, and all related extensions have been dropped from Iceberg. + - Support for WHEN NOT MATCHED BY SOURCE clause in MERGE. + - Column pruning in merge-on-read operations. + - Ability to request a bigger advisory partition size for the final write to produce well-sized output files without harming the job parallelism. + - Dropped support for Spark 3.1 + - Deprecated support for Spark 3.2 + - Support vectorized reads for merge-on-read operations in Spark 3.4 and 3.5 ([\#8466](https://github.com/apache/iceberg/pull/8466)) + - Increase default advisory partition size for writes in Spark 3.5 ([\#8660](https://github.com/apache/iceberg/pull/8660)) + - Support distributed planning in Spark 3.4 and 3.5 ([\#8123](https://github.com/apache/iceberg/pull/8123)) + - Support pushing down system functions by V2 filters in Spark 3.4 and 3.5 ([\#7886](https://github.com/apache/iceberg/pull/7886)) + - Support fanout position delta writers in Spark 3.4 and 3.5 ([\#7703](https://github.com/apache/iceberg/pull/7703)) + - Use fanout writers for unsorted tables by default in Spark 3.5 ([\#8621](https://github.com/apache/iceberg/pull/8621)) + - Support multiple shuffle partitions per file in compaction in Spark 3.4 and 3.5 ([\#7897](https://github.com/apache/iceberg/pull/7897)) + - Output net changes across snapshots for carryover rows in CDC ([\#7326](https://github.com/apache/iceberg/pull/7326)) + - Display read metrics on Spark SQL UI ([\#7447](https://github.com/apache/iceberg/pull/7447)) ([\#8445](https://github.com/apache/iceberg/pull/8445)) + - Adjust split size to benefit from cluster parallelism in Spark 3.4 and 3.5 ([\#7714](https://github.com/apache/iceberg/pull/7714)) + - Add `fast_forward` procedure 
([\#8081](https://github.com/apache/iceberg/pull/8081)) + - Support filters when rewriting position deletes ([\#7582](https://github.com/apache/iceberg/pull/7582)) + - Support setting current snapshot with ref ([\#8163](https://github.com/apache/iceberg/pull/8163)) + - Make backup table name configurable during migration ([\#8227](https://github.com/apache/iceberg/pull/8227)) + - Add write and SQL options to override compression config ([\#8313](https://github.com/apache/iceberg/pull/8313)) + - Correct partition transform functions to match the spec ([\#8192](https://github.com/apache/iceberg/pull/8192)) + - Enable extra commit properties with metadata delete ([\#7649](https://github.com/apache/iceberg/pull/7649)) +* Flink + - Add possibility of ordering the splits based on the file sequence number ([\#7661](https://github.com/apache/iceberg/pull/7661)) + - Fix serialization in `TableSink` with anonymous object ([\#7866](https://github.com/apache/iceberg/pull/7866)) + - Switch to `FileScanTaskParser` for JSON serialization of `IcebergSourceSplit` ([\#7978](https://github.com/apache/iceberg/pull/7978)) + - Custom partitioner for bucket partitions ([\#7161](https://github.com/apache/iceberg/pull/7161)) + - Implement data statistics coordinator to aggregate data statistics from operator subtasks ([\#7360](https://github.com/apache/iceberg/pull/7360)) + - Support alter table column ([\#7628](https://github.com/apache/iceberg/pull/7628)) +* Parquet + - Add encryption config to read and write builders ([\#2639](https://github.com/apache/iceberg/pull/2639)) + - Skip writing bloom filters for deletes ([\#7617](https://github.com/apache/iceberg/pull/7617)) + - Cache codecs by name and level ([\#8182](https://github.com/apache/iceberg/pull/8182)) + - Fix decimal data reading from `ParquetAvroValueReaders` ([\#8246](https://github.com/apache/iceberg/pull/8246)) + - Handle filters with transforms by assuming data must be scanned 
([\#8243](https://github.com/apache/iceberg/pull/8243)) +* ORC + - Handle filters with transforms by assuming the filter matches ([\#8244](https://github.com/apache/iceberg/pull/8244)) +* Vendor Integrations + - GCP: Fix single byte read in `GCSInputStream` ([\#8071](https://github.com/apache/iceberg/pull/8071)) + - GCP: Add properties for OAuth2 and update library ([\#8073](https://github.com/apache/iceberg/pull/8073)) + - GCP: Add prefix and bulk operations to `GCSFileIO` ([\#8168](https://github.com/apache/iceberg/pull/8168)) + - GCP: Add bundle jar for GCP-related dependencies ([\#8231](https://github.com/apache/iceberg/pull/8231)) + - GCP: Add range reads to `GCSInputStream` ([\#8301](https://github.com/apache/iceberg/pull/8301)) + - AWS: Add bundle jar for AWS-related dependencies ([\#8261](https://github.com/apache/iceberg/pull/8261)) + - AWS: Support config storage class for `S3FileIO` ([\#8154](https://github.com/apache/iceberg/pull/8154)) + - AWS: Add `FileIO` tracker/closer to Glue catalog ([\#8315](https://github.com/apache/iceberg/pull/8315)) + - AWS: Update S3 signer spec to allow an optional string body in `S3SignRequest` ([\#8361](https://github.com/apache/iceberg/pull/8361)) + - Azure: Add `FileIO` that supports ADLSv2 storage ([\#8303](https://github.com/apache/iceberg/pull/8303)) + - Azure: Make `ADLSFileIO` implement `DelegateFileIO` ([\#8563](https://github.com/apache/iceberg/pull/8563)) + - Nessie: Provide better commit message on table registration ([\#8385](https://github.com/apache/iceberg/pull/8385)) +* Dependencies + - Bump Nessie to 0.71.0 + - Bump ORC to 1.9.1 + - Bump Arrow to 12.0.1 + - Bump AWS Java SDK to 2.20.131 + + +### 1.3.1 release Apache Iceberg 1.3.1 was released on July 25, 2023. The 1.3.1 release addresses various issues identified in the 1.3.0 release. @@ -80,8 +216,6 @@ The 1.3.1 release addresses various issues identified in the 1.3.0 release.
* Flink - FlinkCatalog creation no longer creates the default database ([\#8039](https://github.com/apache/iceberg/pull/8039)) -## Past releases - ### 1.3.0 release Apache Iceberg 1.3.0 was released on May 30th, 2023. diff --git a/site/docs/roadmap.md b/site/docs/roadmap.md index c0b0efe9dd8d..7df1114c76b6 100644 --- a/site/docs/roadmap.md +++ b/site/docs/roadmap.md @@ -20,28 +20,37 @@ title: "Roadmap" # Roadmap Overview -This roadmap outlines projects that the Iceberg community is working on, their priority, and a rough size estimate. -This is based on the latest [community priority discussion](https://lists.apache.org/thread.html/r84e80216c259c81f824c6971504c321cd8c785774c489d52d4fc123f%40%3Cdev.iceberg.apache.org%3E). +This roadmap outlines projects that the Iceberg community is working on. Each high-level item links to a GitHub project board that tracks the current status. Related design docs will be linked on the planning boards. -# Priority 1 - -* API: [Iceberg 1.0.0](https://github.com/apache/iceberg/projects/3) [medium] -* Python: [Pythonic refactor](https://github.com/apache/iceberg/projects/7) [medium] -* Spec: [Z-ordering / Space-filling curves](https://github.com/apache/iceberg/projects/16) [medium] -* Spec: [Snapshot tagging and branching](https://github.com/apache/iceberg/projects/4) [small] -* Views: [Spec](https://github.com/apache/iceberg/projects/6) [medium] -* Puffin: [Implement statistics information in table snapshot](https://github.com/apache/iceberg/pull/4741) [medium] -* Flink: [FLIP-27 based Iceberg source](https://github.com/apache/iceberg/projects/23) [large] - -# Priority 2 - -* ORC: [Support delete files stored as ORC](https://github.com/apache/iceberg/projects/13) [small] -* Spark: [DSv2 streaming improvements](https://github.com/apache/iceberg/projects/2) [small] -* Flink: [Inline file compaction](https://github.com/apache/iceberg/projects/14) [small] -* Flink: [Support UPSERT](https://github.com/apache/iceberg/projects/15) [small] -*
Spec: [Secondary indexes](https://github.com/apache/iceberg/projects/17) [large] -* Spec v3: [Encryption](https://github.com/apache/iceberg/projects/5) [large] -* Spec v3: [Relative paths](https://github.com/apache/iceberg/projects/18) [large] -* Spec v3: [Default field values](https://github.com/apache/iceberg/projects/19) [medium] +# General + +* [Multi-table transaction support](https://github.com/apache/iceberg/projects/30) +* [Views Support](https://github.com/apache/iceberg/projects/29) +* [Change Data Capture (CDC) Support](https://github.com/apache/iceberg/projects/26) +* [Snapshot tagging and branching](https://github.com/apache/iceberg/projects/4) +* [Inline file compaction](https://github.com/apache/iceberg/projects/14) +* [Delete File compaction](https://github.com/apache/iceberg/projects/10) +* [Z-ordering / Space-filling curves](https://github.com/apache/iceberg/projects/16) +* [Support UPSERT](https://github.com/apache/iceberg/projects/15) + +# Clients +_Python, Rust, and Go projects are pointing to their respective repositories which include +their own issues as the implementations are not final._ + +* [Add the Iceberg Python Client](https://github.com/apache/iceberg-python) +* [Add the Iceberg Rust Client](https://github.com/apache/iceberg-rust) +* [Add the Iceberg Go Client](https://github.com/apache/iceberg-go) + +# Spec V2 + +* [Views Spec](https://github.com/apache/iceberg/projects/6) +* [DSv2 streaming improvements](https://github.com/apache/iceberg/projects/2) +* [Secondary indexes](https://github.com/apache/iceberg/projects/17) + +# Spec V3 + +* [Encryption](https://github.com/apache/iceberg/projects/5) +* [Relative paths](https://github.com/apache/iceberg/projects/18) +* [Default field values](https://github.com/apache/iceberg/projects/19) diff --git a/site/docs/spec.md b/site/docs/spec.md index e1c1ed1b49c0..9223bafda324 100644 --- a/site/docs/spec.md +++ b/site/docs/spec.md @@ -1,8 +1,5 @@ --- title: "Spec" -url: spec -toc: true 
-disableSidebar: true ---