Skip to content

Commit

Permalink
[FEAT] Add support for windows in daft (#1386)
Browse files Browse the repository at this point in the history
* Implements fixes for daft to be able to build for windows
* implements protocol checking in filesystem.py for windows style paths
* implements protocol checking in native local objectio for windows
style paths.
* fix unit tests to use os libraries to construct path
* rewrite file io unit tests to not use a open file descriptor which
breaks on windows
* disable AVX for x86 builds for mac and windows
* implement unit test, rust tests and import tests CI for windows 
* implements windows wheel building for publish pipeline
* disables query plan toggle for CI
* fix REPR style unit tests for `\r`
  • Loading branch information
samster25 authored Sep 25, 2023
1 parent e6252f9 commit d7d1ae0
Show file tree
Hide file tree
Showing 15 changed files with 285 additions and 187 deletions.
91 changes: 67 additions & 24 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,21 +14,25 @@ env:

jobs:
unit-tests-with-coverage:
runs-on: ubuntu-latest
timeout-minutes: 15
runs-on: ${{ matrix.os }}-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
python-version: ['3.7', '3.10']
daft-runner: [py, ray]
new-query-planner: [1, 0]
pyarrow-version: [6.0.1, 12.0]
os: [ubuntu, windows]
exclude:
- daft-runner: ray
pyarrow-version: 6.0.1
os: ubuntu
- daft-runner: py
python-version: '3.10'
pyarrow-version: 6.0.1
os: ubuntu
- os: windows
pyarrow-version: 12.0

steps:
- uses: actions/checkout@v4
Expand All @@ -53,15 +57,24 @@ jobs:
echo "$GITHUB_WORKSPACE/venv/bin" >> $GITHUB_PATH
- name: Install dependencies
if: ${{ (matrix.os != 'windows') }}
run: |
pip install --upgrade pip
pip install -r requirements-dev.txt
- name: Install dependencies (Windows)
if: ${{ (matrix.os == 'windows') }}
run: |
.\venv\Scripts\activate
python -m pip install --upgrade pip
python -m pip install -r requirements-dev.txt
- name: Override pyarrow
if: ${{ matrix.pyarrow-version }}
if: ${{ (matrix.pyarrow-version) && (matrix.os != 'windows') }}
run: pip install pyarrow==${{ matrix.pyarrow-version }}

- name: Build library and Test with pytest
- name: Build library and Test with pytest (unix)
if: ${{ (matrix.os != 'windows') }}
run: |
source activate
# source <(cargo llvm-cov show-env --export-prefix)
Expand All @@ -71,11 +84,25 @@ jobs:
maturin develop
mkdir -p report-output && pytest --cov=daft --ignore tests/integration --durations=50
coverage combine -a --data-file='.coverage' || true
coverage xml -o ./report-output/coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}-${{ matrix.new-query-planner }}.xml
# cargo llvm-cov --no-run --lcov --output-path report-output/rust-coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}-${{ matrix.new-query-planner }}.lcov
coverage xml -o ./report-output/coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}.xml
# cargo llvm-cov --no-run --lcov --output-path report-output/rust-coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}.lcov
env:
DAFT_RUNNER: ${{ matrix.daft-runner }}
- name: Build library and Test with pytest (windows)
if: ${{ (matrix.os == 'windows') }}
run: |
.\venv\Scripts\activate
# source <(cargo llvm-cov show-env --export-prefix)
# export CARGO_TARGET_DIR=$CARGO_LLVM_COV_TARGET_DIR
# export CARGO_INCREMENTAL=1
# cargo llvm-cov clean --workspace
maturin develop
mkdir -p report-output && pytest --cov=daft --ignore tests\integration --durations=50
coverage combine -a --data-file='.coverage' || true
coverage xml -o .\report-output\coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}.xml
# cargo llvm-cov --no-run --lcov --output-path report-output\rust-coverage-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.daft-runner }}-${{ matrix.pyarrow-version }}.lcov
env:
DAFT_RUNNER: ${{ matrix.daft-runner }}
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }}

- name: Upload coverage report
uses: actions/upload-artifact@v3
Expand Down Expand Up @@ -151,7 +178,6 @@ jobs:
matrix:
python-version: ['3.7']
daft-runner: [py, ray]
new-query-planner: [1, 0]
steps:
- uses: actions/checkout@v4
with:
Expand Down Expand Up @@ -186,7 +212,6 @@ jobs:
pytest tests/integration/test_tpch.py --durations=50
env:
DAFT_RUNNER: ${{ matrix.daft-runner }}
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }}
- name: Send Slack notification on failure
uses: slackapi/[email protected]
if: ${{ failure() && (github.ref == 'refs/heads/main') }}
Expand Down Expand Up @@ -219,7 +244,6 @@ jobs:
matrix:
python-version: ['3.8'] # can't use 3.7 due to requiring anon mode for adlfs
daft-runner: [py, ray]
new-query-planner: [1, 0]
# These permissions are needed to interact with GitHub's OIDC Token endpoint.
# This is used in the step "Assume GitHub Actions AWS Credentials"
permissions:
Expand Down Expand Up @@ -276,7 +300,6 @@ jobs:
pytest tests/integration/io -m 'integration' --durations=50
env:
DAFT_RUNNER: ${{ matrix.daft-runner }}
DAFT_NEW_QUERY_PLANNER: ${{ matrix.new-query-planner }}
- name: Send Slack notification on failure
uses: slackapi/[email protected]
if: ${{ failure() && (github.ref == 'refs/heads/main') }}
Expand All @@ -298,10 +321,12 @@ jobs:
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

rust-tests:
runs-on: ubuntu-latest
timeout-minutes: 15
runs-on: ${{ matrix.os }}-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
os: [ubuntu, windows]
steps:
- uses: actions/checkout@v4
- uses: moonrepo/setup-rust@v0
Expand Down Expand Up @@ -379,13 +404,13 @@ jobs:
SLACK_WEBHOOK_TYPE: INCOMING_WEBHOOK

test-imports:
runs-on: ubuntu-latest
timeout-minutes: 15
runs-on: ${{ matrix.os }}-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix:
os: [ubuntu, windows]
python-version: ['3.7']

steps:
- uses: actions/checkout@v4
- uses: moonrepo/setup-rust@v0
Expand All @@ -401,25 +426,43 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Setup Virtual Env
- name: Unix Build
if: ${{ (matrix.os != 'windows') }}
run: |
python -m venv venv
source venv/bin/activate
pip install maturin
python -m pip install maturin
maturin build --out dist
- name: Build Rust Library
- name: Windows Build
if: ${{ (matrix.os == 'windows') }}
run: |
venv/bin/maturin build --out dist
python -m venv venv
.\venv\Scripts\activate
python -m pip install maturin
maturin build --out dist
- name: Test Imports in Clean Env
- name: Test Imports in Clean Env (Unix)
if: ${{ (matrix.os != 'windows') }}
run: |
rm -rf daft
rm -rf venv
python -m venv venv
source venv/bin/activate
ls -R ./dist
venv/bin/pip install dist/*.whl
venv/bin/python -c 'import daft; from daft import *'
pip install dist/*.whl
python -c 'import daft; from daft import *'
- name: Test Imports in Clean Env (Windows)
if: ${{ (matrix.os == 'windows') }}
run: |
rd -r daft
rd -r venv
python -m venv venv
.\venv\Scripts\activate
$FILES = Get-ChildItem -Path .\dist\*.whl -Force -Recurse
python -m pip install $FILES[0].FullName
python -c 'import daft; from daft import *'
- name: Send Slack notification on failure
uses: slackapi/[email protected]
Expand Down
31 changes: 20 additions & 11 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@ on:
tags:
- v*
workflow_dispatch:

env:
PACKAGE_NAME: getdaft
PYTHON_VERSION: 3.7.16 # to build abi3 wheels
PYTHON_VERSION: 3.8
DAFT_ANALYTICS_ENABLED: '0'

IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') && ( ! endsWith(github.ref, 'dev0')) }}
Expand All @@ -32,8 +31,11 @@ jobs:
strategy:
fail-fast: false
matrix:
runs: [ubuntu-latest, macos-latest-xl]
runs: [ubuntu-latest, macos-latest-xl, windows-latest-l]
compile_arch: [x86_64, aarch64]
exclude:
- runs: windows-latest-l
compile_arch: aarch64
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -45,16 +47,14 @@ jobs:
architecture: x64
- run: pip install -U twine toml
- run: python tools/patch_package_version.py
- uses: moonrepo/setup-rust@v0
- name: Build wheels - Mac x86
if: ${{ (matrix.runs == 'macos-latest-xl') && (matrix.compile_arch == 'x86_64') }}
- name: Build wheels - Mac and Windows x86
if: ${{ ((matrix.runs == 'macos-latest-xl') || (matrix.runs == 'windows-latest-l')) && (matrix.compile_arch == 'x86_64') }}
uses: messense/maturin-action@v1
with:
target: x86_64
manylinux: auto
args: --profile release-lto --out dist --sdist
env:
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2
- name: Build wheels - Linux x86
if: ${{ (matrix.runs == 'ubuntu-latest') && (matrix.compile_arch == 'x86_64') }}
uses: messense/maturin-action@v1
Expand Down Expand Up @@ -86,13 +86,21 @@ jobs:
env:
RUSTFLAGS: -Ctarget-cpu=apple-m1

- name: Install and test built wheel - x86_64
if: ${{ matrix.compile_arch == 'x86_64' }}
- name: Install and test built wheel - Linux and Mac x86_64
if: ${{ ((matrix.runs == 'macos-latest-xl') || (matrix.runs == 'ubuntu-latest')) && (matrix.compile_arch == 'x86_64') }}
run: |
pip install -r requirements-dev.txt dist/${{ env.PACKAGE_NAME }}-*x86_64*.whl --force-reinstall
rm -rf daft
pytest -v
- name: Install and test built wheel - Windows x86_64
if: ${{ (matrix.runs == 'windows-latest-l') && (matrix.compile_arch == 'x86_64') }}
run: |
$FILES = Get-ChildItem -Path .\dist\${{ env.PACKAGE_NAME }}-*-win_amd64.whl -Force -Recurse
pip install -r requirements-dev.txt $FILES[0].FullName --force-reinstall
rd -r daft
pytest -v
- name: Upload wheels
uses: actions/upload-artifact@v3
with:
Expand All @@ -101,7 +109,7 @@ jobs:

- name: Send Slack notification on failure
uses: slackapi/[email protected]
if: failure()
if: ${{ failure() && (github.ref == 'refs/heads/main') }}
with:
payload: |
{
Expand All @@ -122,6 +130,7 @@ jobs:

publish:
name: Publish wheels to PYPI and Anaconda
if: ${{ (github.ref == 'refs/heads/main') }}
runs-on: ubuntu-latest
needs:
- build-and-test
Expand Down
3 changes: 3 additions & 0 deletions daft/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,11 @@ def get_filesystem(protocol: str, **kwargs) -> fsspec.AbstractFileSystem:

def get_protocol_from_path(path: str) -> str:
parsed_scheme = urllib.parse.urlparse(path, allow_fragments=False).scheme
parsed_scheme = parsed_scheme.lower()
if parsed_scheme == "" or parsed_scheme is None:
return "file"
if sys.platform == "win32" and len(parsed_scheme) == 1 and ("a" <= parsed_scheme) and (parsed_scheme <= "z"):
return "file"
return parsed_scheme


Expand Down
4 changes: 2 additions & 2 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ Pillow==9.5.0
opencv-python==4.8.0.76

# Pyarrow
pyarrow==12

pyarrow==12; platform_system != "Windows"
pyarrow==6.0.1; platform_system === "Windows"
# Ray
ray[data, default]==2.6.3
pydantic<2 # pin pydantic because Ray uses broken APIs
Expand Down
4 changes: 4 additions & 0 deletions src/daft-io/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ fn parse_url(input: &str) -> Result<(SourceType, Cow<'_, str>)> {
"s3" => Ok((SourceType::S3, fixed_input)),
"az" | "abfs" => Ok((SourceType::AzureBlob, fixed_input)),
"gcs" | "gs" => Ok((SourceType::GCS, fixed_input)),
#[cfg(target_env = "msvc")]
_ if scheme.len() == 1 && ("a" <= scheme.as_str() && (scheme.as_str() <= "z")) => {
Ok((SourceType::File, Cow::Owned(format!("file://{input}"))))
}
_ => Err(Error::NotImplementedSource { store: scheme }),
}
}
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use tikv_jemallocator::Jemalloc;
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;

#[cfg(not(target_env = "msvc"))]
union U {
x: &'static u8,
y: &'static libc::c_char,
Expand Down
Loading

0 comments on commit d7d1ae0

Please sign in to comment.