Commit
thread in iostats
samster25 committed Oct 16, 2023
2 parents c9d1468 + 9d20890 commit bfcd297
Showing 54 changed files with 1,106 additions and 283 deletions.
94 changes: 94 additions & 0 deletions .github/workflows/build-artifact-s3.yml
@@ -0,0 +1,94 @@
name: daft-build-artifact-s3

on:
  workflow_dispatch:
    inputs:
      rust-profile:
        description: Profile to compile with
        required: true
        default: release-lto
        type: choice
        options:
        - release-lto
        - release

env:
  PACKAGE_NAME: getdaft
  PYTHON_VERSION: 3.8

jobs:
  build-and-push:
    name: platform wheels for ${{ matrix.os }}-${{ matrix.compile_arch }}
    runs-on: ${{ matrix.os }}-latest
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu]
        compile_arch: [x86_64, aarch64]
    # These permissions are needed to interact with GitHub's OIDC Token endpoint.
    # This is used in the step "Assume GitHub Actions AWS Credentials"
    permissions:
      id-token: write
      contents: read
    steps:
    - uses: actions/checkout@v4
      with:
        submodules: true
        fetch-depth: 0
    - name: Assume GitHub Actions AWS Credentials
      uses: aws-actions/configure-aws-credentials@v3
      with:
        aws-region: us-west-2
        role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
        role-session-name: DaftPythonPackageGitHubWorkflow
    - uses: actions/setup-python@v4
      with:
        python-version: ${{ env.PYTHON_VERSION }}
        architecture: x64
    - run: pip install -U toml
    - run: python tools/patch_package_version.py
    - name: Build wheels - Linux x86
      if: ${{ (matrix.os == 'ubuntu') && (matrix.compile_arch == 'x86_64') }}
      uses: messense/maturin-action@v1
      with:
        target: x86_64
        manylinux: auto
        args: --profile ${{ inputs.rust-profile }} --out dist
        before-script-linux: yum -y install perl-IPC-Cmd
      env:
        RUSTFLAGS: -C target-feature=+fxsr,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx,+fma
    - name: Build wheels - Linux aarch64
      if: ${{ (matrix.os == 'ubuntu') && (matrix.compile_arch == 'aarch64') }}
      uses: messense/maturin-action@v1
      with:
        target: aarch64-unknown-linux-gnu
        manylinux: auto
        # GCC 4.8.5 in the manylinux2014 container doesn't support C11 atomics, which caused issues with the `ring` crate and in turn caused TLS to fail.
        container: messense/manylinux_2_24-cross:aarch64
        args: --profile ${{ inputs.rust-profile }} --out dist
        before-script-linux: export JEMALLOC_SYS_WITH_LG_PAGE=16
    - name: Copy all files as zip for Glue
      run: for file in dist/*.whl; do cp $file dist/`basename $file .whl`.zip; done
    - name: Upload files to s3
      run: for file in dist/*; do aws s3 cp $file s3://github-actions-artifacts-bucket/daft-build-artifact-s3/${{ github.sha }}/ --no-progress; done

  list-wheels:
    name: List Wheels and Zip files Published to S3
    runs-on: ubuntu-latest
    needs:
    - build-and-push

    permissions:
      id-token: write
      contents: read
    steps:
    - name: Assume GitHub Actions AWS Credentials
      uses: aws-actions/configure-aws-credentials@v3
      with:
        aws-region: us-west-2
        role-to-assume: ${{ secrets.ACTIONS_AWS_ROLE_ARN }}
        role-session-name: DaftPythonPackageGitHubWorkflow
    - name: List Wheels
      run: aws s3 ls s3://github-actions-artifacts-bucket/daft-build-artifact-s3/${{ github.sha }}/
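
For reference, a minimal Python sketch (illustrative, not part of this commit) that mirrors the "List Wheels" step above. It assumes boto3 is installed and AWS credentials with read access to the bucket are available; <commit-sha> is a placeholder for the ${{ github.sha }} value.

import boto3

# List the wheel and zip artifacts uploaded for a single commit,
# mirroring the `aws s3 ls` call in the workflow above.
BUCKET = "github-actions-artifacts-bucket"
PREFIX = "daft-build-artifact-s3/<commit-sha>/"  # placeholder for the commit SHA

s3 = boto3.client("s3", region_name="us-west-2")
response = s3.list_objects_v2(Bucket=BUCKET, Prefix=PREFIX)
for obj in response.get("Contents", []):
    print(obj["Key"], obj["Size"])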
1 change: 0 additions & 1 deletion .github/workflows/python-package.yml
@@ -8,7 +8,6 @@ on:
    branches: [main]
  pull_request:
    branches: [main]

env:
  DAFT_ANALYTICS_ENABLED: '0'

6 changes: 6 additions & 0 deletions SECURITY.md
@@ -0,0 +1,6 @@
# Security Policy

## Reporting a Vulnerability

Please do not open a GitHub issue to report security issues; instead, email [email protected].
Thank you!
4 changes: 3 additions & 1 deletion benchmarking/tpch/__main__.py
@@ -3,6 +3,7 @@
import argparse
import contextlib
import csv
import logging
import math
import os
import platform
@@ -13,14 +14,15 @@
from typing import Any, Callable

import ray
from loguru import logger

import daft
from benchmarking.tpch import answers, data_generation
from daft import DataFrame
from daft.context import get_context
from daft.runners.profiler import profiler

logger = logging.getLogger(__name__)

ALL_TABLES = [
"part",
"supplier",
5 changes: 3 additions & 2 deletions benchmarking/tpch/data_generation.py
@@ -1,17 +1,18 @@
from __future__ import annotations

import argparse
import logging
import math
import os
import shlex
import sqlite3
import subprocess
from glob import glob

from loguru import logger

import daft

logger = logging.getLogger(__name__)

SCHEMA = {
"part": [
"P_PARTKEY",
5 changes: 3 additions & 2 deletions benchmarking/tpch/pipelined_data_generation.py
@@ -15,17 +15,18 @@

import argparse
import glob
import logging
import os
import pathlib
import shlex
import shutil
import subprocess
from multiprocessing import Pool

from loguru import logger

from benchmarking.tpch.data_generation import gen_parquet

logger = logging.getLogger(__name__)

STATIC_TABLES = ["nation", "region"]


8 changes: 0 additions & 8 deletions daft/__init__.py
@@ -2,8 +2,6 @@

import os

from daft.logging import setup_logger

###
# Set up code coverage for when running code coverage with ray
###
@@ -20,12 +18,6 @@
"Environ: {!r} "
"Exception: {!r}\n".format({k: v for k, v in os.environ.items() if k.startswith("COV_CORE")}, exc)
)
###
# Setup logging
###


setup_logger()

###
# Get build constants from Rust .so
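
With setup_logger() removed from daft/__init__.py and several modules in this commit switching from loguru to the standard library, logging configuration is now left to the application. A minimal sketch (illustrative, not part of this commit) of configuring it from user code:

import logging

# Daft no longer configures logging on import; the application decides.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(name)s %(levelname)s: %(message)s",
)

# Loggers created via logging.getLogger(__name__) inherit this configuration;
# adjust Daft's own loggers separately if desired.
logging.getLogger("daft").setLevel(logging.WARNING)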
20 changes: 16 additions & 4 deletions daft/context.py
@@ -1,16 +1,17 @@
from __future__ import annotations

import dataclasses
import logging
import os
import warnings
from typing import TYPE_CHECKING, ClassVar

from loguru import logger

if TYPE_CHECKING:
from daft.logical.builder import LogicalPlanBuilder
from daft.runners.runner import Runner

logger = logging.getLogger(__name__)


class _RunnerConfig:
    name = ClassVar[str]
@@ -75,7 +76,6 @@ def runner(self) -> Runner:
        if self.runner_config.name == "ray":
            from daft.runners.ray_runner import RayRunner

            logger.info("Using RayRunner")
            assert isinstance(self.runner_config, _RayRunnerConfig)
            _RUNNER = RayRunner(
                address=self.runner_config.address,
@@ -84,7 +84,19 @@
        elif self.runner_config.name == "py":
            from daft.runners.pyrunner import PyRunner

            logger.info("Using PyRunner")
            try:
                import ray

                if ray.is_initialized():
                    logger.warning(
                        "WARNING: Daft is NOT using Ray for execution!\n"
                        "Daft is using the PyRunner but we detected an active Ray connection. "
                        "If you intended to use the Daft RayRunner, please first run `daft.context.set_runner_ray()` "
                        "before executing Daft queries."
                    )
            except ImportError:
                pass

            assert isinstance(self.runner_config, _PyRunnerConfig)
            _RUNNER = PyRunner(use_thread_pool=self.runner_config.use_thread_pool)

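
The new warning fires when a live Ray connection is detected while Daft is still on the PyRunner. A short sketch (illustrative, not part of this commit; assumes both ray and daft are installed) of opting in to the RayRunner explicitly so the warning does not apply:

import ray
import daft

ray.init()  # an existing Ray connection alone does not change Daft's runner

# Explicitly select the RayRunner before running any Daft queries;
# otherwise the PyRunner is used and the warning above is emitted.
daft.context.set_runner_ray()

df = daft.from_pydict({"x": [1, 2, 3]})
print(df.collect())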
5 changes: 5 additions & 0 deletions daft/daft.pyi
@@ -178,6 +178,11 @@ class ParquetSourceConfig:
    Configuration of a Parquet data source.
    """

    # Whether or not to use a multithreaded tokio runtime for processing I/O
    multithreaded_io: bool

    def __init__(self, multithreaded_io: bool): ...

class CsvSourceConfig:
    """
    Configuration of a CSV data source.
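
The new field lives on the native daft.daft module's ParquetSourceConfig, per the stub above. A small sketch (illustrative, not part of this commit; most user code would not construct this object by hand) of what the flag expresses:

from daft.daft import ParquetSourceConfig

# Use a multithreaded tokio runtime for Parquet I/O.
multithreaded = ParquetSourceConfig(multithreaded_io=True)

# Opt out, e.g. when the calling runtime already manages its own threads.
single_threaded = ParquetSourceConfig(multithreaded_io=False)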
3 changes: 2 additions & 1 deletion daft/dataframe/to_torch.py
@@ -1,8 +1,9 @@
from __future__ import annotations

import logging
from typing import Any, Iterable, Iterator

from loguru import logger
logger = logging.getLogger(__name__)

try:
# When available, subclass from the newer torchdata DataPipes instead of torch Datasets.
1 change: 1 addition & 0 deletions daft/execution/execution_step.py
@@ -401,6 +401,7 @@ def _handle_tabular_files_scan(
                schema=self.schema,
                storage_config=self.storage_config,
                read_options=read_options,
                multithreaded_io=format_config.multithreaded_io,
            )
            for fp in filepaths
        ]