[CHORE] Refactor logging #1489

Merged: 5 commits, Oct 16, 2023
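This PR replaces loguru with Python's standard-library logging module throughout the codebase. The change is mechanical in most files: drop the loguru import, create a per-module logger, and switch brace-style deferred messages to f-strings. A minimal before/after sketch of the pattern (the message and variable names are illustrative, not taken from any one file):

# Before: loguru's shared logger with deferred brace-style formatting
from loguru import logger
logger.debug("blocked on: {sources}", sources=sources)

# After: a stdlib logger named after the module, with f-string formatting
import logging
logger = logging.getLogger(__name__)
logger.debug(f"blocked on: {sources}")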
4 changes: 3 additions & 1 deletion benchmarking/tpch/__main__.py
@@ -3,6 +3,7 @@
import argparse
import contextlib
import csv
import logging
import math
import os
import platform
@@ -13,14 +14,15 @@
from typing import Any, Callable

import ray
from loguru import logger

import daft
from benchmarking.tpch import answers, data_generation
from daft import DataFrame
from daft.context import get_context
from daft.runners.profiler import profiler

logger = logging.getLogger(__name__)

ALL_TABLES = [
"part",
"supplier",
5 changes: 3 additions & 2 deletions benchmarking/tpch/data_generation.py
@@ -1,17 +1,18 @@
from __future__ import annotations

import argparse
import logging
import math
import os
import shlex
import sqlite3
import subprocess
from glob import glob

from loguru import logger

import daft

logger = logging.getLogger(__name__)

SCHEMA = {
"part": [
"P_PARTKEY",
5 changes: 3 additions & 2 deletions benchmarking/tpch/pipelined_data_generation.py
@@ -15,17 +15,18 @@

import argparse
import glob
import logging
import os
import pathlib
import shlex
import shutil
import subprocess
from multiprocessing import Pool

from loguru import logger

from benchmarking.tpch.data_generation import gen_parquet

logger = logging.getLogger(__name__)

STATIC_TABLES = ["nation", "region"]


8 changes: 0 additions & 8 deletions daft/__init__.py
@@ -2,8 +2,6 @@

import os

from daft.logging import setup_logger

###
# Set up code coverage for when running code coverage with ray
###
@@ -20,12 +18,6 @@
"Environ: {!r} "
"Exception: {!r}\n".format({k: v for k, v in os.environ.items() if k.startswith("COV_CORE")}, exc)
)
###
# Setup logging
###


setup_logger()

###
# Get build constants from Rust .so
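Removing setup_logger() from the import path means Daft no longer configures global logging state as a side effect of `import daft`; log output is now under the application's control through the standard library. A sketch of what a caller might do (standard stdlib calls, not part of this diff):

import logging

# Opt in to Daft's debug output globally...
logging.basicConfig(level=logging.DEBUG)

# ...or tune only the daft.* loggers, since each module calls getLogger(__name__)
logging.getLogger("daft").setLevel(logging.WARNING)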
20 changes: 16 additions & 4 deletions daft/context.py
@@ -1,16 +1,17 @@
from __future__ import annotations

import dataclasses
import logging
import os
import warnings
from typing import TYPE_CHECKING, ClassVar

from loguru import logger

if TYPE_CHECKING:
from daft.logical.builder import LogicalPlanBuilder
from daft.runners.runner import Runner

logger = logging.getLogger(__name__)


class _RunnerConfig:
name = ClassVar[str]
@@ -75,7 +76,6 @@
if self.runner_config.name == "ray":
from daft.runners.ray_runner import RayRunner

logger.info("Using RayRunner")
assert isinstance(self.runner_config, _RayRunnerConfig)
_RUNNER = RayRunner(
address=self.runner_config.address,
@@ -84,7 +84,19 @@
elif self.runner_config.name == "py":
from daft.runners.pyrunner import PyRunner

logger.info("Using PyRunner")
try:
import ray

if ray.is_initialized():
logger.warning(
"WARNING: Daft is NOT using Ray for execution!\n"
"Daft is using the PyRunner but we detected an active Ray connection. "
"If you intended to use the Daft RayRunner, please first run `daft.context.set_runner_ray()` "
"before executing Daft queries."
)
except ImportError:
pass

assert isinstance(self.runner_config, _PyRunnerConfig)
_RUNNER = PyRunner(use_thread_pool=self.runner_config.use_thread_pool)

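The try/except above also adds a guardrail: if a Ray session is already initialized but the PyRunner is selected, Daft now warns instead of silently running locally. A hypothetical session that would trigger it (daft.from_pydict is used purely for illustration):

import ray
ray.init()  # an active Ray connection, but no Daft runner configured

import daft
df = daft.from_pydict({"x": [1, 2, 3]})
df.collect()  # executes on the PyRunner and emits the warning above

# The remedy named in the warning text, run before any Daft queries:
# daft.context.set_runner_ray()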
3 changes: 2 additions & 1 deletion daft/dataframe/to_torch.py
@@ -1,8 +1,9 @@
from __future__ import annotations

import logging

from typing import Any, Iterable, Iterator

from loguru import logger
logger = logging.getLogger(__name__)


try:
# When available, subclass from the newer torchdata DataPipes instead of torch Datasets.
37 changes: 16 additions & 21 deletions daft/execution/physical_plan.py
@@ -13,13 +13,12 @@

from __future__ import annotations

import logging
import math
import pathlib
from collections import deque
from typing import Generator, Iterator, TypeVar, Union

from loguru import logger

from daft.daft import (
FileFormat,
FileFormatConfig,
@@ -40,6 +39,8 @@
from daft.logical.schema import Schema
from daft.runners.partitioning import PartialPartitionMetadata

logger = logging.getLogger(__name__)

PartitionT = TypeVar("PartitionT")
T = TypeVar("T")

@@ -123,7 +124,7 @@

except StopIteration:
if len(materializations) > 0:
logger.debug("file_read blocked on completion of first source in: {sources}", sources=materializations)
logger.debug(f"file_read blocked on completion of first source in: {materializations}")

Check warning on line 127 in daft/execution/physical_plan.py

View check run for this annotation

Codecov / codecov/patch

daft/execution/physical_plan.py#L127

Added line #L127 was not covered by tests
yield None
else:
return
@@ -231,10 +232,8 @@
if len(left_requests) + len(right_requests) > 0:
logger.debug(
"join blocked on completion of sources.\n"
"Left sources: {left_requests}\n"
"Right sources: {right_requests}",
left_requests=left_requests,
right_requests=right_requests,
f"Left sources: {left_requests}\n"
f"Right sources: {right_requests}",
)
yield None

@@ -339,7 +338,7 @@

# (Optimization. If we are doing limit(0) and already have a partition executing to use for it, just wait.)
if remaining_rows == 0 and len(materializations) > 0:
logger.debug("global_limit blocked on completion of: {source}", source=materializations[0])
logger.debug(f"global_limit blocked on completion of: {materializations[0]}")

Check warning on line 341 in daft/execution/physical_plan.py

View check run for this annotation

Codecov / codecov/patch

daft/execution/physical_plan.py#L341

Added line #L341 was not covered by tests
yield None
continue

@@ -364,9 +363,7 @@

except StopIteration:
if len(materializations) > 0:
logger.debug(
"global_limit blocked on completion of first source in: {sources}", sources=materializations
)
logger.debug(f"global_limit blocked on completion of first source in: {materializations}")
yield None
else:
return
@@ -396,9 +393,7 @@

except StopIteration:
if len(materializations) > 0:
logger.debug(
"flatten_plan blocked on completion of first source in: {sources}", sources=materializations
)
logger.debug(f"flatten_plan blocked on completion of first source in: {materializations}")
yield None
else:
return
@@ -427,7 +422,7 @@
yield step

while any(not _.done() for _ in materializations):
logger.debug("split_to blocked on completion of all sources: {sources}", sources=materializations)
logger.debug(f"split_to blocked on completion of all sources: {materializations}")

Check warning on line 425 in daft/execution/physical_plan.py

View check run for this annotation

Codecov / codecov/patch

daft/execution/physical_plan.py#L425

Added line #L425 was not covered by tests
yield None

splits_per_partition = deque([1 for _ in materializations])
@@ -517,7 +512,7 @@

except StopIteration:
if len(materializations) > 0:
logger.debug("coalesce blocked on completion of a task in: {sources}", sources=materializations)
logger.debug(f"coalesce blocked on completion of a task in: {materializations}")
yield None
else:
return
@@ -547,7 +542,7 @@
# All fanouts dispatched. Wait for all of them to materialize
# (since we need all of them to emit even a single reduce).
while any(not _.done() for _ in materializations):
logger.debug("reduce blocked on completion of all sources in: {sources}", sources=materializations)
logger.debug(f"reduce blocked on completion of all sources in: {materializations}")
yield None

inputs_to_reduce = [deque(_.partitions()) for _ in materializations]
@@ -587,7 +582,7 @@
sample_materializations: deque[SingleOutputPartitionTask[PartitionT]] = deque()
for source in source_materializations:
while not source.done():
logger.debug("sort blocked on completion of source: {source}", source=source)
logger.debug(f"sort blocked on completion of source: {source}")
yield None

sample = (
@@ -606,7 +601,7 @@

# Wait for samples to materialize.
while any(not _.done() for _ in sample_materializations):
logger.debug("sort blocked on completion of all samples: {samples}", samples=sample_materializations)
logger.debug(f"sort blocked on completion of all samples: {sample_materializations}")
yield None

# Reduce the samples to get sort boundaries.
@@ -628,7 +623,7 @@

# Wait for boundaries to materialize.
while not boundaries.done():
logger.debug("sort blocked on completion of boundary partition: {boundaries}", boundaries=boundaries)
logger.debug(f"sort blocked on completion of boundary partition: {boundaries}")
yield None

# Create a range fanout plan.
@@ -699,7 +694,7 @@

except StopIteration:
if len(materializations) > 0:
logger.debug("materialize blocked on completion of all sources: {sources}", sources=materializations)
logger.debug(f"materialize blocked on completion of all sources: {materializations}")
yield None
else:
return
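One nuance of this file's conversion: loguru only formats a brace-style message after its level check passes, whereas f-strings are evaluated eagerly at the call site even when DEBUG is disabled. The stdlib can still defer formatting via percent-style arguments if that overhead ever matters; both forms are sketched here for comparison (the list stands in for the deque of tasks):

import logging

logger = logging.getLogger(__name__)
materializations = ["task-1", "task-2"]

# Eager: the f-string is built unconditionally (the style this PR adopts)
logger.debug(f"reduce blocked on completion of all sources in: {materializations}")

# Deferred: formatting happens only if a handler actually emits the record
logger.debug("reduce blocked on completion of all sources in: %s", materializations)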
4 changes: 3 additions & 1 deletion daft/filesystem.py
@@ -12,12 +12,12 @@
else:
from typing import Literal

import logging
from typing import Any

import fsspec
import pyarrow as pa
from fsspec.registry import get_filesystem_class
from loguru import logger
from pyarrow.fs import (
FileSystem,
FSSpecHandler,
Expand All @@ -28,6 +28,8 @@
from daft.daft import FileFormat, FileInfos, NativeStorageConfig, StorageConfig
from daft.table import Table

logger = logging.getLogger(__name__)

_CACHED_FSES: dict[str, FileSystem] = {}


5 changes: 3 additions & 2 deletions daft/internal/rule_runner.py
@@ -1,13 +1,14 @@
from __future__ import annotations

import logging

from dataclasses import dataclass
from typing import Generic, TypeVar

from loguru import logger

from daft.internal.rule import Rule
from daft.internal.treenode import TreeNode

logger = logging.getLogger(__name__)


TreeNodeType = TypeVar("TreeNodeType", bound="TreeNode")


5 changes: 3 additions & 2 deletions daft/internal/treenode.py
@@ -1,14 +1,15 @@
from __future__ import annotations

import logging
import os
import typing
from typing import TYPE_CHECKING, Generic, List, TypeVar, cast

from loguru import logger

if TYPE_CHECKING:
from daft.internal.rule import Rule

logger = logging.getLogger(__name__)

TreeNodeType = TypeVar("TreeNodeType", bound="TreeNode")


39 changes: 0 additions & 39 deletions daft/logging.py

This file was deleted.
