diff --git a/tests/trace/test_perf.py b/tests/trace/test_perf.py
new file mode 100644
index 00000000000..5dc920504ae
--- /dev/null
+++ b/tests/trace/test_perf.py
@@ -0,0 +1,9 @@
+def test_import_is_fast():
+    import time
+
+    start_time = time.time()
+    import weave  # noqa: F401
+
+    end_time = time.time() - start_time
+
+    assert end_time < 2, f"Import took {end_time} seconds"
diff --git a/weave/__init__.py b/weave/__init__.py
index 87ae9aee013..c90e875820c 100644
--- a/weave/__init__.py
+++ b/weave/__init__.py
@@ -9,12 +9,14 @@
 from weave.flow.agent import Agent as Agent
 from weave.flow.agent import AgentState as AgentState
 from weave.flow.dataset import Dataset
-from weave.flow.eval import Evaluation, Scorer
+from weave.flow.eval import Evaluation
 from weave.flow.model import Model
 from weave.flow.obj import Object
 from weave.flow.prompt.prompt import EasyPrompt, MessagesPrompt, Prompt, StringPrompt
-from weave.trace.util import Thread as Thread
-from weave.trace.util import ThreadPoolExecutor as ThreadPoolExecutor
+from weave.trace.util import (
+    Thread,  # noqa: F401
+    ThreadPoolExecutor,  # noqa: F401
+)
 
 # Alias for succinct code
 P = EasyPrompt
@@ -39,5 +41,4 @@
     StringPrompt,
     MessagesPrompt,
     Evaluation,
-    Scorer,
 ]
diff --git a/weave/flow/eval.py b/weave/flow/eval.py
index bf78dc06d85..897af493d63 100644
--- a/weave/flow/eval.py
+++ b/weave/flow/eval.py
@@ -6,7 +6,7 @@
 import traceback
 from collections.abc import Coroutine
 from datetime import datetime
-from typing import Any, Callable, Literal, Optional, Union, cast
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union, cast
 
 from pydantic import PrivateAttr, model_validator
 from rich import print
@@ -18,14 +18,6 @@
 from weave.flow.model import Model, get_infer_method
 from weave.flow.obj import Object
 from weave.flow.util import make_memorable_name
-from weave.scorers import (
-    Scorer,
-    _has_oldstyle_scorers,
-    _validate_scorer_signature,
-    auto_summarize,
-    get_scorer_attributes,
-    transpose,
-)
 from weave.trace.context.weave_client_context import get_weave_client
 from weave.trace.env import get_weave_parallelism
 from weave.trace.errors import OpCallError
@@ -34,6 +26,9 @@
 from weave.trace.vals import WeaveObject
 from weave.trace.weave_client import Call, get_ref
 
+if TYPE_CHECKING:
+    from weave.scorers import Scorer
+
 console = Console()
 logger = logging.getLogger(__name__)
 
@@ -120,7 +115,7 @@ def function_to_evaluate(question: str):
     """
 
     dataset: Union[Dataset, list]
-    scorers: Optional[list[Union[Callable, Op, Scorer]]] = None
+    scorers: Optional[list[Union[Callable, Op, "Scorer"]]] = None
     preprocess_model_input: Optional[Callable] = None
     trials: int = 1
 
@@ -140,6 +135,8 @@ def _update_display_name(self) -> "Evaluation":
         return self
 
     def model_post_init(self, __context: Any) -> None:
+        from weave.scorers import Scorer, _has_oldstyle_scorers, _validate_scorer_signature
+
         scorers: list[Union[Callable, Scorer, Op]] = []
         for scorer in self.scorers or []:
             if isinstance(scorer, Scorer):
@@ -178,6 +175,8 @@ def model_post_init(self, __context: Any) -> None:
     async def predict_and_score(
         self, model: Union[Callable, Model], example: dict
     ) -> dict:
+        from weave.scorers import get_scorer_attributes
+
         if self.preprocess_model_input is None:
             model_input = example
         else:
@@ -443,6 +442,8 @@ async def predict_and_score(
 
     @weave.op()
     async def summarize(self, eval_table: EvaluationResults) -> dict:
+        from weave.scorers import auto_summarize, get_scorer_attributes, transpose
+
         eval_table_rows = list(eval_table.rows)
         cols = transpose(eval_table_rows)
         summary = {}
@@ -529,7 +530,7 @@ async def evaluate(self, model: Union[Callable, Model]) -> dict:
 def evaluate(
     dataset: Union[Dataset, list],
     model: Union[Callable, Model],
-    scores: Optional[list[Union[Callable, Scorer]]] = None,
+    scores: Optional[list[Union[Callable, "Scorer"]]] = None,
     preprocess_model_input: Optional[Callable] = None,
 ) -> dict:
     eval = Evaluation(