Merge branch 'master' into jake/remove-unused-js-deps

wandb · Dec 10, 2024 · aeebf91 · aeebf91
2 parents b184d83 + 0966c9f
commit aeebf91
Show file tree

Hide file tree

Showing 92 changed files with 2,180 additions and 1,276 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,7 @@ repos:
     hooks:
       - id: mypy
         additional_dependencies:
-          [types-pkg-resources==0.1.3, types-all, wandb>=0.15.5]
+          [types-pkg-resources==0.1.3, types-all, wandb>=0.15.5, wandb<0.19.0]
         # Note: You have to update pyproject.toml[tool.mypy] too!
         args: ["--config-file=pyproject.toml"]
         exclude: (.*pyi$)|(weave_query)|(tests)|(examples)

diff --git a/pyproject.toml b/pyproject.toml
@@ -226,7 +226,7 @@ module = "weave_query.*"
 ignore_errors = true
 
 [tool.bumpversion]
-current_version = "0.51.23-dev0"
+current_version = "0.51.24-dev0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.

diff --git a/sdks/node/package-lock.json b/sdks/node/package-lock.json
diff --git a/sdks/node/package.json b/sdks/node/package.json
@@ -1,11 +1,19 @@
 {
   "name": "weave",
-  "version": "0.7.0",
+  "version": "0.7.3",
   "description": "AI development toolkit",
-  "types": "dist/src/index.d.ts",
-  "main": "dist/src/index.js",
+  "types": "dist/index.d.ts",
+  "main": "dist/index.js",
   "type": "commonjs",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "default": "./dist/index.js"
+    }
+  },
   "scripts": {
+    "build": "tsc --outDir dist",
+    "prepare": "npm run build",
     "test": "jest --silent",
     "test:coverage": "jest --coverage",
     "test:watch": "jest --watch",
@@ -14,6 +22,9 @@
     "generate-api": "swagger-typescript-api -p ./weave.openapi.json -o ./src/generated -n traceServerApi.ts",
     "dev": "nodemon"
   },
+  "files": [
+    "dist"
+  ],
   "repository": {
     "type": "git",
     "url": "https://github.com/wandb/weave/js"

diff --git a/sdks/node/tsconfig.json b/sdks/node/tsconfig.json
@@ -7,19 +7,28 @@
     "sourceMap": true,
     "strict": true,
     "esModuleInterop": true,
-    "outDir": "./dist",
+    "outDir": "dist",
     "paths": {
       "weave": ["./src/index.ts"]
-    }
+    },
+    "declaration": true,
+    "declarationMap": true,
+    "rootDir": "src",
+    "tsBuildInfoFile": "dist/.tsbuildinfo"
   },
   "include": ["src/**/*"],
-  "exclude": ["src", "examples", "dist", "node_modules"],
-  "references": [
-    {
-      "path": "./src/tsconfig.src.json"
-    },
-    {
-      "path": "./examples/tsconfig.examples.json"
-    }
+  "exclude": [
+    "examples",
+    "dist",
+    "node_modules",
+    "src/integrations/checkOpenai.ts"
   ]
+  // "references": [
+  //   {
+  //     "path": "./src/tsconfig.src.json"
+  //   },
+  //   {
+  //     "path": "./examples/tsconfig.examples.json"
+  //   }
+  // ]
 }
diff --git a/tests/integrations/instructor/instructor_test.py b/tests/integrations/instructor/instructor_test.py
@@ -64,7 +64,7 @@ def test_instructor_openai(
     assert op_name_from_ref(call.op_name) == "openai.chat.completions.create"
     output = call.output
     output_arguments = json.loads(
-        output.choices[0].message.tool_calls[0].function.arguments
+        output["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"]
     )
     assert "person_name" in output_arguments
     assert "age" in output_arguments
@@ -112,7 +112,7 @@ async def extract_person(text: str) -> Person:
     assert op_name_from_ref(call.op_name) == "openai.chat.completions.create"
     output = call.output
     output_arguments = json.loads(
-        output.choices[0].message.tool_calls[0].function.arguments
+        output["choices"][0]["message"]["tool_calls"][0]["function"]["arguments"]
     )
     assert "person_name" in output_arguments
     assert "age" in output_arguments
@@ -166,7 +166,7 @@ def test_instructor_iterable(
     assert call.started_at < call.ended_at
     assert op_name_from_ref(call.op_name) == "openai.chat.completions.create"
     output = call.output
-    output_arguments = json.loads(output.choices[0].message.content)
+    output_arguments = json.loads(output["choices"][0]["message"]["content"])
     assert "tasks" in output_arguments
     assert "person_name" in output_arguments["tasks"][0]
     assert "age" in output_arguments["tasks"][0]

diff --git a/tests/integrations/openai/openai_test.py b/tests/integrations/openai/openai_test.py
@@ -38,10 +38,10 @@ def test_openai_quickstart(client: weave.trace.weave_client.WeaveClient) -> None
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["requests"] == 1
     assert usage["completion_tokens"] == 28
     assert usage["prompt_tokens"] == 11
@@ -86,10 +86,10 @@ async def test_openai_async_quickstart(
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["requests"] == 1
     assert usage["completion_tokens"] == 28
     assert usage["prompt_tokens"] == 11
@@ -315,10 +315,10 @@ def test_openai_function_call(client: weave.trace.weave_client.WeaveClient) -> N
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["total_tokens"] == 117
     assert usage["completion_tokens"] == 18
     assert usage["prompt_tokens"] == 99
@@ -401,10 +401,10 @@ async def test_openai_function_call_async(
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["total_tokens"] == 117
     assert usage["completion_tokens"] == 18
     assert usage["prompt_tokens"] == 99
@@ -577,10 +577,10 @@ def test_openai_tool_call(client: weave.trace.weave_client.WeaveClient) -> None:
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["total_tokens"] == 117
     assert usage["completion_tokens"] == 27
     assert usage["prompt_tokens"] == 90
@@ -664,10 +664,10 @@ async def test_openai_tool_call_async(
     assert call.started_at < call.ended_at  # type: ignore
 
     output = call.output
-    assert output.model == "gpt-4o-2024-05-13"
-    assert output.object == "chat.completion"
+    assert output["model"] == "gpt-4o-2024-05-13"
+    assert output["object"] == "chat.completion"
 
-    usage = call.summary["usage"][output.model]  # type: ignore
+    usage = call.summary["usage"][output["model"]]  # type: ignore
     assert usage["total_tokens"] == 117
     assert usage["completion_tokens"] == 27
     assert usage["prompt_tokens"] == 90

diff --git a/tests/trace/test_evaluations.py b/tests/trace/test_evaluations.py
@@ -7,7 +7,7 @@
 from PIL import Image
 
 import weave
-from tests.trace.util import AnyIntMatcher
+from tests.trace.util import AnyIntMatcher, AnyStrMatcher
 from weave import Evaluation, Model
 from weave.scorers import Scorer
 from weave.trace.refs import CallRef
@@ -504,8 +504,8 @@ async def test_evaluation_data_topology(client):
             }
         },
         "weave": {
+            "display_name": AnyStrMatcher(),
             "latency_ms": AnyIntMatcher(),
-            "trace_name": "Evaluation.evaluate",
             "status": "success",
         },
     }
@@ -1029,3 +1029,21 @@ def my_second_scorer(text, output, model_output):
 
     with pytest.raises(ValueError, match="Both 'output' and 'model_output'"):
         evaluation = weave.Evaluation(dataset=ds, scorers=[my_second_scorer])
+
+
+@pytest.mark.asyncio
+async def test_evaluation_with_custom_name(client):
+    dataset = weave.Dataset(rows=[{"input": "hi", "output": "hello"}])
+    evaluation = weave.Evaluation(dataset=dataset, evaluation_name="wow-custom!")
+
+    @weave.op()
+    def model(input: str) -> str:
+        return "hmmm"
+
+    await evaluation.evaluate(model)
+
+    calls = list(client.get_calls(filter=tsi.CallsFilter(trace_roots_only=True)))
+    assert len(calls) == 1
+
+    call = calls[0]
+    assert call.display_name == "wow-custom!"
diff --git a/tests/trace/test_exec.py b/tests/trace/test_exec.py
@@ -101,7 +101,7 @@ def test_publish_works_for_code_with_no_source_file(
 
     ref = captured["ref"]
     op = ref.get()
-    actual_captured_code = op.art.path_contents["obj.py"].decode()
+    actual_captured_code = op.get_captured_code()
     expected_captured_code = expected_captured_code[1:]  # ignore first newline
 
     assert actual_captured_code == expected_captured_code
diff --git a/tests/trace/test_trace_server_common.py b/tests/trace/test_trace_server_common.py
@@ -1,4 +1,5 @@
 from weave.trace_server.trace_server_common import (
+    DynamicBatchProcessor,
     LRUCache,
     get_nested_key,
     set_nested_key,
@@ -54,3 +55,26 @@ def test_lru_cache():
     cache["c"] = 10
     assert cache["c"] == 10
     assert cache["d"] == 4
+
+
+def test_dynamic_batch_processor():
+    # Initialize processor with:
+    # - initial batch size of 2
+    # - max size of 8
+    # - growth factor of 2
+    processor = DynamicBatchProcessor(initial_size=2, max_size=8, growth_factor=2)
+
+    test_data = range(15)
+
+    batches = list(processor.make_batches(iter(test_data)))
+
+    # Expected batch sizes: 2, 4, 8, 1
+    assert batches[0] == [0, 1]
+    assert batches[1] == [2, 3, 4, 5]
+    assert batches[2] == [6, 7, 8, 9, 10, 11, 12, 13]
+    assert batches[3] == [14]
+    assert len(batches) == 4
+
+    # Verify all items were processed
+    flattened = [item for batch in batches for item in batch]
+    assert flattened == list(range(15))
diff --git a/tests/trace/test_trace_settings.py b/tests/trace/test_trace_settings.py
@@ -104,7 +104,7 @@ def test_func():
 
     ref = weave.publish(test_func)
     test_func2 = ref.get()
-    code2 = test_func2.art.path_contents["obj.py"].decode()
+    code2 = test_func2.get_captured_code()
     assert "Code-capture was disabled" in code2
 
     parse_and_apply_settings(UserSettings(capture_code=True))
@@ -117,7 +117,7 @@ def test_func():
 
     ref2 = weave.publish(test_func)
     test_func3 = ref2.get()
-    code3 = test_func3.art.path_contents["obj.py"].decode()
+    code3 = test_func3.get_captured_code()
     assert "Code-capture was disabled" not in code3
 
 
@@ -130,7 +130,7 @@ def test_func():
 
     ref = weave.publish(test_func)
     test_func2 = ref.get()
-    code2 = test_func2.art.path_contents["obj.py"].decode()
+    code2 = test_func2.get_captured_code()
     assert "Code-capture was disabled" in code2
 
     os.environ["WEAVE_CAPTURE_CODE"] = "true"
@@ -141,7 +141,7 @@ def test_func():
 
     ref2 = weave.publish(test_func)
     test_func3 = ref2.get()
-    code3 = test_func3.art.path_contents["obj.py"].decode()
+    code3 = test_func3.get_captured_code()
     assert "Code-capture was disabled" not in code3
 
 

diff --git a/...race/type_serializers/Audio/audio_test.py → ...s/trace/type_handlers/Audio/audio_test.py b/...race/type_serializers/Audio/audio_test.py → ...s/trace/type_handlers/Audio/audio_test.py
diff --git a/...race/type_serializers/Image/image_test.py → ...s/trace/type_handlers/Image/image_test.py b/...race/type_serializers/Image/image_test.py → ...s/trace/type_handlers/Image/image_test.py
@@ -1,3 +1,4 @@
+import random
 from pathlib import Path
 
 import pytest
@@ -130,3 +131,41 @@ def accept_image_jpg_pillow(val):
         assert res == "Image size: 100x100"
     finally:
         file_path.unlink()
+
+
+@pytest.fixture
+def dataset_ref(client):
+    # This fixture represents a saved dataset containing images
+    IMAGE_SIZE = (1024, 1024)
+    N_ROWS = 50
+
+    def make_random_image():
+        random_colour = (
+            random.randint(0, 255),
+            random.randint(0, 255),
+            random.randint(0, 255),
+        )
+        return Image.new("RGB", IMAGE_SIZE, random_colour)
+
+    rows = [{"img": make_random_image()} for _ in range(N_ROWS)]
+    dataset = weave.Dataset(rows=rows)
+    ref = weave.publish(dataset)
+
+    return ref
+
+
+@pytest.mark.asyncio
+async def test_images_in_dataset_for_evaluation(client, dataset_ref):
+    dataset = dataset_ref.get()
+    evaluation = weave.Evaluation(dataset=dataset)
+
+    @weave.op
+    def model(img: Image) -> dict[str, str]:
+        return {"result": "hello"}
+
+    # Expect that evaluation works for a ref-get'd dataset containing images
+    res = await evaluation.evaluate(model)
+
+    assert isinstance(res, dict)
+    assert "model_latency" in res and "mean" in res["model_latency"]
+    assert isinstance(res["model_latency"]["mean"], (int, float))
diff --git a/tests/trace/util.py b/tests/trace/util.py
@@ -8,6 +8,13 @@ def client_is_sqlite(client):
     return isinstance(client.server._internal_trace_server, SqliteTraceServer)
 
 
+class AnyStrMatcher:
+    """Matches any string."""
+
+    def __eq__(self, other):
+        return isinstance(other, str)
+
+
 class AnyIntMatcher:
     """Matches any integer."""
 

diff --git a/weave-js/src/assets/icons/icon-filled-circle.svg b/weave-js/src/assets/icons/icon-filled-circle.svg