Merge branch 'master' into josiah/playground-full

wandb · Nov 22, 2024 · 7178a0f · 7178a0f
2 parents 947c153 + dc4d654
commit 7178a0f
Show file tree

Hide file tree

Showing 104 changed files with 5,143 additions and 728 deletions.
diff --git a/docs/docs/guides/core-types/imgs/prompt-comparison.png b/docs/docs/guides/core-types/imgs/prompt-comparison.png
diff --git a/docs/docs/guides/core-types/imgs/prompt-object.png b/docs/docs/guides/core-types/imgs/prompt-object.png
diff --git a/docs/docs/guides/core-types/prompts.md b/docs/docs/guides/core-types/prompts.md
diff --git a/docs/docs/guides/tracking/tracing.mdx b/docs/docs/guides/tracking/tracing.mdx
@@ -194,7 +194,6 @@ However, often LLM applications have additional logic (such as pre/post processi
 
     ```python showLineNumbers
     # Notice that we pass the `instance` as the first argument.
-    # highlight-next-line
     print(instance.my_method.call(instance, "World"))
     ```
 

diff --git a/docs/sidebars.ts b/docs/sidebars.ts
@@ -64,7 +64,7 @@ const sidebars: SidebarsConfig = {
             "guides/evaluation/scorers",
           ],
         },
-        // "guides/core-types/prompts",
+        "guides/core-types/prompts",
         "guides/core-types/models",
         "guides/core-types/datasets",
         "guides/tracking/feedback",

diff --git a/pyproject.toml b/pyproject.toml
@@ -34,7 +34,7 @@ classifiers = [
 requires-python = ">=3.9"
 dynamic = ["version"]
 dependencies = [
-  "pydantic>=2.0.0",
+  "pydantic>=2.0.0,<2.10.0", # Pinning to resolve issues caused in 2.10.0 release
   "wandb>=0.17.1",
   "packaging>=21.0",         # For version parsing in integrations
   "tenacity>=8.3.0,!=8.4.0", # Excluding 8.4.0 because it had a bug on import of AsyncRetrying
@@ -43,6 +43,7 @@ dependencies = [
   "numpy>1.21.0",            # Used in box.py and scorer.py (should be made optional)
   "rich",                    # Used for special formatting of tables (should be made optional)
   "gql[aiohttp,requests]",   # Used exclusively in wandb_api.py
+  "jsonschema>=4.23.0",      # Used by scorers for field validation
 ]
 
 [project.optional-dependencies]
@@ -90,7 +91,6 @@ test = [
   "clickhouse_connect==0.7.0",
   "fastapi>=0.110.0",
   "sqlparse==0.5.0",
-  "jsonschema>=4.23.0",
 
   # Integration Tests
   "pytest-recording>=0.13.2",
@@ -222,7 +222,7 @@ module = "weave_query.*"
 ignore_errors = true
 
 [tool.bumpversion]
-current_version = "0.51.20-dev0"
+current_version = "0.51.23-dev0"
 parse = """(?x)
     (?P<major>0|[1-9]\\d*)\\.
     (?P<minor>0|[1-9]\\d*)\\.

diff --git a/tests/integrations/integration_utilities_test.py b/tests/integrations/integration_utilities_test.py
@@ -5,19 +5,19 @@
     _truncated_str,
     truncate_op_name,
 )
+from weave.trace_server.constants import MAX_OP_NAME_LENGTH
 
-MAX_RUN_NAME_LENGTH = 128
 NON_HASH_LIMIT = 5
 
 
 def test_truncate_op_name_less_than_limit() -> None:
-    name = _make_string_of_length(MAX_RUN_NAME_LENGTH - 1)
+    name = _make_string_of_length(MAX_OP_NAME_LENGTH - 1)
     trunc = truncate_op_name(name)
     assert trunc == name
 
 
 def test_truncate_op_name_at_limit() -> None:
-    name = _make_string_of_length(MAX_RUN_NAME_LENGTH)
+    name = _make_string_of_length(MAX_OP_NAME_LENGTH)
     trunc = truncate_op_name(name)
     assert trunc == name
 
@@ -29,15 +29,13 @@ def test_truncate_op_name_too_short_for_hash() -> None:
         if tail_len <= chars_to_remove:
             with pytest.raises(ValueError):
                 name, trunc = _truncated_str(
-                    tail_len, MAX_RUN_NAME_LENGTH + chars_to_remove
+                    tail_len, MAX_OP_NAME_LENGTH + chars_to_remove
                 )
         else:
-            name, trunc = _truncated_str(
-                tail_len, MAX_RUN_NAME_LENGTH + chars_to_remove
-            )
-            assert trunc == name[:MAX_RUN_NAME_LENGTH]
+            name, trunc = _truncated_str(tail_len, MAX_OP_NAME_LENGTH + chars_to_remove)
+            assert trunc == name[:MAX_OP_NAME_LENGTH]
 
-    name, trunc = _truncated_str(NON_HASH_LIMIT + 1, MAX_RUN_NAME_LENGTH + 1)
+    name, trunc = _truncated_str(NON_HASH_LIMIT + 1, MAX_OP_NAME_LENGTH + 1)
     assert (
         trunc
         == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.a_0_a"
@@ -49,24 +47,22 @@ def test_truncate_op_name_too_short_for_hash() -> None:
         if tail_len <= chars_to_remove:
             with pytest.raises(ValueError):
                 name, trunc = _truncated_str(
-                    tail_len, MAX_RUN_NAME_LENGTH + chars_to_remove
+                    tail_len, MAX_OP_NAME_LENGTH + chars_to_remove
                 )
         else:
-            name, trunc = _truncated_str(
-                tail_len, MAX_RUN_NAME_LENGTH + chars_to_remove
-            )
-            assert trunc == name[:MAX_RUN_NAME_LENGTH]
+            name, trunc = _truncated_str(tail_len, MAX_OP_NAME_LENGTH + chars_to_remove)
+            assert trunc == name[:MAX_OP_NAME_LENGTH]
 
 
 def test_truncate_op_name_with_digest() -> None:
-    name = _make_string_of_length(MAX_RUN_NAME_LENGTH + 1)
+    name = _make_string_of_length(MAX_OP_NAME_LENGTH + 1)
     trunc = truncate_op_name(name)
     assert (
         trunc
         == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_b325_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
     )
 
-    name = _make_string_of_length(MAX_RUN_NAME_LENGTH + 10)
+    name = _make_string_of_length(MAX_OP_NAME_LENGTH + 10)
     trunc = truncate_op_name(name)
     assert (
         trunc

diff --git a/tests/trace/test_annotation_feedback.py b/tests/trace/test_annotation_feedback.py
@@ -10,8 +10,8 @@ def test_human_feedback_basic(client):
         description="A numerical field with a range of -1 to 1",
         json_schema={
             "type": "number",
-            "min": -1,
-            "max": 1,
+            "minimum": -1,
+            "maximum": 1,
         },
         unique_among_creators=True,
         op_scope=None,
@@ -49,8 +49,8 @@ def test_human_feedback_basic(client):
     assert objects.objs[1].val["op_scope"] == ["weave:///entity/project/op/name:digest"]
     assert objects.objs[0].val["json_schema"] == {
         "type": "number",
-        "min": -1,
-        "max": 1,
+        "minimum": -1,
+        "maximum": 1,
     }
     assert objects.objs[1].val["json_schema"] == {
         "type": "string",

diff --git a/tests/trace/test_evaluations.py b/tests/trace/test_evaluations.py
@@ -1007,3 +1007,25 @@ def score(self, text, output) -> bool:
     assert feedback["feedback_type"] == "wandb.runnable.MyScorer"
     assert feedback["payload"] == {"output": True}
     assert feedback["runnable_ref"] == get_ref(scorer).uri()
+
+
+def test_scorers_with_output_and_model_output_raise_error():
+    class MyScorer(Scorer):
+        @weave.op
+        def score(self, text, output, model_output):
+            return text == output == model_output
+
+    @weave.op
+    def my_second_scorer(text, output, model_output):
+        return text == output == model_output
+
+    ds = [{"text": "hello"}]
+
+    with pytest.raises(ValueError, match="Both 'output' and 'model_output'"):
+        scorer = MyScorer()
+
+    with pytest.raises(ValueError, match="Both 'output' and 'model_output'"):
+        evaluation = weave.Evaluation(dataset=ds, scorers=[MyScorer()])
+
+    with pytest.raises(ValueError, match="Both 'output' and 'model_output'"):
+        evaluation = weave.Evaluation(dataset=ds, scorers=[my_second_scorer])
diff --git a/tests/trace/test_prompt.py b/tests/trace/test_prompt.py
@@ -2,22 +2,21 @@
 
 
 def test_stringprompt_format():
-    class MyPrompt(StringPrompt):
-        def format(self, **kwargs) -> str:
-            return "Imagine a lot of complicated logic build this string."
-
-    prompt = MyPrompt()
-    assert prompt.format() == "Imagine a lot of complicated logic build this string."
+    prompt = StringPrompt("You are a pirate. Tell us your thoughts on {topic}.")
+    assert (
+        prompt.format(topic="airplanes")
+        == "You are a pirate. Tell us your thoughts on airplanes."
+    )
 
 
 def test_messagesprompt_format():
-    class MyPrompt(MessagesPrompt):
-        def format(self, **kwargs) -> list:
-            return [
-                {"role": "user", "content": "What's 23 * 42"},
-            ]
-
-    prompt = MyPrompt()
-    assert prompt.format() == [
-        {"role": "user", "content": "What's 23 * 42"},
+    prompt = MessagesPrompt(
+        [
+            {"role": "system", "content": "You are a pirate."},
+            {"role": "user", "content": "Tell us your thoughts on {topic}."},
+        ]
+    )
+    assert prompt.format(topic="airplanes") == [
+        {"role": "system", "content": "You are a pirate."},
+        {"role": "user", "content": "Tell us your thoughts on airplanes."},
     ]
diff --git a/tests/trace/test_weave_client.py b/tests/trace/test_weave_client.py
@@ -28,6 +28,7 @@
 )
 from weave.trace.serializer import get_serializer_for_obj, register_serializer
 from weave.trace_server.clickhouse_trace_server_batched import NotFoundError
+from weave.trace_server.constants import MAX_DISPLAY_NAME_LENGTH
 from weave.trace_server.sqlite_trace_server import (
     NotFoundError as sqliteNotFoundError,
 )
@@ -1520,3 +1521,26 @@ def my_op(a: int) -> int:
     assert len(calls) == 1
     call = calls[0]
     assert call.display_name == "custom_display_name"
+
+
+def test_long_display_names_are_elided(client):
+    @weave.op(call_display_name="a" * 2048)
+    def func():
+        pass
+
+    # The display name is correct client side
+    _, call = func.call()
+    assert len(call.display_name) <= MAX_DISPLAY_NAME_LENGTH
+
+    # The display name is correct server side
+    calls = list(func.calls())
+    call = calls[0]
+    assert len(call.display_name) <= MAX_DISPLAY_NAME_LENGTH
+
+    # Calling set_display_name is correct
+    call.set_display_name("b" * 2048)
+    assert len(call.display_name) <= MAX_DISPLAY_NAME_LENGTH
+
+    calls = list(func.calls())
+    call = calls[0]
+    assert len(call.display_name) <= MAX_DISPLAY_NAME_LENGTH
diff --git a/wb_schema.gql b/wb_schema.gql
@@ -169,6 +169,7 @@ type User implements Node {
   photoUrl: String
   deletedAt: DateTime
   teams(before: String, after: String, first: Int, last: Int): EntityConnection
+  admin: Boolean
 }
 
 type UserConnection {

diff --git a/weave-js/package.json b/weave-js/package.json
@@ -48,6 +48,7 @@
     "@radix-ui/react-slider": "^1.0.4",
     "@radix-ui/react-switch": "^1.0.3",
     "@radix-ui/react-tabs": "^1.0.4",
+    "@radix-ui/react-toggle-group": "^1.1.0",
     "@radix-ui/react-tooltip": "^1.0.7",
     "@segment/analytics.js-core": "^4.1.11",
     "@segment/analytics.js-integration-segmentio": "^4.4.7",

diff --git a/weave-js/src/common/hooks/useViewerInfo.ts b/weave-js/src/common/hooks/useViewerInfo.ts
@@ -11,6 +11,7 @@ const VIEWER_QUERY = gql`
     viewer {
       id
       username
+      admin
       teams {
         edges {
           node {
@@ -28,6 +29,7 @@ type UserInfo = {
   id: string;
   username: string;
   teams: string[];
+  admin: boolean;
 };
 type UserInfoResponseLoading = {
   loading: true;
@@ -71,6 +73,7 @@ export const useViewerInfo = (): UserInfoResponse => {
           id,
           username,
           teams,
+          admin: userInfo.admin,
         },
       });
     });

diff --git a/weave-js/src/components/FancyPage/useProjectSidebar.ts b/weave-js/src/components/FancyPage/useProjectSidebar.ts
@@ -170,13 +170,13 @@ export const useProjectSidebar = (
             key: 'dividerWithinWeave-2',
             isShown: isWeaveOnly,
           },
-          // {
-          //   type: 'button' as const,
-          //   name: 'Prompts',
-          //   slug: 'weave/prompts',
-          //   isShown: showWeaveSidebarItems || isShowAll,
-          //   iconName: IconNames.ForumChatBubble,
-          // },
+          {
+            type: 'button' as const,
+            name: 'Prompts',
+            slug: 'weave/prompts',
+            isShown: showWeaveSidebarItems || isShowAll,
+            iconName: IconNames.ForumChatBubble,
+          },
           {
             type: 'button' as const,
             name: 'Models',

diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse2/CellValue.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse2/CellValue.tsx
@@ -19,7 +19,6 @@ import {SmallRef} from './SmallRef';
 
 type CellValueProps = {
   value: any;
-  isExpanded?: boolean;
 };
 
 const Collapsed = styled.div<{hasScrolling: boolean}>`
@@ -32,15 +31,15 @@ const Collapsed = styled.div<{hasScrolling: boolean}>`
 `;
 Collapsed.displayName = 'S.Collapsed';
 
-export const CellValue = ({value, isExpanded = false}: CellValueProps) => {
+export const CellValue = ({value}: CellValueProps) => {
   if (value === undefined) {
     return null;
   }
   if (value === null) {
     return <ValueViewPrimitive>null</ValueViewPrimitive>;
   }
   if (isWeaveRef(value)) {
-    return <SmallRef objRef={parseRef(value)} iconOnly={isExpanded} />;
+    return <SmallRef objRef={parseRef(value)} />;
   }
   if (typeof value === 'boolean') {
     return (

diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse2/SmallRef.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse2/SmallRef.tsx
@@ -4,7 +4,6 @@ import {
   isWandbArtifactRef,
   isWeaveObjectRef,
   ObjectRef,
-  parseRef,
   refUri,
 } from '@wandb/weave/react';
 import React, {FC} from 'react';
@@ -201,11 +200,3 @@ export const SmallRef: FC<{
     </Link>
   );
 };
-
-export const parseRefMaybe = (s: string): ObjectRef | null => {
-  try {
-    return parseRef(s);
-  } catch (e) {
-    return null;
-  }
-};
diff --git a/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx b/weave-js/src/components/PagePanelComponents/Home/Browse3.tsx
@@ -47,6 +47,7 @@ import {Button} from '../../Button';
 import {ErrorBoundary} from '../../ErrorBoundary';
 import {Browse2EntityPage} from './Browse2/Browse2EntityPage';
 import {Browse2HomePage} from './Browse2/Browse2HomePage';
+import {ComparePage} from './Browse3/compare/ComparePage';
 import {
   baseContext,
   browse2Context,
@@ -73,7 +74,6 @@ import {CallPage} from './Browse3/pages/CallPage/CallPage';
 import {CallsPage} from './Browse3/pages/CallsPage/CallsPage';
 import {
   ALWAYS_PIN_LEFT_CALLS,
-  DEFAULT_COLUMN_VISIBILITY_CALLS,
   DEFAULT_FILTER_CALLS,
   DEFAULT_PIN_CALLS,
   DEFAULT_SORT_CALLS,
@@ -537,6 +537,9 @@ const Browse3ProjectRoot: FC<{
           ]}>
           <PlaygroundPageBinding />
         </Route>
+        <Route path={`${projectRoot}/compare`}>
+          <ComparePageBinding />
+        </Route>
       </Switch>
     </Box>
   );
@@ -736,7 +739,7 @@ const CallsPageBinding = () => {
     try {
       return JSON.parse(query.cols);
     } catch (e) {
-      return DEFAULT_COLUMN_VISIBILITY_CALLS;
+      return {};
     }
   }, [query.cols]);
   const setColumnVisibilityModel = (newModel: GridColumnVisibilityModel) => {
@@ -1040,6 +1043,12 @@ const TablesPageBinding = () => {
   return <TablesPage entity={params.entity} project={params.project} />;
 };
 
+const ComparePageBinding = () => {
+  const params = useParamsDecoded<Browse3TabItemParams>();
+
+  return <ComparePage entity={params.entity} project={params.project} />;
+};
+
 const AppBarLink = (props: ComponentProps<typeof RouterLink>) => (
   <MaterialLink
     sx={{